In [1]:
from langchain_community.document_loaders import GitbookLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma

from fastapi import FastAPI
# from langserve import add_routes

from langchain.chains import RetrievalQA
from langchain.chains import create_history_aware_retriever
from langchain import hub





import streamlit as st
from langchain.chains import ConversationalRetrievalChain


from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

from langchain_core.messages import HumanMessage

from langchain.callbacks.base import BaseCallbackHandler



from typing import AsyncIterator, Iterator

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document

import json


# from langchain_core.pydantic_v1 import BaseModel, Field
from pydantic import BaseModel, Field
from typing import Optional

from langchain.chains.query_constructor.ir import (
    Comparator,
    Comparison,
    Operation,
    Operator,
    StructuredQuery,
)
# from langchain.retrievers.self_query.chroma import ChromaTranslator
from langchain_community.query_constructors.chroma import ChromaTranslator


# from babel.dates import format_date, format_datetime, format_time
from datetime import datetime, timedelta



# Explicitly switch LangChain's global debug flag off for this notebook;
# change the argument to True while troubleshooting a chain.
from langchain.globals import set_debug
set_debug(False)



import os
from dotenv import load_dotenv

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
# llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

from langchain_community.embeddings import OllamaEmbeddings

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
def init_vars(retriever_top_k = 5):
    """Create the chat model, embedding model, vector store and default retriever.

    Configuration is read from the environment after calling
    ``load_dotenv(override=True)``:

    * ``COMPLETION_URL`` / ``COMPLETION_MODEL`` -- when both are set, the chat
      model is a ``ChatOpenAI`` pointed at that base URL; otherwise
      ``gpt-3.5-turbo`` is used.
    * ``EMBEDDING_URL`` / ``EMBEDDING_MODEL`` -- when both are set, embeddings
      come from ``OllamaEmbeddings``; otherwise OpenAI
      ``text-embedding-3-small`` is used.
    * ``DB_PATH`` -- passed to ``Chroma`` as ``persist_directory``.

    The selected models are printed so the choice is visible in the cell output.

    Args:
        retriever_top_k: Value passed as ``k`` in the retriever's
            ``search_kwargs``.

    Returns:
        A 5-tuple ``(llm, emb, vectorstore, default_retriever, retriever_top_k)``.
    """
    load_dotenv(override=True)

    # Read every setting up front, in one place.
    env = {
        key: os.getenv(key)
        for key in (
            "COMPLETION_URL",
            "COMPLETION_MODEL",
            "EMBEDDING_URL",
            "EMBEDDING_MODEL",
            "DB_PATH",
        )
    }

    # Chat model: fall back to the hosted default unless a custom endpoint is fully specified.
    if not (env["COMPLETION_URL"] and env["COMPLETION_MODEL"]):
        chat_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
        print("model: gpt-3.5-turbo")
    else:
        chat_model = ChatOpenAI(
            base_url=env["COMPLETION_URL"],
            model=env["COMPLETION_MODEL"],
            temperature=0,
        )
        print("model: ", env["COMPLETION_MODEL"], "base_url: ", env["COMPLETION_URL"])

    # Embedding model: same "both or neither" rule as the chat model.
    if not (env["EMBEDDING_URL"] and env["EMBEDDING_MODEL"]):
        embedder = OpenAIEmbeddings(model="text-embedding-3-small")
        print("model: text-embedding-3-small")
    else:
        embedder = OllamaEmbeddings(
            base_url=env["EMBEDDING_URL"],
            model=env["EMBEDDING_MODEL"],
            temperature=0,
        )
        print("model: ", env["EMBEDDING_MODEL"], "base_url: ", env["EMBEDDING_URL"])

    store = Chroma(persist_directory=env["DB_PATH"], embedding_function=embedder)
    retriever = store.as_retriever(search_kwargs={"k": retriever_top_k})

    return chat_model, embedder, store, retriever, retriever_top_k









def rag_chain_constructor(retriever, chat_model=None):
    """Build a history-aware retrieval QA chain around ``retriever``.

    Two prompts are assembled and wired together:

    1. A "contextualize" prompt that asks the model to rewrite the latest user
       question as a standalone question; it is handed to
       ``create_history_aware_retriever`` together with the model and
       ``retriever``.
    2. A question-answering prompt whose system message embeds the retrieved
       ``{context}``; it is handed to ``create_stuff_documents_chain``.

    The two pieces are combined with ``create_retrieval_chain``.

    Args:
        retriever: Retriever passed to ``create_history_aware_retriever``.
        chat_model: Optional chat model used for both stages. When omitted,
            the module-level ``llm`` is used, which matches the behaviour of
            the one-argument call.

    Returns:
        The chain returned by ``create_retrieval_chain``. Both prompts expect
        the keys ``input`` and ``chat_history`` in the invocation payload.

    Raises:
        NameError: If ``chat_model`` is omitted and no module-level ``llm``
            has been defined yet.
    """
    # Only touch the global when the caller did not supply a model, so the
    # function can be used without relying on notebook-level state.
    model = llm if chat_model is None else chat_model

    contextualize_q_system_prompt = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, formulate a standalone question "
        "which can be understood without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    history_aware_retriever = create_history_aware_retriever(
        model, retriever, contextualize_q_prompt
    )

    # Adjacent string literals are used instead of backslash continuations:
    # the previous form dropped the space between sentences wherever the
    # backslash directly followed the full stop ("specified.The context ...").
    qa_system_prompt = (
        'You are an assistant, called "BioData Catalyst(BDC) Assistant", for question-answering tasks related BioData Catalyst. '
        "Use the following pieces of retrieved context to answer the question. "
        "If you can't get an answer base on the context, just say that you don't know. "
        "Use 1-3 sentences and keep the answer concise, unless otherwise specified. "
        "The context are retrieved based on the user query and the chat history. "
        "If there is context provided, answer the question based on the context.\n"
        "### context: {context}"
    )
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", qa_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )

    question_answer_chain = create_stuff_documents_chain(model, qa_prompt)

    return create_retrieval_chain(history_aware_retriever, question_answer_chain)



# Create the notebook-wide objects. `llm` must exist at module level before
# rag_chain_constructor is called, because that function reads it as a global.
llm, emb, vectorstore, default_retriever, retriever_top_k = init_vars(retriever_top_k=5)


# Default chain: history-aware RAG over the default retriever.
default_rag_chain = rag_chain_constructor(default_retriever)

model:  meta-llama/Meta-Llama-3.1-8B-Instruct base_url:  http://localhost:8080/v1
model:  bge-m3 base_url:  http://localhost:11434


  emb = OllamaEmbeddings(base_url=EMBEDDING_URL, model=EMBEDDING_MODEL, temperature=0)


In [3]:
def parse_text(answer: str, context) -> str:
    """Append a numbered, de-duplicated list of sources to ``answer``.

    Each document contributes one entry, keyed by ``metadata["file_path"]``;
    later documents with an already-seen path are ignored. An entry is a
    markdown link followed by the document's ``page_content``.

    Args:
        answer: The text the source list is appended to.
        context: Iterable of document-like objects exposing ``metadata``
            (a mapping containing ``file_path`` and, optionally,
            ``doc_type``) and ``page_content``. ``None`` is treated as empty.

    Returns:
        ``answer`` unchanged when there are no documents; otherwise ``answer``
        followed by a ``#### Source:`` (one entry) or ``#### Sources:``
        (several entries) section.

    Raises:
        KeyError: If a document's metadata has no ``file_path``.
    """
    seen_paths = set()
    entries = []
    for doc in context or []:
        file_path = doc.metadata["file_path"]
        if file_path in seen_paths:
            continue
        seen_paths.add(file_path)

        # Not every indexed document carries a doc_type; fall back to the bare
        # path instead of raising KeyError.
        doc_type = doc.metadata.get("doc_type")
        title = file_path if doc_type is None else f"{doc_type}: {file_path}"
        entries.append((title, file_path, doc.page_content))

    if not entries:
        return answer

    heading = "\n\n#### Source:\n" if len(entries) == 1 else "\n\n#### Sources:\n"
    listing = "".join(
        f"{number}. [{title}]({file_path})\n{content}\n\n\n"
        for number, (title, file_path, content) in enumerate(entries, start=1)
    )
    return answer + heading + listing

In [5]:
# Smoke test: ask a single question with an empty chat history.
prompt = "What is the latest update?"

# Left as the cell's last expression so the returned dict is displayed.
default_rag_chain.invoke({"input": prompt, "chat_history": []})

{'input': 'What is the latest update?',
 'chat_history': [],
 'context': [Document(metadata={'date': '2024-03-13', 'display_date': 'March 13, 2024', 'file_path': 'interim-bdc-website/src/data/events/2024-03-13_Community-Hours.mdx', 'forum_post': 'https://bdcatalyst.freshdesk.com/support/discussions/topics/60000407674', 'location': 'Virtual via Zoom', 'path': '/events/2024-03-13/community_hours', 'registration_required': True, 'tags': 'community hours', 'time': '1:00 - 2:00 pm ET', 'title': 'BDC March Community Hours', 'url': 'https://renci.zoom.us/webinar/register/2117093062319/WN_qWm4svX0T36KwiIAO-mORA'}, page_content='Join us on Wednesday, March 13 1pm ET for a data update on BDC with Sweta Ladwa, Chief, Scientific Solutions Delivery Branch, NHLBI.\n\nEven if you cannot attend the session live, you can still\xa0[register](https://renci.zoom.us/webinar/register/2117093062319/WN_qWm4svX0T36KwiIAO-mORA)\xa0to have the session recording and slides sent to you post-event.\n\nAll users are