# In [None]:
import os
import re
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import LlamaCpp
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import ChatPromptTemplate


def load_documents(directory="/content/drive/MyDrive/GenAI"):
    """Load the PDF files found in *directory*.

    Args:
        directory: Folder handed to ``PyPDFDirectoryLoader``. Defaults to the
            Google Drive path this script was originally written against, so
            existing zero-argument callers behave exactly as before.

    Returns:
        The documents returned by ``PyPDFDirectoryLoader.load()``.
    """
    loader = PyPDFDirectoryLoader(directory)
    return loader.load()


def split_documents(docs, chunk_size=300, chunk_overlap=50):
    """Split loaded documents into overlapping chunks for embedding.

    Args:
        docs: Documents to split (as produced by ``load_documents``).
        chunk_size: Maximum size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 300.
        chunk_overlap: Overlap between consecutive chunks, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 50.

    Returns:
        The chunked documents returned by the splitter's ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(docs)


def create_embeddings():
    """Build the sentence-transformer embedding model used by the vector store."""
    embedding_model_name = "NeuML/pubmedbert-base-embeddings"
    embedder = SentenceTransformerEmbeddings(model_name=embedding_model_name)
    return embedder


def create_vectorstore(chunks, embeddings):
    """Index *chunks* in a Chroma vector store using *embeddings*.

    Args:
        chunks: Document chunks to index.
        embeddings: Embedding model passed through to Chroma.

    Returns:
        The store created by ``Chroma.from_documents``.
    """
    store = Chroma.from_documents(chunks, embeddings)
    return store


def initialize_llm(
    model_path="/content/drive/MyDrive/BioMistral-7B.Q4_K_M.gguf",
    *,
    temperature=0.2,
    max_tokens=2048,
    top_p=1,
):
    """Create the local LlamaCpp language model.

    Every argument defaults to the value that was previously hard-coded, so
    calling ``initialize_llm()`` with no arguments is unchanged.

    Args:
        model_path: Path to the GGUF model file.
        temperature: Sampling temperature forwarded to ``LlamaCpp``.
        max_tokens: Generation length limit forwarded to ``LlamaCpp``.
        top_p: Nucleus-sampling value forwarded to ``LlamaCpp``.

    Returns:
        The configured ``LlamaCpp`` instance.
    """
    # NOTE(review): no ``n_ctx`` is passed, so the wrapper's default context
    # window applies; confirm it is large enough for the prompt plus
    # ``max_tokens`` of output.
    return LlamaCpp(
        model_path=model_path,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
    )


def create_prompt_template():
    """Build the chat prompt that combines retrieved context with the query.

    The template exposes two input variables:

    * ``context`` - the retrieved reference material.
    * ``query`` - the user's question.

    The ``{context}`` placeholder was previously missing, so the retrieved
    material was dropped before the prompt reached the model even though the
    system text refers to "the context provided".

    Returns:
        A ``ChatPromptTemplate`` built from the template string.
    """
    template = """
<|context|>
You are a Medical Assistant that follows the instructions and generates accurate responses based on the query and the context provided.
Please be truthful and give direct answers.
Context:
{context}
</s>
<|user|>
{query}
</s>
<|assistant|>
"""
    return ChatPromptTemplate.from_template(template)


def _format_docs(docs):
    """Join the text of the retrieved documents into one plain string."""
    return "\n\n".join(doc.page_content for doc in docs)


def create_rag_chain(retriever, llm, prompt):
    """Assemble the retrieval-augmented generation pipeline.

    The chain takes a query string as input. The query is sent to
    *retriever* to produce the ``context`` value and is passed through
    unchanged as ``query``; both are fed to *prompt*, then *llm*, and the
    output is parsed into a string.

    Args:
        retriever: Runnable retriever that returns documents for a query.
        llm: Language model runnable.
        prompt: Prompt template consuming ``context`` and ``query``.

    Returns:
        The composed runnable chain.
    """
    return (
        {
            # Flatten the retrieved documents to text so a prompt that uses
            # the context receives readable passages rather than the repr of
            # a list of document objects.
            "context": retriever | _format_docs,
            "query": RunnablePassthrough(),
        }
        | prompt
        | llm
        | StrOutputParser()
    )


def _extract_f1_score(text):
    """Return the first decimal number following "F1 score" in *text*, or None."""
    match = re.search(r'F1 score.*?(\d+\.\d+)', text)
    return match.group(1) if match else None


def main():
    """Build the RAG pipeline and answer queries typed at the prompt.

    Loads and chunks the PDFs, indexes them in a vector store, wires the
    retriever, prompt and LLM into a chain, then loops reading queries from
    stdin until the user types ``exit`` (or stdin is closed).
    """
    # SECURITY: a Hugging Face API token used to be hard-coded on this line.
    # Credentials must not live in source; set HUGGINGFACEHUB_API_TOKEN in the
    # environment before launching if a token is required, and revoke the
    # token that was previously committed.

    # Load and process documents
    docs = load_documents()
    chunks = split_documents(docs)

    # Create embeddings and vector store
    embeddings = create_embeddings()
    vectorstore = create_vectorstore(chunks, embeddings)

    # Create retriever and LLM
    retriever = vectorstore.as_retriever(search_kwargs={'k': 5})
    llm = initialize_llm()

    # Create prompt template and RAG chain
    prompt = create_prompt_template()
    rag_chain = create_rag_chain(retriever, llm, prompt)

    # Interactive loop
    while True:
        try:
            user_input = input("Input query: ").strip()
        except EOFError:
            # stdin was closed (e.g. piped input ran out): leave cleanly
            # instead of crashing with a traceback.
            print("Exiting...")
            break
        if user_input == "exit":
            print("Exiting...")
            break
        if not user_input:
            continue

        result = rag_chain.invoke(user_input)
        # NOTE(review): each invoke is independent (the chain built above has
        # no memory component), so the model cannot see the previous answer;
        # treat this self-reported score as unreliable.
        f1_reply = rag_chain.invoke("whats the f1 score of the previous query, just give score dont need explanation")
        # Recomputed for every query: previously a failed match either raised
        # UnboundLocalError (first query) or reprinted the last query's score.
        score = _extract_f1_score(f1_reply)

        print(result)
        print("F1 Score =", score if score is not None else "N/A")


# Start the interactive assistant only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
