In [13]:
import os
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

USER_VECTOR_DB_PATH = "user_vector_stores"  # Base directory for all users' vector stores

def get_user_vector_store(user_id, embeddings):
    """
    Load the FAISS vector store for the given user. If it doesn't exist, create a new one.
    """
    user_db_path = os.path.join(USER_VECTOR_DB_PATH, user_id)
    
    if os.path.exists(user_db_path):  # Load existing vector store
        print(f"Loading vector store for user: {user_id}")
        return FAISS.load_local(user_db_path, embeddings=embeddings, allow_dangerous_deserialization=True)
    
    else:  # Create a new vector store for this user
        print(f"Creating new vector store for user: {user_id}")
        single_vector = embeddings.embed_query("this is some text data")
        index = faiss.IndexFlatL2(len(single_vector))
        
        return FAISS(
            embedding_function=embeddings,
            index=index,
            docstore=InMemoryDocstore(),
            index_to_docstore_id={}
        )


In [10]:
from docling.document_converter import DocumentConverter
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import Document

def process_and_store_user_data(user_id, source_file, embeddings):
    """
    Convert the document, split it, and store embeddings in the user's vector database.
    """
    converter = DocumentConverter()
    result = converter.convert(source_file)

    markdown_text = result.document.export_to_markdown()
    docs = [Document(page_content=markdown_text)]

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=1000)
    chunks = text_splitter.split_documents(docs)

    # Load or create user-specific vector store
    vector_store = get_user_vector_store(user_id, embeddings)

    # Add documents to the user’s vector store
    vector_store.add_documents(documents=chunks)

    # Save the updated vector store
    user_db_path = os.path.join(USER_VECTOR_DB_PATH, user_id)
    vector_store.save_local(user_db_path)
    print(f"User {user_id} data stored successfully.")


  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate

from langchain_ollama import ChatOllama



model = ChatOllama(model="llama3.2:1b", base_url="http://localhost:11434")

model.invoke("hi")

prompt = """
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    Question: {question} 
    Context: {context} 
    Answer:
"""

prompt = ChatPromptTemplate.from_template(prompt)

In [14]:
def get_user_retriever(user_id, embeddings):
    """
    Retrieve the appropriate user's data.
    """
    vector_store = get_user_vector_store(user_id, embeddings)
    
    return vector_store.as_retriever(search_type="mmr", search_kwargs={
        'k': 3, 
        'fetch_k': 100,
        'lambda_mult': 1
    })

def user_rag_pipeline(user_id, question, model, embeddings):
    """
    Process the query using the RAG pipeline for the given user.
    """
    retriever = get_user_retriever(user_id, embeddings)
    
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )
    
    return rag_chain.invoke(question)


def format_docs(docs):
    return "\n\n".join([doc.page_content for doc in docs])


In [17]:
# User 1: Processing and Storing Data
from langchain_ollama import OllamaEmbeddings

# Initialize embeddings
embeddings = OllamaEmbeddings(model='nomic-embed-text', base_url="http://localhost:11434")

user_1 = "user_123"
pdf_path_1 = "C:\\Users\\Archents1\\LangChain\\4.FAISS multiple users\\GenepoweRx Total New Sample Report 23-44-46.pdf"
process_and_store_user_data(user_1, pdf_path_1, embeddings)

# User 2: Processing and Storing Data
user_2 = "user_456"
pdf_path_2 = "C:\\Users\\Archents1\\LangChain\\4.FAISS multiple users\\Ghost Y-Genetic Report (1)-46-53-2-3.pdf"
process_and_store_user_data(user_2, pdf_path_2, embeddings)

# User 1: Querying their own data
# question_1 = "What is the good response to the painkillers?"
# output_1 = user_rag_pipeline(user_1, question_1, model, embeddings)
# print("User 1 Response:", output_1)

# User 2: Querying their own data

pharmacogenomics_output_structure = """{
    "pharmacogenomics_report": {
        "<response_category_type>": {
            "number_of_variants_analyzed": "<integer>",
            "number_of_gene_markers_evaluated": "<integer>",
            "data_validated_on": "<integer>",
            "number_of_studies_evaluated": "<integer>",
            "responses": {
                "<if Molecule_Class else General>": {
                    "Good_Response": ["<drug_name_1>", "<drug_name_2>", "..."],
                    "Intermediate_Response": ["<drug_name_1>", "<drug_name_2>", "..."],
                    "Poor_Response": ["<drug_name_1>", "<drug_name_2>", "..."],
                    "Evidence_not_found": ["<drug_name_1>", "<drug_name_2>", "..."]
                    },
                "...": {}
            }
        },
        "...": {}
    }
    }"""
question_2 = "generate the structure as given below {pharmacogenomics_output_structure}"
output_2 = user_rag_pipeline(user_2, question_2, model, embeddings)
print("User 2 Response:", output_2)


Loading vector store for user: user_123
User user_123 data stored successfully.
Loading vector store for user: user_456
User user_456 data stored successfully.
Loading vector store for user: user_456
User 2 Response: | Molecule Class          | Good Response   | Intermediate Response   | Poor Response                                                      | Evidence not  found                                                        |
|-------------------------|-----------------|-------------------------|--------------------------------------------------------------------|----------------------------------------------------------------------------|
| Sulfonylureas           | Metformin       | Gliclazides,  Glibenclamide,  Glimepiride,  Glipizide,  Gliquidone   | Glyburide  Chlorpropamide  Tolazamide  Tolbutamide                           |
| GLP-1 receptor agonists | Liraglutide             | Exenatide  Liraglunatide Lixisenatide  Dulagutide  Albiglutide Semaglutide      | Glimepiride  Gl