In [1]:
# !pip install -r requirements.txt

In [2]:
import os
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv

In [3]:
load_dotenv()

True

In [4]:
chat = ChatOpenAI(
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model='gpt-3.5-turbo'
)

In [5]:
import pandas as pd
dataset = pd.read_csv("medquad.csv")
dataset.head()

Unnamed: 0,qtype,Question,Answer
0,susceptibility,Who is at risk for Lymphocytic Choriomeningiti...,LCMV infections can occur after exposure to fr...
1,symptoms,What are the symptoms of Lymphocytic Choriomen...,LCMV is most commonly recognized as causing ne...
2,susceptibility,Who is at risk for Lymphocytic Choriomeningiti...,Individuals of all ages who come into contact ...
3,exams and tests,How to diagnose Lymphocytic Choriomeningitis (...,"During the first phase of the disease, the mos..."
4,treatment,What are the treatments for Lymphocytic Chorio...,"Aseptic meningitis, encephalitis, or meningoen..."


In [6]:
import pinecone

pinecone.init(
    api_key = os.environ["PINECONE_API_KEY_MAIN"],
    environment= os.environ["PINECONE_ENV"]
)

  from tqdm.autonotebook import tqdm


In [7]:
#I have already created a vector database
index_name = 'med277-medquad'
index = pinecone.Index(index_name)

In [8]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.16407,
 'namespaces': {'': {'vector_count': 16407}},
 'total_vector_count': 16407}

In [16]:
from langchain.embeddings.openai import OpenAIEmbeddings
my_key = os.environ["OPENAI_API_KEY"]
# embed_model = OpenAIEmbeddings(model="text-embedding-ada-002")
embed = OpenAIEmbeddings(
    model="text-embedding-ada-002", openai_api_key=my_key, disallowed_special=()
)

In [22]:
from langchain.vectorstores import Pinecone

text_field = "question"

vectorstore = Pinecone(index, embed.embed_query, text_field)

In [23]:
from langchain.chat_models import ChatOpenAI
my_key = os.environ["OPENAI_API_KEY"]
llm = ChatOpenAI(temperature=0, openai_api_key=my_key)

In [24]:
from langchain.retrievers.multi_query import MultiQueryRetriever

retriever = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(), llm=llm
)

In [25]:
# Set logging for the queries
import logging

logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

In [26]:
question = "Who is at risk for Lymphocytic Choriomeningitis (LCM)?"

docs = retriever.get_relevant_documents(query=question)
len(docs)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. What are the factors that increase the risk of Lymphocytic Choriomeningitis (LCM)?', '2. Can you provide information on the demographics most susceptible to Lymphocytic Choriomeningitis (LCM)?', '3. Which groups of individuals are more prone to contracting Lymphocytic Choriomeningitis (LCM)?']


5

In [27]:
docs

[Document(page_content='Who is at risk for Lymphocytic Choriomeningitis (LCM)? ?', metadata={'answer': 'LCMV infections can occur after exposure to fresh urine, droppings, saliva, or nesting materials from infected rodents.  Transmission may also occur when these materials are directly introduced into broken skin, the nose, the eyes, or the mouth, or presumably, via the bite of an infected rodent. Person-to-person transmission has not been reported, with the exception of vertical transmission from infected mother to fetus, and rarely, through organ transplantation.', 'qtype': 'susceptibility'}),
 Document(page_content='Who is at risk for Lymphocytic Choriomeningitis (LCM)? ?', metadata={'answer': 'Individuals of all ages who come into contact with urine, feces, saliva, or blood of wild mice are potentially at risk for infection. Owners of pet mice or hamsters may be at risk for infection if these animals originate from colonies that were contaminated with LCMV, or if their animals are 

In [28]:
def augment_prompt(query: str):
    # get top 3 results from knowledge base
    results = vectorstore.similarity_search(query, k=3)
    # get the text from the results
    source_knowledge = "\n".join([x.page_content for x in results])
    # feed into an augmented prompt
    augmented_prompt = f"""You are a helpful assistant who answers user queries using the
    contexts provided. If the question cannot be answered using the information
    provided say "I don't know"

    Contexts:
    {source_knowledge}

    Query: {query}"""
    return augmented_prompt

In [31]:
import openai
from langchain.llms import OpenAI

In [35]:
query = "Chandu ke chacha ne chandu ki chachi se kya kaha?"
augmented_prompt = augment_prompt(query)

llm = OpenAI(temperature=1, openai_api_key=my_key)
res = llm(query)

In [36]:
print(res)



Chandu ke Chacha ne Chandu ki Chachi ko Kaha, "Humari bacchi ke sapno ko pura karne mein humari aapki madad zaroori hai."
