In [3]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [4]:
import os
from getpass import getpass

from langchain_google_genai import ChatGoogleGenerativeAI

# Never hard-code credentials in a notebook: anything committed here is
# effectively public. The key is read from the GOOGLE_API_KEY environment
# variable and only prompted for (without echo) when it is missing.
# NOTE(review): the key that was previously pasted in this cell should be
# treated as leaked and revoked.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Google API key: ")

# Chat model used to generate the answers.
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
import os

# Location of the source textbook. The default is the original author's local
# path; set the DIFF_DIAGNOSIS_PDF environment variable to run the notebook on
# another machine without editing this cell.
PDF_PATH = os.environ.get(
    "DIFF_DIAGNOSIS_PDF",
    "/home/omar71023/Downloads/Differential Diagnosis in Internal Medicine_0.pdf",
)
# Splitter settings, named so they can be tuned in one place.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# Load the PDF into a list of documents.
loader = PyPDFLoader(PDF_PATH)
pages = loader.load()

# Break the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
splits = text_splitter.split_documents(pages)

# Cell output: (number of loaded documents, number of chunks).
len(pages), len(splits)

(448, 1487)

In [6]:

from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding model handed to the vector store to embed the text chunks.
embedding_model_name = "models/embedding-001"
embeddings = GoogleGenerativeAIEmbeddings(model=embedding_model_name)


In [7]:
# Embed every chunk in `splits` and index it in a Chroma vector store.
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
# Retriever over that store; k=5 asks for five chunks per query.
# (The original line assigned `retriever` twice by mistake.)
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

In [None]:
# invoke() needs the query as its input argument; calling it with no
# arguments raises a TypeError. The query below is a sanity check that
# retrieval returns relevant chunks.
retriever.invoke("what is differential diagnosis")

[Document(metadata={'page': 5, 'source': '/home/omar71023/Downloads/Differential Diagnosis in Internal Medicine_0.pdf'}, page_content='Since the differential diagnosis is the first clinical exercise after obtain-\ning the clinical history and the results of the basic and complementary examinations, this book has a sharp focus on essential information with a goal of providing clear answers to clinical questions in internal medicine. As many diseases have similarities in clinical presentation differential diag-nosis becomes crucial for making the right clinical diagnosis. For example, physical signs of pulmonary consolidation are present in several conditions involving not only lungs but also cardiovascular system, i.e. pneumonia, primary and metastatic lung cancer, congestive heart failure etc. The impor-tance of differential diagnosis is emphasized by the fact that the right diag-nosis is the key for the efficient treatment.\nThe authors strongly believe that this manual would be usefu

In [10]:
# Import from langchain_core: the root-level `from langchain import
# PromptTemplate` shortcut is deprecated, and the rest of this notebook
# already uses langchain_core.prompts.
from langchain_core.prompts import PromptTemplate

# Raw template text. It has its own name so that `prompt` only ever refers to
# the PromptTemplate object (the original reused `prompt` for both the string
# and the template).
rag_template = """
You are a helpful medicine teacher. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that I cannot answer. Use three sentences maximum and keep the answer concise.

Question: {question} 

Context: {context} 

Answer:
"""

# Prompt with two slots: the retrieved context and the user's question.
prompt = PromptTemplate(template=rag_template, input_variables=['context','question'])

def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined in order, separated by a blank
        line; an empty string when ``docs`` is empty.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)


# First stage: fan the incoming question out into the two prompt inputs.
#   context  -> retrieve chunks for the question, then join their text
#   question -> passed through unchanged
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: inputs -> prompt -> chat model -> plain string.
rag_chain = chain_inputs | prompt | model | StrOutputParser()

In [12]:
# Ask the RAG chain for a multiple-choice question; the returned string is
# shown as the cell output.
mcq_request = "give me an mcq question about differential diagnosis in chest"
rag_chain.invoke(mcq_request)

'Which of the following is NOT a differential diagnosis for chest pain in a patient with a history of left lower chest pain? \n\na) Intercostal neuralgia\nb) Pulmonary embolism\nc) Myositis\nd) Pneumonia/Pleurisy\ne) Splenic flexure syndrome \n\nThe correct answer is (e) Splenic flexure syndrome.  Splenic flexure syndrome is a rare condition and is not typically associated with left lower chest pain. \n'

In [21]:
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

In [144]:
# Load the PDFs under the documents folder and split them with the loader's
# default settings.
pdf_directory = "dif_diagnosis_documents"
loader = PyPDFDirectoryLoader(pdf_directory)
data = loader.load_and_split()

In [145]:
# Splitter configured for large chunks (chunk_size=30000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=30000,
)

# Merge the text of every loaded document into one string, with a blank line
# between consecutive documents.
page_texts = [str(p.page_content) for p in data]
context = "\n\n".join(page_texts)
     

In [146]:
# Cut the merged text into chunks and report how many were produced.
texts = text_splitter.split_text(context)
chunk_count = len(texts)
print(chunk_count)

36


In [12]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding model used to embed the text chunks for the FAISS index.
embedding_model_name = "models/embedding-001"
embeddings = GoogleGenerativeAIEmbeddings(model=embedding_model_name)

In [149]:
# Vector stores live in langchain_community; the `langchain.vectorstores`
# path is a deprecated alias. This notebook already depends on
# langchain_community for its document loader.
from langchain_community.vectorstores import FAISS

# Embed the text chunks into a FAISS index and expose it as a retriever
# with the default search settings.
vector_index = FAISS.from_texts(texts, embeddings).as_retriever()


In [140]:
query = "what is differential diagnosis"
# `db` is not defined anywhere in this notebook (it came from a deleted
# cell), so this cell failed on a fresh kernel. Search the vector store
# wrapped by the `vector_index` retriever instead.
docs = vector_index.vectorstore.similarity_search(query)
docs

[Document(page_content="DIFFE RENT IAL DIA GNOSIS \nIN INT ERNAL ME DICI NE\nChișinău \x7f 2019Natalia Capr oș, Tatiana D umitraș,  Svetlana N ichita,\nNelea Dr agu/uni021Ba,  Lilia Vlaso v, Olga Cor lăteanu\n\nNicolae Testemițanu State University of Medicine and Pharmacy\nof the Republic of Moldova\nNatalia Caproș, Tatiana Dumitraș, Svetlana Nichita,\nNelea Draguța, Lilia Vlasov, Olga Corlăteanu\nDIFFERENTIAL DIAGNOSIS \nIN INTERNAL MEDICINE\nChișinău • 2019\n\nThe textbook ”Differential Diagnosis in Internal Medicine” was \ndiscussed and approved by the Internal Medicine Methodical Committee at the meeting of May, 7, 2018, Proceedings No 1 and by the Council of Quality Management of Nicolae Testemițanu State University of Medicine and Pharmacy of the Republic of Moldova at the meeting of September, 18, 2018, Proceedings No 1.\nReviewers:\nIon Țîbîrnă – MD, PhD, University ProfessorLivi Grib – MD, PhD, University Professor\nLanguage Editor:\nNadejda Șamșurina – Nicolae Testemițanu Sta

In [284]:
# Instruction text for the MCQ generator. `{context}` is the only slot: the
# retrieved text is inserted there and the prompt has no question slot.
prompt_template = """
You are a medicine chat that provide one and only one short MCQ question about the summarized knowledge from 

\n {context}\n

\nDisplay the question and its four choices each of them in independent line.

"""

# Wrap the text in a PromptTemplate that declares its single input variable.
prompt = PromptTemplate(
    input_variables=["context"],
    template=prompt_template,
)

In [285]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model for the MCQ chain: 'gemini-pro' with temperature 0.2.
# Note: this rebinds the notebook-wide `model` name.
generation_settings = {"model": "gemini-pro", "temperature": 0.2}
model = ChatGoogleGenerativeAI(**generation_settings)


In [286]:
# Question-answering chain of type "stuff", driven by the custom prompt.
# verbose=True makes the chain log the formatted prompt when it runs.
chain = load_qa_chain(
    model,
    prompt=prompt,
    chain_type="stuff",
    verbose=True,
)

In [287]:
# Topic used to pick the source passages. The original string was
# "athophysiology", which has lost its leading "p".
query = "pathophysiology"
# Retrievers are Runnables: invoke() replaces the deprecated
# get_relevant_documents() and returns the same list of documents.
docs = vector_index.invoke(query)



In [288]:
# Run the QA chain through invoke(); calling the chain object directly
# (chain({...})) is deprecated. return_only_outputs=True leaves only the
# chain's outputs in the result, without echoing the inputs.
response = chain.invoke(
    {"input_documents": docs, "question": query},
    return_only_outputs=True,
)



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are a medicine chat that provide one and only one short MCQ question about the summarized knowledge from 


 60
DIFFERENTIAL DIAGNOSIS IN INTERNAL MEDICINEA Tilt test, which suddenly shifts the patient from a recumbent to an 
upright position, can provoke a neurocardiogenic syncope, which if cor-
related with the clinical symptoms, has diagnostic significance. Further analysis of ECG intervals can noninvasively identify patient groups at increased risk for arrhythmias or sudden death (e. g., heart rate variabil-ity as a measure of autonomic innervation, signal-averaged ECG, T-wave alternans, etc.).
DIFFERENTIAL DIAGNOSIS OF EXTRASYSTOLESExtrasystoles are single beats caused by abnormal impulse generation 
anywhere in the heart (atria, ventricles, conduction system). Extrasystoles are frequently observed in healthy hearts. In most patients extrasyst


[1m> Finished chain.[0m

[1m> Finished chain.[0m


In [289]:
# `Markdown` is not imported anywhere in the visible notebook, so this cell
# raised NameError on a fresh kernel. Import it (and `display`) explicitly.
from IPython.display import Markdown, display

# Render the generated question as formatted Markdown instead of a raw string.
display(Markdown(response.get('output_text')))

**Question:**

Which of the following is NOT a common cause of hypertension?

**Choices:**

A. Renal parenchymal disease
B. Aortic atherosclerosis
C. Obstructive sleep apnea
D. Hyperthyroidism

In [3]:
from langchain_core.prompts import PromptTemplate

In [7]:
# Minimal string prompt with a single `{topic}` slot.
joke_template = "Tell me a joke about {topic}"
prompt_temp = PromptTemplate.from_template(joke_template)

# Fill the slot; the resulting prompt value is shown as the cell output.
prompt_temp.invoke({"topic": "cats"})

StringPromptValue(text='Tell me a joke about cats')

In [15]:
from langchain_core.prompts import ChatPromptTemplate

"""
ValueError: Unexpected message type: omar. Use one
of 'human', 'user', 'ai', 'assistant', or 'system'.
This is called "role"
"""

# Chat prompt built from (role, text) pairs. The system and final user turns
# carry the `{name}` and `{topic}` slots.
conversation = [
    ("system", "You are a good AI assistant named {name}"),
    ("user", "Hi how are you?"),
    ("ai", "Hi, How can I help you"),
    ("user", "I need some information about {topic}"),
]
prompt_temp = ChatPromptTemplate.from_messages(conversation)

# Fill both slots; the resulting chat prompt value is shown as the cell output.
slot_values = {"name": "Bad", "topic": "sports"}
prompt_temp.invoke(slot_values)


ChatPromptValue(messages=[SystemMessage(content='You are a good AI assistant named Bad'), HumanMessage(content='Hi how are you?'), AIMessage(content='Hi, How can I help you'), HumanMessage(content='I need some information about sports')])

In [30]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage
# Three fixed opening turns followed by a slot that receives a whole list of
# messages at invoke time.
opening_turns = [
    ("system","you are a chat bot"),
    ("human","how are you"),
    ("ai","fine"),
]
# Tuple shorthand for the same slot: ("placeholder", "{my_messages}")
history_slot = MessagesPlaceholder("my_messages")
prompt_temp = ChatPromptTemplate.from_messages(opening_turns + [history_slot])

# The value bound to "my_messages" must be a list. Items that are plain
# strings rather than message objects are treated as HumanMessage.
extra_messages = [
    AIMessage(content="asdg"),
    HumanMessage(content="بس بقا"),
]
prompt_temp.invoke({"my_messages": extra_messages})


ChatPromptValue(messages=[SystemMessage(content='you are a chat bot'), HumanMessage(content='how are you'), AIMessage(content='fine'), HumanMessage(content='he')])

In [None]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage
# Same prompt as the MessagesPlaceholder version, written with the
# ("placeholder", "{variable}") tuple shorthand.
opening_turns = [
    ("system","you are a chat bot"),
    ("human","how are you"),
    ("ai","fine"),
]
history_slot = ("placeholder","{my_messages}")
prompt_temp = ChatPromptTemplate.from_messages(opening_turns + [history_slot])

# Pass a plain string in the list bound to the placeholder.
prompt_temp.invoke({"my_messages": ["he"]})


In [26]:
# format() renders the whole conversation as one "Role: text" string rather
# than a list of message objects.
history = [
    AIMessage(content="asdg"),
    HumanMessage(content="بس يبنى"),
]
prompt_temp.format(my_messages=history)

'System: you are a chat bot\nHuman: how are you\nAI: fine\nAI: asdg\nHuman: بس يبنى'