In [17]:
from langchain_community.llms import Ollama 
llm = Ollama(model="llama3", temperature = 0)

In [18]:
llm.invoke("What is LangGraph?")

'LangGraph is a graph-based language model that represents natural language as a graph structure. It\'s a novel approach to modeling language, which differs from traditional sequence-based models like recurrent neural networks (RNNs) and transformers.\n\nIn LangGraph, words or tokens are represented as nodes in a graph, and the relationships between them are captured by edges. These edges can represent various types of dependencies, such as:\n\n1. Syntactic dependencies: e.g., subject-verb-object relationships.\n2. Semantic dependencies: e.g., hyponymy (e.g., "dog" is a type of "animal"), meronymy (e.g., "leg" is a part of "body").\n3. Pragmatic dependencies: e.g., coreference (e.g., referring back to a previously mentioned entity).\n\nLangGraph uses graph neural networks (GNNs) to process and analyze this graph structure, allowing it to capture complex linguistic relationships and contextual information. This can lead to improved performance on various NLP tasks, such as:\n\n1. Langua

In [19]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter



In [20]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("covid.pdf")
docs = loader.load()

In [21]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
texts = text_splitter.split_documents(docs)

print(texts)
print(len(texts))

191


In [22]:
print(texts[0])

page_content='Handbook of COVID-19 Prevention
and Treatment   
Compiled According to Clinical Experience The First Aﬃliated Hospital, Zhejiang University School of Medicine' metadata={'source': 'covid.pdf', 'page': 0}


In [23]:
from langchain_community.embeddings import OllamaEmbeddings

embedding = OllamaEmbeddings(
    model="nomic-embed-text",
)


In [24]:
vector = embedding.embed_query('Testing the embedding model')

print(len(vector)) 

768


In [25]:
doc_vectors = embedding.embed_documents([t.page_content for t in texts[:5]])

print(len(doc_vectors))  
print(doc_vectors[0])  

5
[0.5590656995773315, -0.09322235733270645, -3.5323660373687744, -0.3923228085041046, 1.307222604751587, -0.8161448836326599, 0.5012456178665161, 0.07055196166038513, 0.9599286317825317, -0.79777991771698, 0.19653348624706268, -0.0995456799864769, 1.138474941253662, 0.3810466229915619, -0.43385565280914307, -0.21392333507537842, -0.21640822291374207, -0.16017290949821472, -0.5812335014343262, -0.45209094882011414, -1.2783700227737427, 0.0519481860101223, -0.6176374554634094, -0.5094877481460571, 2.3774101734161377, 1.2796911001205444, 0.032876454293727875, -0.06475099921226501, -0.6316871643066406, -1.0740753412246704, 1.1340985298156738, -0.7988957762718201, 0.004308106377720833, -0.16619935631752014, -0.6843968629837036, -0.885457456111908, 0.5293845534324646, 0.05229444429278374, -0.9965095520019531, -0.005618790164589882, 2.4037415981292725, 0.1706790030002594, 0.7228993773460388, -2.1214163303375244, -0.3426439166069031, 1.006711721420288, 0.10187336802482605, -0.0229242220520973

In [26]:
from langchain.vectorstores.pgvector import PGVector

CONNECTION_STRING = "postgresql+psycopg2://postgres:132456@localhost:5432/vector_db"
COLLECTION_NAME = 'covid_19'

db = PGVector.from_documents(
    embedding=embedding,
    documents=texts,
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
)

  warn_deprecated(


In [27]:
query = "What is covid 19 "
similar = db.similarity_search_with_score(query, k=2)

for doc in similar:
    print(doc, end="\n\n")

(Document(metadata={'source': 'covid.pdf', 'page': 17}, page_content='mucous \nmembranes, \nsuch as the eyesDamaged \nskin \nexposureIntact skin \nexposureDirect exposure \nof respiratory \ntractHandbook of COVID-19 Prevention and Treatment'), 0.3881559656010515)

(Document(metadata={'source': 'covid.pdf', 'page': 17}, page_content='mucous \nmembranes, \nsuch as the eyesDamaged \nskin \nexposureIntact skin \nexposureDirect exposure \nof respiratory \ntractHandbook of COVID-19 Prevention and Treatment'), 0.3881559656010515)



In [28]:
retriever = db.as_retriever(
    search_type = "similarity",
    search_kwargs = {"k":6}
)

In [29]:
from langchain import hub 

prompt= hub.pull("rlm/rag-prompt")

In [30]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [31]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt 
    | llm 
    | StrOutputParser()
)

In [32]:
rag_chain.invoke("How to handle covid 19?")

'To handle COVID-19, avoid moving too fast and adjust your respiratory rate at 12-15 times per minute. Confining patient activity to an isolation ward is also important. Additionally, educate patients on proper hygiene practices such as wearing surgical masks, handwashing, and cough etiquette to prevent further spread of the virus.'