In [37]:
from langchain_community.llms import Ollama 
# Ollama-backed llama3 model; it is the generation step of `rag_chain` further down.
llm = Ollama(
    model="llama3",
    temperature=0,
)

In [38]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter



In [39]:
from langchain_community.document_loaders import JSONLoader

In [40]:
import json
from pathlib import Path
from pprint import pprint


# Sample record used throughout the notebook; the same path is given to
# JSONLoader in a later cell.
file_path = './test.json'

# Decode explicitly as UTF-8: without an encoding argument read_text() falls
# back to the platform's locale encoding, which can fail or garble non-ASCII
# characters on systems whose default is not UTF-8.
data = json.loads(Path(file_path).read_text(encoding="utf-8"))

In [41]:
# Show the parsed JSON so its structure is visible before it is loaded as Documents.
pprint(data)

{'age': None,
 'diagnosis': [],
 'gender': None,
 'name': 'Mr. Anderson',
 'treatment': [{'name': 'Metformin', 'prescription': 'as prescribed'}],
 'vital': [{'name': 'blood pressure', 'units': None, 'value': '100/70'},
           {'name': 'blood glucose level', 'units': 'mmol/L', 'value': '8'}]}


In [44]:
# Load the JSON file through LangChain. With these settings the whole file
# comes back as a single Document whose page_content is the JSON text
# (see the recorded output of this cell).
loader = JSONLoader(file_path='./test.json', jq_schema='.', text_content=False)

# NOTE: this rebinds `data`; from here on it is the list of Documents, not
# the dict parsed with json.loads in the earlier cell.
data = loader.load()
pprint(data)

[Document(metadata={'source': '/home/quan/Desktop/pgvector/test.json', 'seq_num': 1}, page_content='{"name": "Mr. Anderson", "age": null, "gender": null, "diagnosis": [], "treatment": [{"name": "Metformin", "prescription": "as prescribed"}], "vital": [{"name": "blood pressure", "value": "100/70", "units": null}, {"name": "blood glucose level", "value": "8", "units": "mmol/L"}]}')]


In [45]:
# Split the loaded Documents into chunks for embedding. The sample record
# stays in a single chunk (the recorded output reports a count of 1).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
)
texts = text_splitter.split_documents(data)

# One call, two output lines: the chunk list, then how many chunks there are.
print(texts, len(texts), sep="\n")

[Document(metadata={'source': '/home/quan/Desktop/pgvector/test.json', 'seq_num': 1}, page_content='{"name": "Mr. Anderson", "age": null, "gender": null, "diagnosis": [], "treatment": [{"name": "Metformin", "prescription": "as prescribed"}], "vital": [{"name": "blood pressure", "value": "100/70", "units": null}, {"name": "blood glucose level", "value": "8", "units": "mmol/L"}]}')]
1


In [46]:
from langchain_community.embeddings import OllamaEmbeddings

# Embedding model shared by the vector store and the smoke test in the next cell.
embedding = OllamaEmbeddings(model="nomic-embed-text")


In [47]:
# Smoke test: embed a throwaway sentence and report the vector length
# (768 in the recorded output).
probe_text = 'Testing the embedding model'
vector = embedding.embed_query(probe_text)

print(len(vector))

768


In [48]:
from langchain.vectorstores.pgvector import PGVector

import getpass
import os
from urllib.parse import quote_plus

# Connection settings are read from the environment so that the database
# password is never stored in the notebook. Everything except the password
# falls back to the values this notebook was originally run with.
_pg_user = os.environ.get("PGVECTOR_USER", "postgres")
_pg_host = os.environ.get("PGVECTOR_HOST", "localhost")
_pg_port = os.environ.get("PGVECTOR_PORT", "5432")
_pg_database = os.environ.get("PGVECTOR_DATABASE", "vector_db")

# Prefer the environment variable; prompt interactively only when it is unset.
_pg_password = os.environ.get("PGVECTOR_PASSWORD") or getpass.getpass(
    "PostgreSQL password: "
)

# User and password are URL-escaped so characters such as '@' or '/' cannot
# corrupt the connection URL.
CONNECTION_STRING = (
    f"postgresql+psycopg2://{quote_plus(_pg_user)}:{quote_plus(_pg_password)}"
    f"@{_pg_host}:{_pg_port}/{_pg_database}"
)

# NOTE(review): the collection is named 'covid_19' although the indexed file
# is a single patient record -- confirm whether this name is intentional.
COLLECTION_NAME = 'covid_19'

# Embed the chunks and store them in the pgvector-backed collection.
# NOTE(review): presumably re-running this cell stores the same chunks again;
# verify before relying on Restart & Run All.
db = PGVector.from_documents(
    embedding=embedding,
    documents=texts,
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
)

  warn_deprecated(


In [49]:
# Query the vector store directly, without the LLM, to inspect raw matches.
query = "Tell me about mr anderson "
similar = db.similarity_search_with_score(query, k=2)

# Each result is a (Document, score) tuple, as the recorded output shows;
# print one per paragraph.
for scored_doc in similar:
    print(scored_doc, end="\n\n")

(Document(metadata={'source': '/home/quan/Desktop/pgvector/test.json', 'seq_num': 1}, page_content='{"name": "Mr. Anderson", "age": null, "gender": null, "diagnosis": [], "treatment": [{"name": "Metformin", "prescription": "as prescribed"}], "vital": [{"name": "blood pressure", "value": "100/70", "units": null}, {"name": "blood glucose level", "value": "8", "units": "mmol/L"}]}'), 0.5488715337710856)



In [50]:
# Wrap the vector store as a retriever (similarity search, k=6) so it can be
# piped into the RAG chain.
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 6})

In [51]:
from langchain import hub 

# Fetch the "rlm/rag-prompt" template via langchain's hub; `rag_chain` later
# feeds it a mapping with "context" and "question" entries.
# NOTE(review): presumably this needs network access on every run -- confirm.
prompt= hub.pull("rlm/rag-prompt")

In [52]:
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects that expose a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in their original order, separated by
        one blank line; an empty string when ``docs`` yields nothing.
    """
    separator = "\n\n"
    contents = [item.page_content for item in docs]
    return separator.join(contents)

In [53]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Inputs for the prompt: "context" is the retriever's hits flattened to text
# by format_docs, "question" is the caller's input passed through as-is.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Pipeline: build prompt inputs -> fill the prompt -> run the LLM -> parse
# the model output with StrOutputParser.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

In [55]:
# Run the whole RAG pipeline on one question; as the cell's last expression,
# the returned answer string is displayed by the notebook.
rag_chain.invoke("Tell me about mr. Anderson vitals")

"According to the provided context, Mr. Anderson's vital signs include a blood pressure of 100/70 and a blood glucose level of 8 mmol/L."