In [38]:
from langchain_community.llms import Ollama 
llm = Ollama(model="llama3", temperature = 0)

In [39]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter



In [40]:
from langchain_community.document_loaders import JSONLoader

In [41]:
import json
from pathlib import Path
from pprint import pprint


file_path='./sample-data.json'
data = json.loads(Path(file_path).read_text())

In [42]:
pprint(data)

[{'health_vital': [{'status': 'normal', 'units': '', 'value': ''}],
  'medical_diagnosis': [{'name': 'fever'}, {'name': 'dizziness'}],
  'medical_treatment': [{'name': 'medicines', 'prescription': ''},
                        {'name': 'orange juices', 'prescription': ''}],
  'patient_age': 16,
  'patient_gender': 'male',
  'patient_name': 'Adam'},
 {'health_vital': [{'status': 'normal', 'units': '', 'value': ''}],
  'medical_diagnosis': [{'name': 'fever'}, {'name': 'dizziness'}],
  'medical_treatment': [{'name': 'medicine', 'prescription': ''}],
  'patient_age': 16,
  'patient_gender': 'male',
  'patient_name': 'Bob A'},
 {'health_vital': [{'status': 'normal', 'units': '', 'value': ''}],
  'medical_diagnosis': [{'name': 'fever'}, {'name': 'headache'}],
  'medical_treatment': [{'name': 'medicine', 'prescription': ''},
                        {'name': 'full meal per day', 'prescription': ''}],
  'patient_age': 16,
  'patient_gender': '',
  'patient_name': 'Craig'},
 {'health_vital': [{'s

In [43]:
loader = JSONLoader(
    file_path='./sample-data.json',
    jq_schema='.',
    text_content=False)

data = loader.load()
pprint(data)

[Document(metadata={'source': '/home/quan/Desktop/pgvector/sample-data.json', 'seq_num': 1}, page_content="[{'patient_name': 'Adam', 'patient_age': 16, 'patient_gender': 'male', 'medical_diagnosis': [{'name': 'fever'}, {'name': 'dizziness'}], 'medical_treatment': [{'name': 'medicines', 'prescription': ''}, {'name': 'orange juices', 'prescription': ''}], 'health_vital': [{'status': 'normal', 'value': '', 'units': ''}]}, {'patient_name': 'Bob A', 'patient_age': 16, 'patient_gender': 'male', 'medical_diagnosis': [{'name': 'fever'}, {'name': 'dizziness'}], 'medical_treatment': [{'name': 'medicine', 'prescription': ''}], 'health_vital': [{'status': 'normal', 'value': '', 'units': ''}]}, {'patient_name': 'Craig', 'patient_age': 16, 'patient_gender': '', 'medical_diagnosis': [{'name': 'fever'}, {'name': 'headache'}], 'medical_treatment': [{'name': 'medicine', 'prescription': ''}, {'name': 'full meal per day', 'prescription': ''}], 'health_vital': [{'status': 'normal', 'value': '', 'units': ''

In [91]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=125, chunk_overlap=35)
texts = text_splitter.split_documents(data)

print(texts)
print(len(texts))

[Document(metadata={'source': '/home/quan/Desktop/pgvector/sample-data.json', 'seq_num': 1}, page_content="[{'patient_name': 'Adam', 'patient_age': 16, 'patient_gender': 'male', 'medical_diagnosis': [{'name': 'fever'}, {'name':"), Document(metadata={'source': '/home/quan/Desktop/pgvector/sample-data.json', 'seq_num': 1}, page_content="[{'name': 'fever'}, {'name': 'dizziness'}], 'medical_treatment': [{'name': 'medicines', 'prescription': ''}, {'name':"), Document(metadata={'source': '/home/quan/Desktop/pgvector/sample-data.json', 'seq_num': 1}, page_content="'prescription': ''}, {'name': 'orange juices', 'prescription': ''}], 'health_vital': [{'status': 'normal', 'value': '',"), Document(metadata={'source': '/home/quan/Desktop/pgvector/sample-data.json', 'seq_num': 1}, page_content="[{'status': 'normal', 'value': '', 'units': ''}]}, {'patient_name': 'Bob A', 'patient_age': 16, 'patient_gender': 'male',"), Document(metadata={'source': '/home/quan/Desktop/pgvector/sample-data.json', 'seq_

In [92]:
from langchain_community.embeddings import OllamaEmbeddings

embedding = OllamaEmbeddings(
    model="nomic-embed-text",
)


In [93]:
from langchain.vectorstores.pgvector import PGVector

CONNECTION_STRING = "postgresql+psycopg2://postgres:132456@localhost:5432/vector_db"
COLLECTION_NAME = 'patients'

db = PGVector.from_documents(
    embedding=embedding,
    documents=texts,
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
)

  warn_deprecated(


In [94]:
query = "Tell me about John Doe "
similar = db.similarity_search_with_score(query, k=2)

for doc in similar:
    print(doc, end="\n\n")

(Document(metadata={'source': '/home/quan/Desktop/pgvector/sample-data.json', 'seq_num': 1}, page_content="'', 'units': ''}]}, {'patient_name': 'John Doe',"), 0.42463600475833274)

(Document(metadata={'source': '/home/quan/Desktop/pgvector/sample-data.json', 'seq_num': 1}, page_content="'normal', 'value': '', 'units': ''}]}, {'patient_name': 'John Doe', 'patient_age': 45,"), 0.47456234410519493)



In [95]:
retriever = db.as_retriever(
    search_type = "similarity",
    search_kwargs = {"k":6}
)

In [96]:
from langchain import hub 

prompt= hub.pull("rlm/rag-prompt")

In [97]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [98]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt 
    | llm 
    | StrOutputParser()
)

In [103]:
rag_chain.invoke("Tell me about Bob medical_treatments")

'According to the context, Bob\'s medical treatments include "medicine" and "full meal per day". There is no prescription information provided for these treatments.'