Load the pdf

In [60]:
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader(r"D:\The Silent Patient.pdf")
pdf = loader.load()

In [61]:
len(pdf)

787

In [62]:
pdf

[Document(metadata={'producer': 'PDF Expert 2.2 Mac', 'creator': 'PyPDF', 'creationdate': '2021-09-12T13:16:00+01:00', 'moddate': '2021-09-12T13:16:00+01:00', 'source': 'D:\\The Silent Patient.pdf', 'total_pages': 787, 'page': 0, 'page_label': '1'}, page_content=''),
 Document(metadata={'producer': 'PDF Expert 2.2 Mac', 'creator': 'PyPDF', 'creationdate': '2021-09-12T13:16:00+01:00', 'moddate': '2021-09-12T13:16:00+01:00', 'source': 'D:\\The Silent Patient.pdf', 'total_pages': 787, 'page': 1, 'page_label': '2'}, page_content=''),
 Document(metadata={'producer': 'PDF Expert 2.2 Mac', 'creator': 'PyPDF', 'creationdate': '2021-09-12T13:16:00+01:00', 'moddate': '2021-09-12T13:16:00+01:00', 'source': 'D:\\The Silent Patient.pdf', 'total_pages': 787, 'page': 2, 'page_label': '3'}, page_content='Begin Reading\nTable of Contents\nAbout the Author\nCopyright Page\n \nThank you for buying this\nSt. Martin’s Press ebook.\n \nTo receive special offers, bonus content,\nand info on new releases and 

In [63]:
texts = " ".join(i.page_content for i in pdf)

In [64]:
texts



In [65]:
len(texts)

1404407

Divide into chunks

In [66]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 2000, chunk_overlap = 200)
cnks = text_splitter.create_documents([texts])

In [67]:
len(cnks)

783

In [68]:
len(cnks[5].page_content)

1940

In [69]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

emb_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2" 
)

vector_space = Chroma.from_documents(
    documents= cnks,
    embedding=emb_model,
    persist_directory="vect_space"
)

vector_space.persist()

In [70]:
len(vector_space.get()["documents"][5])

9928

In [71]:
retriever = vector_space.as_retriever(
        search_type="similarity",
        search_kwargs={'k': 4}
    )

In [72]:
retriever.invoke("who is the author of the book a silent patient")

[Document(metadata={}, page_content='She told the story of a doctor who had the Pﬁzer jab and when\nquestioned had no idea what was in it. The doctor had never read\nthe literature. We have to stop treating doctors as intellectual giants\nwhen so many are moral and medical pygmies. The doctor did not\neven know that the ‘vaccines’ were not fully approved or that their\ntrials were ongoing. They were, however, asking their patients if\nthey minded taking part in follow-ups for research purposes – yes,\nthe ongoing clinical trial. The nurse said the doctor’s ignorance was\nnot rare and she had spoken to a hospital consultant who had the jab\nwithout any idea of the background or that the ‘trials’ had not been\ncompleted. Nurses and pharmacists had shown the same ignorance. ‘My NHS colleagues have forsaken their duty of care, broken their\ncode of conduct – Hippocratic Oath – and have been brainwashed\njust the same as the majority of the UK public through propaganda\n…’ She said she had 

In [73]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain.chat_models import init_chat_model

In [74]:
os.environ.get("GROQ_API_KEY")
model = init_chat_model("groq:llama-3.1-8b-instant")

In [75]:
from langchain.prompts import PromptTemplate

In [76]:
prompt = PromptTemplate(
    template = """
            You are a helpfull ai assistent. answer according to the provided context. if the context is insufficient, just say you dont know.

            context = {context},
            \n\n
            question = {question}

""",
    input_variables=["context","question"]  
)

In [77]:
question = "tell me about the characteristics of the patient mentioned in this book"
retrieved_dict = retriever.invoke(question)
retrieved_context = " ".join(i.page_content for i in retrieved_dict)

In [78]:
len(retrieved_context)

15873

In [79]:
main_prompt = prompt.invoke({"context": retrieved_context, "question": question})

In [80]:
resp = model.invoke(main_prompt)

In [81]:
print(resp)

content="Based on the given context, the patient mentioned in the book, Alicia Berenson, appears to have the following characteristics:\n\n1. **Withdrawn and isolated**: Alicia has a tendency to keep to herself and does not initiate conversations with others. She has melted into the background and become almost invisible to the other patients.\n2. **Violent behavior**: There is an incident where Alicia becomes violent and tries to slash Elif's throat with a jagged edge. This suggests that she may have a history of aggression or anger issues.\n3. **Self-harming behavior**: Alicia has a history of self-harm, having slashed her wrists twice and engaged in other forms of self-harm upon admission to the unit.\n4. **Difficulty interacting with others**: Alicia does not seem to have any interest in interacting with the other patients or the staff, which suggests that she may have difficulties with social relationships or may be experiencing emotional numbness.\n5. **Lack of emotional expressi