### Data Ingestion

In [3]:
from langchain_community.document_loaders import PyPDFLoader

In [24]:
# Use a forward slash as the separator: it works on Windows, macOS and Linux alike,
# and avoids a backslash sequence ('\G') that Python flags as an invalid escape.
filepath = '1st year books/Guyton and Hall.pdf'
loader = PyPDFLoader(filepath)

# Read the whole PDF into a list of LangChain Document objects.
text_docs = loader.load()

In [25]:
type(text_docs)

list

In [28]:
len(text_docs)

1063

In [26]:
# print(text_docs[40].page_content)
print(text_docs[2].page_content)

This page intentionally left blank


In [29]:
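# NOTE: left disabled. Deleting by index while looping over range(len(text_docs))
# shifts the remaining items, so some pages are skipped and, once anything has been
# removed, the loop runs past the end of the list and raises IndexError.
# A safe way to drop the blank pages is to rebuild the list instead:
# text_docs = [doc for doc in text_docs if doc.page_content != 'This page intentionally left blank']
# for i in range(len(text_docs)):
#     if text_docs[i].page_content == 'This page intentionally left blank':
#         del text_docs[i]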

### Breaking into chunks

In [36]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings: chunk_size=500 with a chunk_overlap of 100 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=500,
)

In [37]:
# Run every loaded document through the splitter; `docs` is the resulting list of chunks.
docs = text_splitter.split_documents(text_docs)

In [38]:
len(docs)

12872

In [39]:
docs[30].page_content

'The Cell and Its Functions 11\nOrganization of the Cell 11\nPhysical Structure of the Cell 12\nComparison of the Animal Cell with  \nPrecellular Forms of Life 17\nFunctional Systems of the Cell 18\nLocomotion of Cells 23\nCHAPTER 3\nGenetic Control of Protein Synthesis, Cell \nFunction, and Cell Reproduction 27\nGenes in the Cell Nucleus 27\nThe DNA Code in the Cell Nucleus Is  \nTransferred to an RNA Code in the Cell  \nCytoplasm—The Process of Transcription 30\nSynthesis of Other Substances in the Cell 35'

### Transformation into embeddings

In [50]:
import os
# Read the Gemini key from the environment so it is never hard-coded in the notebook.
# If GEMINI_API_KEY is not set, this line raises KeyError.
api_key = os.environ["GEMINI_API_KEY"]

In [55]:

from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding client that is handed to the Chroma vector store further down.
gemini_embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key=api_key,
    model="models/embedding-001",
)

In [None]:
import os

from langchain_community.vectorstores import Chroma

# Directory where the Chroma index is stored between runs.
PERSIST_DIR = "gemini_vectorstore"

# Chroma(persist_directory=...) does not fail when the directory is missing -- it simply
# opens an empty collection -- so a try/except around it can never detect a missing index.
# Check for an existing, non-empty directory explicitly instead.
if os.path.isdir(PERSIST_DIR) and os.listdir(PERSIST_DIR):
    # Re-use the index built by a previous run (avoids re-embedding every chunk).
    vectorstore = Chroma(persist_directory=PERSIST_DIR, embedding_function=gemini_embeddings)
else:
    # First run: embed all chunks and store the index under PERSIST_DIR so later runs can load it.
    # NOTE(review): chromadb >= 0.4 persists automatically; on older versions call
    # vectorstore.persist() after this line.
    vectorstore = Chroma.from_documents(docs, gemini_embeddings, persist_directory=PERSIST_DIR)

In [None]:
# Retriever over the vector store, using the "similarity_score_threshold" search type
# with a score threshold of 0.6.
retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.6},
)

In [None]:
import google.generativeai as genai
genai.configure(api_key=api_key)

# Sampling settings passed to every generation call made through `model`.
# NOTE(review): temperature 2 is very high for fact-based study answers -- consider lowering it.
generation_config = {
  "temperature": 2,
  "top_p": 0.4,
  "top_k": 32,
  "max_output_tokens": 8192,
}

# The system instruction describes the assistant persona and the "Question / Information"
# prompt layout that chat_with_gemini_model sends.
# The first line break is a real newline ("\n"); it was previously an escaped "\\n",
# which put a literal backslash-n into the instruction text.
model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest",
                              generation_config=generation_config,
                              system_instruction="Your name is Lia.\nYou are a helpful virtual assistant who specializes in the field of biology, chemistry, medicine. You are all aspiring doctors best study guide. Students will ask you questions related to biology, along with some scraped information from their text books. Your job is to answer the questions from the provided information, along with what you know. Try answering the questions to the best of your ability.\n\nprompt structure:''' Question: {Question of the student}\n                                   Information: {Material provided by the user}\n'''\nYour answer should be created referencing to the information provided in the prompt.",)

In [None]:
# Start a chat session primed with one user turn (the persona / prompt-format briefing)
# and one model turn acknowledging it. Later send_message calls continue this conversation.
# The first line break in the briefing is a real newline ("\n"); it was previously an
# escaped "\\n", which put a literal backslash-n into the text.
convo = model.start_chat(
  history=[
    {
      "role": "user",
      "parts": [
        "Your name is Lia.\nYou are a helpful virtual assistant who specializes in the field of biology, chemistry, medicine. You are all aspiring doctors best study guide. Students will ask you questions related to biology, along with some scraped information from their text books. Your job is to answer the questions from the provided information, along with what you know. Try answering the questions to the best of your ability.\n\nprompt structure:''' Question: {Question of the student}\n                                   Information: {Material provided by the user}\n'''\nYour answer should be created referencing to the information provided in the prompt.",
      ],
    },
    {
      "role": "model",
      "parts": [
        "Okay, I'm ready. Ask me anything!  I'll do my best to answer your biology, chemistry, or medicine-related questions using the information you provide and my own knowledge base. I'm excited to be your virtual study guide!\n",
      ],
    },
  ]
)

In [None]:
def chat_with_gemini_model(question, max_chunks=2):
    """Answer a question with Gemini, grounded in chunks retrieved from the vector store.

    Looks up chunks for `question` through the module-level `retriever`, keeps at most
    `max_chunks` of them, builds a "Question / Information" prompt and sends it through
    the module-level `convo` chat session. The reply text is printed.

    Args:
        question: The student's question.
        max_chunks: Maximum number of retrieved chunks to include in the prompt
            (defaults to 2).

    Returns:
        None. The answer (or "No information found") is printed.
    """
    # retrieve information from the vectordb
    matches = retriever.invoke(question)
    if not matches:
        print("No information found")
        return

    # Separate chunks with a blank line so the end of one chunk does not run
    # straight into the start of the next.
    info = "\n\n".join(doc.page_content for doc in matches[:max_chunks])

    # create the prompt
    prompt = f"Question: {question}\nInformation: {info}"

    response = convo.send_message(prompt)
    print(response.text)

In [None]:
question = input("Enter your question: ").strip()
# use the model; skip the retrieval and LLM round-trip when nothing was typed
if question:
    chat_with_gemini_model(question)
else:
    print("Please enter a question.")

### Same implementation but with an Ollama model
Ollama needs to be installed locally first.

1. Create a file named `custom-llama` (with no file extension).
2. Put the following in the file:

   ```
   FROM llama3.2
   PARAMETER temperature 1
   PARAMETER num_ctx 4096
   SYSTEM {Replace with how you want the model to behave}.
   ```

3. Assuming you have Ollama installed, run the following two commands in PowerShell:

   ```
   ollama pull llama3.2
   ollama create custom-llama-model -f {file path to the custom-llama file}
   ```


#### After completing the steps above:

In [None]:
# `chat` and `ChatResponse` come from the ollama Python client; without this import
# the cell fails with NameError on a fresh kernel.
from ollama import ChatResponse, chat

# Smoke test: send one message to the custom model created with `ollama create`.
response: ChatResponse = chat(model='custom-llama-model', messages=[
  {
    'role': 'user',
    'content': 'Hi! what can you do?',
  },
])
# response['message']['content'] gives the same text; print it only once.
print(response.message.content)

In [None]:
def chat_with_llama(question, max_chunks=None):
    """Answer a question with the local Ollama model, grounded in retrieved chunks.

    Looks up chunks for `question` through the module-level `retriever`, builds a
    "Question / Information" prompt and sends it to the 'custom-llama-model' Ollama
    model. The reply text is printed once.

    Args:
        question: The student's question.
        max_chunks: Optional cap on how many retrieved chunks go into the prompt.
            None (the default) uses every chunk the retriever returns.

    Returns:
        None. The answer (or "No information found") is printed.
    """
    # Imported here so the function does not depend on another cell having imported it.
    from ollama import chat

    matches = retriever.invoke(question)
    if not matches:
        print("No information found")
        return

    # Separate chunks with a blank line so the end of one chunk does not run
    # straight into the start of the next.
    info = "\n\n".join(doc.page_content for doc in matches[:max_chunks])

    response = chat(model='custom-llama-model', messages=[
        {
            'role': 'user',
            'content': f"Question: {question}\nInformation: {info}",
        },
    ])

    # response['message']['content'] gives the same text; print it only once.
    print(response.message.content)

In [None]:
query = input("Enter your question: ").strip()
# Skip the retrieval and LLM round-trip when nothing was typed.
if query:
    chat_with_llama(query)
else:
    print("Please enter a question.")