# Installing dependencies


In [1]:
%pip install -q -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


# Config

In [2]:
class Config:
    """Central place for the settings the notebook's cells read."""

    # Name of the environment variable that holds the Gemini API key.
    gemini_env = "GEMINI_API_KEY"

    # Model identifiers (CLIP for images, Gemini for generation/embeddings).
    model_clip = "ViT-B/32"
    model_gemini = "gemini-1.5-flash"
    model_embedding = "models/embedding-001"
    model_embedding_text = "models/embedding-001"

    # Chroma persistence directory and collection names.
    # NOTE(review): database_table is not referenced in the visible cells.
    database_path = "./database"
    database_image = "images"
    database_text = "text"
    database_table = "table"

    # PDF that gets loaded and indexed.
    document_path = "./assets/AnatomyAndPhysiology-LR.pdf"


# Importing dependencies

In [3]:
import os
import dotenv
import torch
import numpy as np
from PIL import Image
import clip
import chromadb
import google.generativeai as genai
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from tqdm.notebook import tqdm
import pandas as pd
dotenv.load_dotenv()

True

In [4]:
# Look up the Gemini API key in the environment variable named by Config;
# the result is None when that variable is not set.
gemini_api_key = os.environ.get(Config.gemini_env)

# Device

In [5]:
# Use the GPU when torch reports CUDA as available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cuda


# Loading the document


In [18]:
# Load the configured PDF with image extraction disabled. The loaded result is
# bound to both `pdf` and `documents`, the two names later cells refer to.
loader = PyPDFLoader(
    Config.document_path,
    extract_images=False,
)
documents = pdf = loader.load()

# Database init

In [7]:
# Open the persistent Chroma store at the configured path, then fetch the
# collection for text embeddings (created on first use).
client = chromadb.PersistentClient(path=Config.database_path)
text_db = client.get_or_create_collection(name=Config.database_text)

# Image Handling

In [8]:
# Collection for image embeddings (created on first use).
image_db = client.get_or_create_collection(name=Config.database_image)

# CLIP model plus its matching image preprocessing transform, placed on `device`.
clip_model, preprocess = clip.load(Config.model_clip, device=device)

In [9]:
def encode_image(image_path):
    """Return the CLIP image embedding for the file at `image_path`.

    The image is opened with PIL, passed through the CLIP `preprocess`
    transform, given a leading batch dimension and moved to `device`.
    The encoder output is returned as a NumPy array on the CPU.
    """
    pil_image = Image.open(image_path)
    batch = preprocess(pil_image).unsqueeze(0).to(device)
    with torch.no_grad():  # inference only, gradients are not needed
        features = clip_model.encode_image(batch)
        as_numpy = features.cpu().numpy()
    return as_numpy
def encode_text(text):
    """Return the CLIP text embedding for `text`.

    The string is tokenised as a one-element batch with `truncate=True`,
    moved to `device`, and encoded; the result is returned as a NumPy array
    on the CPU.
    """
    tokens = clip.tokenize([text], truncate=True).to(device)
    with torch.no_grad():  # inference only, gradients are not needed
        features = clip_model.encode_text(tokens)
        as_numpy = features.cpu().numpy()
    return as_numpy
def add_image(image_path):
    """Embed the image at `image_path` and add it to `image_db`.

    The path is used as the record id and is also stored in the record's
    metadata under the "image_path" key.
    """
    vectors = encode_image(image_path).tolist()
    image_db.add(
        embeddings=vectors,
        metadatas=[{"image_path": image_path}],
        ids=[image_path],
    )
def search_image(caption, n_results=2):
    """Query `image_db` with the CLIP text embedding of `caption`.

    Returns the raw result of `image_db.query` for at most `n_results`
    matches.
    """
    query_vectors = encode_text(caption).tolist()
    return image_db.query(query_embeddings=query_vectors, n_results=n_results)

# Loading text into Vector DB

In [None]:
def embed_text(text: str, api_key_number: "int | None" = None, task_type: str = "retrieval_document"):
    """Embed `text` with the Gemini embedding model named in Config.

    Args:
        text: The string to embed.
        api_key_number: Suffix of the environment variable to read the key
            from (``GEMINI_API_KEY<number>``). When omitted, the unnumbered
            ``GEMINI_API_KEY`` variable is used, so callers that only pass
            the text keep working.
        task_type: Task type forwarded to ``genai.embed_content``. Defaults
            to "retrieval_document"; pass "retrieval_query" when embedding a
            search query.

    Returns:
        The value stored under the 'embedding' key of the API response.

    Raises:
        ValueError: If the selected environment variable is not set.
    """
    suffix = "" if api_key_number is None else str(api_key_number)
    env_name = f"GEMINI_API_KEY{suffix}"
    api_key = os.getenv(env_name)
    if api_key is None:
        raise ValueError(f"{env_name} not found")

    # configure() sets the key globally for the SDK, so it also affects any
    # later genai call until configure() is called again.
    genai.configure(api_key=api_key)
    response = genai.embed_content(
        model=Config.model_embedding,
        content=text,
        task_type=task_type,
    )
    return response['embedding']

def load_and_process_pdf(pdf_path, text_db, page_slice=slice(16, 17),
                         chunk_size=512, chunk_overlap=100, api_key_number=1):
    """Split PDF pages into chunks, embed each chunk and add it to `text_db`.

    Args:
        pdf_path: Path of the PDF. Only read when no global `documents`
            exists yet; otherwise the already loaded `documents` are reused.
        text_db: Collection that receives the chunk embeddings.
        page_slice: Slice applied to `documents` to choose the pages to
            index. Defaults to ``slice(16, 17)``, the window that used to be
            hard-coded; pass ``slice(None)`` to index every page.
        chunk_size: Maximum chunk length given to the text splitter.
        chunk_overlap: Overlap between consecutive chunks.
        api_key_number: Forwarded to `embed_text` to select the API key.

    Each record id has the form ``"<page>*<chunk index>"``, where the page
    comes from the document's 'page' metadata entry. Only the embedding and
    the id are stored, not the chunk text.
    """
    global documents  # shared with the rest of the notebook
    if 'documents' not in globals():
        print("Loading pdf")
        documents = PyPDFLoader(pdf_path).load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )

    # Every selected page is split into several chunks; each chunk is
    # embedded and stored on its own.
    for doc in tqdm(documents[page_slice], desc="Processing pages"):
        page_number = doc.metadata['page']
        chunks = text_splitter.split_text(doc.page_content)
        for i, chunk in enumerate(chunks):
            chunk_embedding = embed_text(chunk, api_key_number=api_key_number)
            text_db.add(embeddings=chunk_embedding, ids=[f"{page_number}*{i}"])

# Index the configured PDF into the text collection.
load_and_process_pdf(pdf_path=Config.document_path, text_db=text_db)

# Querying

In [123]:
def expand_page_range(pages, num_pages=None):
    """Return `pages` plus their direct neighbours, sorted and de-duplicated.

    Args:
        pages: Iterable of zero-based page indices.
        num_pages: Optional total number of pages. When given, a following
            page is only added if it is a valid index (``< num_pages``).
            When omitted, the following page is always added.

    Returns:
        A sorted list of unique page indices. A preceding page is only added
        when it is not negative.
    """
    expanded = set()
    for page in pages:
        expanded.add(page)
        if page - 1 >= 0:
            expanded.add(page - 1)  # previous page, if it exists
        if num_pages is None or page + 1 < num_pages:
            expanded.add(page + 1)  # next page, unless past the known end
    return sorted(expanded)

def retrieve_pages(text_db, query, top_k:int=3, expand_pages:bool=False):
    """Return the page numbers of the `top_k` best matches for `query`.

    Args:
        text_db: Collection to search.
        query: Question text; it is embedded with `embed_text`.
        top_k: Number of results requested from the collection.
        expand_pages: When true, the neighbouring pages are added through
            `expand_page_range`.

    Returns:
        A list of unique page numbers in order of first appearance in the
        results (sorted instead when `expand_pages` is true).
    """
    # Use the same numbered API key as the indexing step.
    text_embedding = embed_text(query, api_key_number=1)
    matched_ids = text_db.query(query_embeddings=text_embedding, n_results=top_k)['ids'][0]

    # Ids are stored as "<page>*<chunk index>"; the page is the part before
    # the '*'. An id without '*' is treated as a bare page number.
    page_numbers = (int(str(chunk_id).split('*', 1)[0]) for chunk_id in matched_ids)

    # Several chunks can come from the same page; keep each page once.
    pages = list(dict.fromkeys(page_numbers))
    if expand_pages:
        pages = expand_page_range(pages)  # Expand page range if requested
    return pages

def prompt_fromatting(prompt: str, document: str):
    """Build the full instruction prompt sent to the model.

    The fixed instruction lines come first, followed by the user's question
    (`prompt`), the retrieved text (`document`) and a closing note. All parts
    are separated by single newlines.
    """
    sections = [
        "You are AI designed to provide accurate and concise answers to your questions about the human body's anatomy.",
        "Retrieves information from a vast repository of provided anatomical content to respond to questions.",
        "Respond with a clear and accurate answer based on the provided content from the document.",
        "If the question is unclear or requires additional context, this application will ask for clarification before providing an answer.",
        "If the question requests an explanation in detail, please provide an elaborate response based on the information given in the document.",
        f"Question: {prompt}",
        f"Document: {document}",
        "Note: Responses are limited to the provided anatomical content and do not include personal opinions or external consultations.",
    ]
    return "\n".join(sections)

def prepare_prompt(query, expand_pages=False):
    """Retrieve the pages relevant to `query` and build the model prompt.

    Args:
        query: The user's question.
        expand_pages: Forwarded to `retrieve_pages` to also include
            neighbouring pages.

    Returns:
        A tuple ``(prompt, pages)``: the formatted prompt and the list of
        page indices whose text was placed in it.
    """
    pages = retrieve_pages(text_db, query, expand_pages=expand_pages)

    # Page expansion can yield an index one past the last page, and a
    # negative index would silently wrap around; keep valid indices only.
    pages = [page for page in pages if 0 <= page < len(pdf)]

    # Concatenate the text of every page, each followed by a newline.
    page_content = ''.join(pdf[page].page_content + '\n' for page in pages)
    return prompt_fromatting(query, page_content), pages

In [125]:
# Example question; `pages` holds the page indices that went into the prompt.
query = 'explain metabolism in detail'
prompt, pages = prepare_prompt(query, expand_pages=False)

In [126]:
# Set the SDK's API key to the one read from the environment earlier.
genai.configure(api_key=gemini_api_key)

# Generate the answer with the configured Gemini model.
model = genai.GenerativeModel(model_name=Config.model_gemini)
response = model.generate_content(
    contents=prompt,
    generation_config=genai.GenerationConfig(
        temperature=0.1,
        max_output_tokens=1000,
    ),
)

In [127]:
# Print the text of the generated answer.
print(response.text)

Metabolism is the sum of all chemical reactions that occur in the body. It encompasses both anabolism and catabolism. 

* **Anabolism** is the process of building larger, more complex molecules from smaller, simpler ones. This process requires energy. For example, your body uses energy to assemble complex chemicals from small molecules derived from the food you eat.
* **Catabolism** is the process of breaking down larger, more complex molecules into smaller, simpler ones. This process releases energy. For example, the complex molecules found in food are broken down so the body can use their parts to assemble the structures and substances needed for life.

Both anabolism and catabolism occur simultaneously and continuously to keep you alive. 

