In [None]:
!pip install -q transformers
!pip -q install sentence-transformers
!pip install -q llama-index
!pip install llama-index-llms-huggingface
!pip install llama-index-embeddings-huggingface

In [None]:
# PyTorch stack plus accelerate and bitsandbytes.
# NOTE(review): accelerate is presumably what enables device_map="auto" and
# bitsandbytes what backs the 4-bit BitsAndBytesConfig used when the LLM is
# loaded later in this notebook — confirm.
!pip3 install torch torchvision torchaudio
!pip install accelerate
# -i pins the package index to pypi.org for this one install.
!pip install -i https://pypi.org/simple/ bitsandbytes

In [None]:
# PyMuPDF (imported as `fitz`) is used to cut the sample PDF;
# python-dotenv provides load_dotenv / find_dotenv.
!pip install pymupdf
!pip install python-dotenv

In [None]:
import fitz  # PyMuPDF
import os
import torch
from dotenv import load_dotenv, find_dotenv

In [None]:
# Patch asyncio so that event loops can be nested.
# NOTE(review): presumably needed because the vector index below is built with
# use_async=True from inside a notebook kernel that already runs a loop — confirm.
import nest_asyncio
nest_asyncio.apply()

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from llama_index.llms.ollama import Ollama
from llama_index.core.llms import ChatMessage
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import VectorStoreIndex
from llama_index.core.evaluation import DatasetGenerator, RelevancyEvaluator
from llama_index.core import PromptTemplate

In [None]:
# Lets Download PDF file
# Keep this PDF file in new-dir named "/data"
# under "/data" create "/ch1_ch2" dir
# We are going to create a sample PDF file from this which has only 2 chapters.

!wget https://assets.openstax.org/oscms-prodcms/media/documents/ConceptsofBiology-WEB.pdf

In [None]:
# Create a sample pdf for ch1,2
def save_page_ranges(source_pdf_path, output_pdf_path, page_ranges):
    """
    Saves specified ranges of pages from a source PDF to a new PDF file.

    Args:
    source_pdf_path (str): Path to the source PDF file.
    output_pdf_path (str): Path to the output PDF file. Missing parent
        directories are created.
    page_ranges (list of tuples): List of (start, end) tuples, where each tuple
        represents a page range to save (inclusive, 0-indexed).

    Raises:
    ValueError: If page_ranges is empty, since there would be nothing to save.
    """
    # Local import so the function does not depend on another cell's imports.
    import os

    # Materialise once so any iterable works and emptiness can be checked.
    page_ranges = list(page_ranges)
    if not page_ranges:
        # Fail before touching the filesystem, with a message that names the cause.
        raise ValueError("page_ranges must contain at least one (start, end) tuple")

    # Saving into a directory that does not exist would fail, so create it.
    output_dir = os.path.dirname(output_pdf_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # try/finally guarantees both documents are closed even when inserting
    # or saving raises (e.g. a bad page index or an unwritable path).
    doc = fitz.open(source_pdf_path)
    try:
        # An argument-less open() yields a new, empty PDF to copy pages into.
        new_doc = fitz.open()
        try:
            for start, end in page_ranges:
                new_doc.insert_pdf(doc, from_page=start, to_page=end)
            new_doc.save(output_pdf_path)
        finally:
            new_doc.close()
    finally:
        doc.close()

    print(f"Specified page ranges have been saved to {output_pdf_path}")

# Full textbook PDF to read from
source_pdf_path = '/content/data/ConceptsofBiology-WEB.pdf'
# Destination of the trimmed sample PDF
output_pdf_path = '/content/data/ch1_ch2/sample_ch1_ch2_ConceptsofBiology.pdf'

# Inclusive, 0-indexed (start, end) page ranges to copy into the sample
page_ranges = [(18, 38), (40, 66)]

save_page_ranges(
    source_pdf_path=source_pdf_path,
    output_pdf_path=output_pdf_path,
    page_ranges=page_ranges,
)


In [None]:
# "BAAI/bge-large-en-v1.5" --> Embedding Dimensions = 1024 | Max Tokens = 512.
# Run the embedding model on the GPU when one is available, otherwise on the CPU.
embed_model = HuggingFaceEmbedding(
    device='cuda' if torch.cuda.is_available() else 'cpu',
    model_name="BAAI/bge-large-en-v1.5",
)

In [None]:
# Check embedding model: embed a short string and print the length of the
# returned vector, i.e. the embedding dimension.
embeddings = embed_model.get_text_embedding("Hello World!")
print(len(embeddings))

In [None]:
from llama_index.llms.huggingface import HuggingFaceLLM

def messages_to_prompt(messages):
    """Render chat messages as a single prompt in the Phi-3 instruct chat format.

    Each system/user/assistant message becomes ``<|role|>\\n{content}<|end|>\\n``
    and the prompt finishes with an open ``<|assistant|>\\n`` turn for the model
    to complete. The previous ``</s>`` terminators and the forced blank system
    turn belong to the Zephyr template, not to the Phi-3 model loaded in this
    notebook.

    Args:
        messages: Iterable of objects exposing ``role`` and ``content``
            attributes. ``role`` may be a plain string or an enum with a
            ``.value`` attribute.

    Returns:
        str: The formatted prompt.
    """
    role_tags = {
        "system": "<|system|>",
        "user": "<|user|>",
        "assistant": "<|assistant|>",
    }

    parts = []
    for message in messages:
        # Accept both enum members and plain strings for the role.
        role = getattr(message.role, "value", message.role)
        tag = role_tags.get(role)
        if tag is None:
            # Messages with any other role are skipped, as before.
            continue
        # A missing content should render as an empty turn, not the text "None".
        content = "" if message.content is None else message.content
        parts.append(f"{tag}\n{content}<|end|>\n")

    # Leave the assistant turn open so generation continues from here.
    parts.append("<|assistant|>\n")
    return "".join(parts)

def completion_to_prompt(completion):
    """Wrap a bare completion string as a single-turn Phi-3 instruct prompt.

    The text is placed in a user turn terminated by ``<|end|>`` and followed by
    an open ``<|assistant|>\\n`` turn. The previous ``</s>`` terminators and
    empty system turn belong to the Zephyr template, not to the Phi-3 model
    loaded in this notebook.

    Args:
        completion (str): The raw prompt text.

    Returns:
        str: The formatted prompt.
    """
    return f"<|user|>\n{completion}<|end|>\n<|assistant|>\n"

import torch
from transformers import BitsAndBytesConfig
from llama_index.core.prompts import PromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM

# Load the model in 4-bit to reduce memory use: "nf4" quant type,
# double quantization enabled, float16 as the compute dtype.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# Load the quantized instruct model through llama-index's HuggingFace wrapper.
# The prompt formatters passed below must match the chat template of whichever
# model is selected here.
llm = HuggingFaceLLM(
    model_name="microsoft/Phi-3-mini-4k-instruct",
    tokenizer_name="microsoft/Phi-3-mini-4k-instruct",
    # model_name="HuggingFaceH4/zephyr-7b-beta",
    # tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
    context_window=3900,
    max_new_tokens=512,
    model_kwargs={"quantization_config": quantization_config},
    # Ask for deterministic (greedy) decoding explicitly. temperature=0.0 is
    # not a valid way to do that: it is ignored with a warning when sampling
    # is off and rejected when sampling is on.
    generate_kwargs={"do_sample": False},
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    device_map="auto",
)

# Smoke test: run one completion through the model and show the text.
response = llm.complete("What is the meaning of life?")
print(response)

In [None]:
# Read the two-chapter sample PDF into llama-index Document objects.
loader = SimpleDirectoryReader(
    required_exts=[".pdf"],
    recursive=True,
    input_files=['/content/data/ch1_ch2/sample_ch1_ch2_ConceptsofBiology.pdf'],
)
documents = loader.load_data()

In [None]:
# Display one loaded Document (index 5) to eyeball what the reader produced.
# Raises IndexError if fewer than six documents were loaded.
documents[5]

In [None]:
# Chunk the documents into nodes (chunk_size=512, chunk_overlap=64).
splitter = SentenceSplitter(
    chunk_overlap=64,
    chunk_size=512,
)
nodes = splitter.get_nodes_from_documents(documents)

In [None]:
# Build the vector index over the chunked nodes with the local embedding model.
index0 = VectorStoreIndex(
    nodes=nodes,
    embed_model=embed_model,
    show_progress=True,
    use_async=True,
)

In [None]:
# Query engine over the index, answering with the local LLM; try one question.
query_engine0 = index0.as_query_engine(llm=llm)
print(
    query_engine0.query(
        "The type of logical thinking that uses related observations to arrive at a general conclusion is called?"
    )
)

In [None]:
# Question generator for the evaluation set: two questions per chunk,
# produced by the local LLM from the chunked nodes.
data_gen = DatasetGenerator(
    nodes=nodes,
    llm=llm,
    question_gen_query="Generate 2 questions per chunk.Restrict the questions to the context information provided.",
    num_questions_per_chunk=2,
)

In [None]:
# Generate the evaluation questions from the nodes given to the generator.
# NOTE(review): presumably one LLM call per chunk, so this can be slow — confirm.
eval_questions = data_gen.generate_questions_from_nodes()

In [None]:
# Keep only "How"/"What" questions and drop any that mention the PDF itself.
# The parentheses matter: `and` binds tighter than `or`, so without them the
# PDF exclusion applied only to "What" questions and every "How" question
# was kept regardless.
eval_questions_updated = [
    q
    for q in eval_questions
    if ("How" in q or "What" in q) and not ("pdf" in q or "PDF" in q)
]
len(eval_questions_updated)

In [None]:

import json
# Run the first five filtered questions through the query engine and have the
# RelevancyEvaluator score each response; results are collected as plain dicts.
rel_eval = RelevancyEvaluator(llm=llm)

relevancy_results = []
for q in eval_questions_updated[:5]:
    ques_response = query_engine0.query(q)
    evaluation = rel_eval.evaluate_response(query=q, response=ques_response)
    # Round-trip through JSON to turn the result object into a dict.
    eval_result = json.loads(evaluation.json())
    relevancy_results.append(eval_result)
    print(f" q --> {q} score --> {eval_result['score']}")

# Optional verbose debug output (the loop variable is `q`):
# print(f"Q --> {q} \nsource --> {ques_response.source_nodes[0].node.get_content()} \neval_result --> {eval_result}\n")