In [1]:
!pip install faiss-cpu transformers sentence-transformers
!pip install pymupdf

Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (4.4 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.3.0-py3-none-any.whl.metadata (10 kB)
Collecting scikit-learn (from sentence-transformers)
  Downloading scikit_learn-1.5.2-cp311-cp311-macosx_12_0_arm64.whl.metadata (13 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn->sentence-transformers)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading faiss_cpu-1.9.0-cp311-cp311-macosx_11_0_arm64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m30.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sentence_transformers-3.3.0-py3-none-any.whl (268 kB)
Downloading scikit_learn-1.5.2-cp311-cp311-macosx_12_0_arm64.whl (11.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.0/11.0 MB[0m [31m46.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading threadpoolctl-3.5.

In [2]:
# Run the rest of the notebook from the repository root so that the local
# `commons` package and the relative `rag/data/...` paths resolve.
# The move is guarded: an unconditional `cd ..` climbs one more directory every
# time the cell is re-run, which breaks every later relative path.
import os

if not os.path.isdir("commons"):
    os.chdir("..")
print(os.getcwd())

/Users/juansegundohevia/Documents/Rice MDS/ELEC631/quantized-education


In [4]:
import transformers
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline
from commons.retrieval import RAGPipeline
import importlib
import numpy as np
import os 
import commons.model as model
# Re-import commons.model so edits made on disk are picked up without
# restarting the kernel.
model = importlib.reload(model)

# Record which transformers release this run used.
print("Transformers version:", transformers.__version__)

Transformers version: 4.45.2


# RAG PIPELINE

### Extract Text from PDF

In [5]:
import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_path, chunk_size=500):
    """Extract a PDF's text as a flat list of word-bounded chunks.

    Each page is read on its own, split on whitespace, and cut into runs of at
    most ``chunk_size`` words, so a chunk never spans two pages and the last
    chunk of a page may be shorter. Pages without any text contribute nothing.

    Args:
        pdf_path: Path of the PDF, passed straight to ``fitz.open``.
        chunk_size: Maximum number of words per chunk; must be at least 1.

    Returns:
        list[str]: Chunks in page order, words re-joined with single spaces.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        # A zero step makes range() fail in the middle of extraction and a
        # negative one silently yields no chunks; reject both up front.
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    text_chunks = []
    # The context manager releases the document even when reading a page fails.
    with fitz.open(pdf_path) as pdf_document:
        for page in pdf_document:
            words = page.get_text("text").split()
            text_chunks.extend(
                " ".join(words[start:start + chunk_size])
                for start in range(0, len(words), chunk_size)
            )
    return text_chunks

# Path of the Biology 2e PDF to index — point this at your own file.
pdf_path = "rag/data/Biology2e-WEB_ICOFkGu.pdf"

# Chunk the whole document with the default chunk size and report the count.
documents = extract_text_from_pdf(pdf_path=pdf_path)
print(f"Extracted {len(documents)} chunks from the PDF.")


Extracted 1913 chunks from the PDF.


### Embed the Dataset and Store It in FAISS

In [6]:
# Sentence-embedding model, used here for the corpus and later for the queries.
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# One embedding row per extracted chunk.
# NOTE(review): the all-MiniLM-L6-v2 model card says input beyond 256 word
# pieces is truncated, so 500-word chunks are presumably only partly embedded —
# confirm and consider a smaller chunk size.
embeddings = embedder.encode(documents)

# Flat L2 index sized to the embedding width, filled with every chunk vector
# so that row i of the index corresponds to documents[i].
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)


### Implement the Retrieval Function

In [7]:
def retrieve_documents(query, top_k=2):
    """Return the stored chunks whose embeddings are closest to ``query``.

    Embeds the query with the notebook-level ``embedder``, searches the
    notebook-level FAISS ``index`` and maps the hit positions back into
    ``documents``.

    Args:
        query: The question or search text.
        top_k: Maximum number of chunks to return; must be at least 1.

    Returns:
        list[str]: Up to ``top_k`` chunks, in the order the index returned
        them (fewer when the index holds fewer than ``top_k`` vectors).

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError(f"top_k must be a positive integer, got {top_k!r}")

    query_embedding = embedder.encode([query])
    _, indices = index.search(query_embedding, top_k)
    # FAISS pads the result with -1 when it finds fewer than top_k neighbours;
    # documents[-1] would silently return the last chunk as a bogus hit.
    return [documents[i] for i in indices[0] if i >= 0]

# Smoke-test the retriever with a sample question and show the raw hits.
query = "Tell me about the structure of an atom"
retrieved_docs = retrieve_documents(query=query)
print("Retrieved documents:", retrieved_docs)


Retrieved documents: ["Figure 2.5 The periodic table shows each element's atomic mass and atomic number. The atomic number appears above the symbol for the element and the approximate atomic mass appears below it. The periodic table groups elements according to chemical properties. Scientists base the differences in chemical reactivity between the elements on the number and spatial distribution of an atom’s electrons. Atoms that chemically react and bond to each other form molecules. Molecules are simply two or more atoms chemically bonded together. Logically, when two atoms chemically bond to form a molecule, their electrons, which form the outermost region of each atom, come together first as the atoms form a chemical bond. Electron Shells and the Bohr Model Note that there is a connection between the number of protons in an element, the atomic number that distinguishes one element from another, and the number of electrons it has. In all electrically neutral atoms, the number of elec

### Define the RAG Pipeline with LLM

In [8]:
# Chat-mode SmolModel on CPU with max_tokens=512. In the printed conversation
# further down, the anchor prompt shows up as the system message.
smol = model.SmolModel(
    is_chat=True,
    device="cpu",
    max_tokens=512,
    anchor_prompt="Hey, you are chatbot helping me to understand my biology homework.",
)

In [9]:
# Load a language model for generation (e.g., distilgpt-2 for smaller scale)
#generator = pipeline("text-generation", model="distilgpt2")

def rag_pipeline(query):
    """Answer ``query`` with the chat model, using retrieved chunks as context.

    The chunks returned by ``retrieve_documents`` are joined with single spaces
    into one context string, wrapped in a ``Context / Question / Answer``
    prompt and sent to ``smol.chat`` with ``verbose=True``.

    Args:
        query: The user's question.

    Returns:
        Whatever ``smol.chat`` returns for the assembled prompt.
    """
    context = " ".join(retrieve_documents(query))
    prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"
    return smol.chat(prompt=prompt, verbose=True)

# End-to-end check: retrieve context for a sample question and generate a reply.
response = rag_pipeline(query="Tell me about the structure of an atom.")

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


tensor([[    1,  9690,   198, 22234,    28,   346,   359, 11743,  9433,  4307,
           549,   288,  1044,   957,  7505, 10025,    30,     2,   198,     1,
          4093,   198, 17548,    42,  8799,   216,    34,    30,    37,   378,
         15246,  3252,  2744,   971,  4047,   506, 12000,  2389,   284, 12000,
          1230,    30,   378, 12000,  1230,  4541,  2120,   260,  3573,   327,
           260,  4047,   284,   260, 20374, 12000,  2389,  4541,  2441,   357,
            30,   378, 15246,  3252,  2119,  2728,  2289,   288,  2819,  3849,
            30, 11110,  3159,   260,  3581,   281,  2819, 37761,   826,   260,
          2728,   335,   260,  1230,   284,  9034,  4225,   282,   354, 11776,
           417,    99, 10568,    30,  1814,  1388,   338, 25132,  2595,   284,
          4436,   288,   971,   550,   910,  6756,    30,   372, 30427,   359,
          2788,   827,   355,   540,  9288, 25132, 28624,  1592,    30,  8969,
           947,    28,   645,   827,  9288, 25132,  

Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


In [11]:
# Show the text returned by the pipeline. In the recorded output this is the
# whole conversation (system and user turns included), not only the answer.
print(response)

system
Hey, you are chatbot helping me to understand my biology homework.
user
Context: Figure 2.5 The periodic table shows each element's atomic mass and atomic number. The atomic number appears above the symbol for the element and the approximate atomic mass appears below it. The periodic table groups elements according to chemical properties. Scientists base the differences in chemical reactivity between the elements on the number and spatial distribution of an atom’s electrons. Atoms that chemically react and bond to each other form molecules. Molecules are simply two or more atoms chemically bonded together. Logically, when two atoms chemically bond to form a molecule, their electrons, which form the outermost region of each atom, come together first as the atoms form a chemical bond. Electron Shells and the Bohr Model Note that there is a connection between the number of protons in an element, the atomic number that distinguishes one element from another, and the number of electr

In [13]:
# Rebuild the chat model with a shorter limit (max_tokens=128 instead of 512);
# this replaces the `smol` instance created earlier in the notebook.
smol = model.SmolModel(
    is_chat=True,
    device="cpu",
    max_tokens=128,
    anchor_prompt="Hey, you are chatbot helping me to understand my biology homework.",
)

In [14]:
# Presumably resets the conversation history kept by the model wrapper so the
# next chat starts fresh — confirm against commons/model.py.
smol.clean_history()

In [30]:
import importlib
from commons import retrieval as rag

In [31]:
# Reload the module imported as `rag` so on-disk edits are picked up; as the
# cell's last expression, the reloaded module object is echoed as output.
# NOTE(review): the recorded output names `commons.rag_pipeline`, which does not
# match the `commons.retrieval` import — the output looks stale; re-run to confirm.
importlib.reload(rag)

<module 'commons.rag_pipeline' from '/Users/juansegundohevia/Documents/Rice MDS/ELEC631/quantized-education/commons/rag_pipeline.py'>

In [35]:
# Build the packaged RAG pipeline over the same PDF; per the recorded output,
# construction extracts the PDF's chunks.
pdf_path = "./rag/data/Biology2e-WEB_ICOFkGu.pdf"
anchor_prompt = "Hey, you are chatbot helping me to understand my biology homework."
pipe = rag.RAGPipeline(
    pdf_path,
    anchor_prompt=anchor_prompt,
    max_tokens=512,
)


Extracted 1913 chunks from the PDF.


In [39]:
# NOTE(review): the recorded run of this cell raised
# `ValueError: Input length of input_ids is 3283, but max_length is set to 512`,
# i.e. the assembled prompt is longer than the generation length limit.
# Presumably `max_tokens` is forwarded as `max_length` rather than
# `max_new_tokens` inside RAGPipeline — confirm in commons/retrieval.py.
pipe.ask("Tell me about the cycle of water")

ValueError: Input length of input_ids is 3283, but `max_length` is set to 512. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.