# **LLM Model Initialization**

In [1]:
!pip install transformers accelerate bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2

In [None]:
from huggingface_hub import login
from dotenv import load_dotenv
import os

# Read key/value pairs from a local .env file so the token never lives in the notebook.
load_dotenv()

# Authenticate against the Hugging Face Hub with the HF_TOKEN variable
# (the lookup yields None when the variable is not set).
login(token=os.environ.get("HF_TOKEN"))

In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_name = "meta-llama/Llama-2-7b-chat-hf"

# Tokenizer belonging to the same checkpoint as the model below.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Causal-LM weights; device placement and precision are delegated to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    # NOTE(review): "auto" is presumably resolved from the checkpoint's own
    # config rather than by probing the hardware — confirm in the transformers docs.
    torch_dtype="auto",
    # Lets accelerate decide where each layer lives (the pipeline cell later
    # reports cuda:0 on this machine).
    device_map="auto",
)

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

In [6]:
# Text-generation pipeline shared by every later cell that queries the model.
chatbot = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    # Sampling configuration: stochastic decoding, so repeated calls may differ.
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    max_new_tokens=512,
)

Device set to use cuda:0


In [7]:
# Smoke test of the bare model: no retrieved context is supplied here, so the
# answer comes from the model alone.
prompt = (
    "You are an academic assistant for undergraduate students. "
    "Answer the following question using only official university guidelines.\n"
    "\n"
    "Q: What is the minimum CPI required for branch change?\n"
    "A:"
)

response = chatbot(prompt)
print(response[0]["generated_text"])

You are an academic assistant for undergraduate students. Answer the following question using only official university guidelines.

Q: What is the minimum CPI required for branch change?
A: According to university guidelines, the minimum CPI required for a branch change is 2.0. This means that a student must have a cumulative grade point average (CPI) of at least 2.0 in order to change their branch. It is important to note that this requirement may vary depending on the faculty or program, so it is best to consult with an academic advisor for more information.


# **PDF Loading and Parsing**

In [8]:
!pip install -q PyMuPDF

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [12]:
import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Location of the PDF file; handed unchanged to ``fitz.open``.

    Returns:
        str: The text of all pages joined together with no separator
        (an empty string when the document has no pages).

    Raises:
        Whatever ``fitz.open`` or ``page.get_text`` raise; the document is
        closed before the exception propagates.
    """
    # The context manager guarantees the document handle is released even
    # when text extraction fails part-way through.
    with fitz.open(pdf_path) as doc:
        # join() builds the result in one pass instead of re-copying the
        # accumulated string for every page.
        return "".join(page.get_text() for page in doc)

pdf_path = "ugrulebook.pdf"

# Pull the whole rulebook into one string for the splitter further down.
document_text = extract_text_from_pdf(pdf_path=pdf_path)

# Quick visual check: show only the leading 1000 characters.
print(document_text[0:1000])

 
 
 
 
INDIAN INSTITUTE OF TECHNOLOGY BOMBAY 
 
 
Rules & Regulations  
for Undergraduate Programmes 
 
 
 
Applicable to the B.Tech., B.S., B.Des.,   
Dual Degree students admitted from the  
Academic Year 2007 - 2008 
 
 
 
 
 
 
 
 
 
 
Updated: January, 2025  
2                                      
Move to Index 
Rules are classified into three separate categories as follows: (I) those which may be implemented 
within a department by DUGC/DPGC, (ii) those that require a decision at the level of Associate/ Dean 
Academic Progamme or UGAPEC/PGAPEC, based on recommendations from the department bodies 
(iii) those that need to be discussed in the Senate for a decision. 
 
Therefore, rules are colored with one of three colors. 
1. The color green indicates that the final authority for rule is the Convener DUGC 
2. The color yellow, and underlined means that the final authority is Associate Dean (Academic 
Programme)/ Dean (Academic Programme) 
3. The color yellow, without an underline

# **Text Splitter**

In [13]:
!pip install -q langchain

In [24]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Character-based splitter: tries the separators in the listed order, from
# paragraph breaks down to single spaces.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", " "],
    chunk_size=1000,      # characters per chunk
    chunk_overlap=200,    # characters shared between neighbouring chunks
)

# Chunks of the rulebook text that get embedded and indexed later.
chunks = text_splitter.split_text(document_text)

# **Vector Database**

In [27]:
!pip install -q faiss-cpu sentence-transformers langchain
!pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 k

In [28]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Sentence-embedding model used to vectorise every chunk and every query.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Embed all chunks and build the FAISS index in one go.
vector_db = FAISS.from_texts(texts=chunks, embedding=embedding_model)

# Persist the index to disk under this folder name.
vector_db.save_local(folder_path="faiss_index_ug_rulebook")

  embedding_model = HuggingFaceEmbeddings(


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

# **Searching and passing the context to the prompt**

In [36]:
def build_prompt(context, question):
    """Assemble the retrieval-augmented prompt sent to the model.

    Args:
        context: Retrieved passage text placed under the ``Context:`` label.
        question: The user's question placed after ``Question:``.

    Returns:
        str: The prompt, beginning with a newline and ending with the
        ``Answer:`` marker that the model is expected to continue from.
    """
    prompt_lines = (
        "",  # the prompt deliberately opens with an empty line
        "Answer the following question using **only** the information in the context below.",
        "",
        "Context:",
        f"{context}",
        "",
        f"Question: {question}",
        "Answer:",
    )
    return "\n".join(prompt_lines)

In [37]:
def get_answer_from_pdf(query, k=3):
    """Answer a question from the indexed rulebook (retrieve, prompt, generate).

    Args:
        query: The user's question.
        k: Number of chunks fetched from ``vector_db`` to use as context.

    Returns:
        str: The text the model produced after the prompt, with surrounding
        whitespace removed.
    """
    # Search top k relevant chunks
    docs = vector_db.similarity_search(query, k=k)
    context = "\n\n".join(doc.page_content for doc in docs)

    prompt = build_prompt(context, query)

    # Generate answer using LLaMA
    response = chatbot(prompt)[0]["generated_text"]

    # The generated text echoes the prompt, so remove it by prefix. Splitting
    # on the "Answer:" marker is unreliable: a retrieved chunk or the question
    # itself may contain that marker, in which case a piece of the prompt
    # would be returned instead of the model's reply.
    if response.startswith(prompt):
        return response[len(prompt):].strip()

    # No prompt echo (e.g. the pipeline was configured to return only the
    # completion): the response already is the answer.
    return response.strip()

# **Custom prompt passing to the model and processing the output**

In [38]:
# End-to-end check of the retrieval-augmented path with the same question
# that was put to the bare model earlier.
query = "What is the minimum CPI required for branch change?"
answer = get_answer_from_pdf(query)
print(f"Query: {query}")
print(f"Answer:\n {answer}")

Query: What is the minimum CPI required for branch change?
Answer:
 9

Explanation: According to the context, the minimum CPI required for branch change is 9. This is mentioned in the second set of criteria for valid requests for branch change, where the “Branch-Change-CPI” of the student is required to be at least 9.


# **Displaying output using UI**

In [32]:
!pip install -q gradio

In [34]:
import gradio as gr

def gradio_chatbot(query):
    """Gradio callback: pass the textbox contents to ``get_answer_from_pdf``.

    Args:
        query: Text the user typed into the UI.

    Returns:
        Whatever ``get_answer_from_pdf`` returns for that text.
    """
    answer = get_answer_from_pdf(query)
    return answer

# Single-textbox UI: the question goes in, the generated answer comes back as text.
interface = gr.Interface(
    fn=gradio_chatbot,
    inputs=gr.Textbox(lines=2, placeholder="Ask something from the UG Rulebook"),
    outputs="text",
    title="UG Rulebook Chatbot",
    description="Ask questions based on the UG Rulebook PDF",
)

In [35]:
# share=True makes Gradio serve the app through a temporary public URL (the
# *.gradio.live link printed in this cell's output), so anyone holding that
# link can send queries to the model while the app is running.
interface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://be0630f27b3ddbdf5c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




# **Evaluating LLM Performance**

In [48]:
# Hand-written question / reference-answer pairs used to spot-check the chatbot.
eval_data = [
    dict(
        question="How many credits are needed for a minor?",
        expected="30 credits.",
    ),
    dict(
        question="Can a dual degree student apply for a minor?",
        expected="Yes.",
    ),
    dict(
        question="What happens if you don’t complete 30 credits for a minor?",
        expected="If you don’t complete 30 credits for a minor, the minor will not be awarded. However, the individual course credits earned will be reflected in the transcript.",
    ),
]

In [49]:
# Show each question with its reference answer and the model's answer side by
# side for manual inspection.
for item in eval_data:
    q, expected = item["question"], item["expected"]
    predicted = get_answer_from_pdf(q)
    print(
        f"❓ Q: {q}",
        f"✅ Expected: {expected}",
        f"🤖 Predicted: {predicted}\n",
        "-" * 60,
        sep="\n",
    )

❓ Q: How many credits are needed for a minor?
✅ Expected: 30 credits.
🤖 Predicted: 30 credits

Explanation: According to the information provided in the context, a minor requires 30 credits worth of additional courses.

------------------------------------------------------------
❓ Q: Can a dual degree student apply for a minor?
✅ Expected: Yes.
🤖 Predicted: Yes.

------------------------------------------------------------
❓ Q: What happens if you don’t complete 30 credits for a minor?
✅ Expected: If you don’t complete 30 credits for a minor, the minor will not be awarded. However, the individual course credits earned will be reflected in the transcript.
🤖 Predicted: If you don’t complete 30 credits for a minor, the minor will not be awarded. However, the individual course credits earned will be reflected in the transcript.

------------------------------------------------------------


In [50]:
# Accuracy Metric
from difflib import SequenceMatcher


def is_similar(a, b, threshold=0.6):
    """Return True when the case-insensitive similarity of ``a`` and ``b`` exceeds ``threshold``.

    Similarity is ``difflib.SequenceMatcher(...).ratio()`` computed on the
    lower-cased strings; the comparison is strict (``>``).
    """
    return SequenceMatcher(None, a.lower(), b.lower()).ratio() > threshold


# Count the answers that are close enough to their reference text. Each
# question is sent to the model again here, and the pipeline samples, so the
# answers scored may differ from those printed by the previous cell.
correct = 0
for item in eval_data:
    pred = get_answer_from_pdf(item["question"])
    if is_similar(item["expected"], pred):
        correct += 1

# An empty evaluation set would otherwise divide by zero; report 0% instead.
accuracy = correct / len(eval_data) if eval_data else 0.0
print(f"🔢 Manual Accuracy: {accuracy * 100:.2f}%")

🔢 Manual Accuracy: 100.00%
