In [4]:
import torch
from transformers import BartTokenizer, BartForConditionalGeneration, AdamW, get_linear_schedule_with_warmup
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math


In [5]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Inference

In [None]:
df = pd.read_csv('/content/drive/MyDrive/VQA-Final/part B/Trained models/CMS-[Model based on QA]/test_dataset.csv')

df.shape

(3274, 3)

In [None]:
df.head()

Unnamed: 0,Question,Object,Answer
0,how does garuda's iconography play his eyes?,garuda,"in both forms, garuda's eyes are depicted as o..."
1,can you outline the creation process of an,akhi jhyal,creating an akhi jhyal involves selecting high...
2,where does kawaguchi indicate the origin of th...,prayer wheel,kawaguchi indicate that the prayer wheel origi...
3,which tibetan master is known for teaching abo...,prayer wheel,tibetan gurus marpa and milarepa are known for...
4,are visitors allowed to take photographs insid...,taleju temple,photography is generally prohibited inside the...


In [6]:
# Load the tokenizer and model (make sure these paths match your setup)
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
model = BartForConditionalGeneration.from_pretrained('facebook/bart-base')
checkpoint_path = '/content/drive/MyDrive/VQA-Final/part B/Trained models/final_checkpoint.pth'

# Load your trained model checkpoint
def load_checkpoint(model, file_path):
    checkpoint = torch.load(file_path, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    print(f"Checkpoint loaded from {file_path}")

load_checkpoint(model, checkpoint_path)
model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

  checkpoint = torch.load(file_path, map_location='cpu')


Checkpoint loaded from /content/drive/MyDrive/VQA-Final/part B/Trained models/final_checkpoint.pth


BartForConditionalGeneration(
  (model): BartModel(
    (shared): BartScaledWordEmbedding(50265, 768, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50265, 768, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 768)
      (layers): ModuleList(
        (0-5): 6 x BartEncoderLayer(
          (self_attn): BartSdpaAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_lay

In [None]:
# Function to generate answers based on input questions
def ask_question(question, object_name, max_length=80):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    combined_input = f"{object_name} {question}" if object_name else question

    inputs = tokenizer(
        combined_input,
        return_tensors="pt",
        max_length=128,
        truncation=True
    ).to(device)

    answer_ids = model.generate(
        inputs['input_ids'],
        max_length=max_length,
        num_beams=4,
        early_stopping=True
    )
    answer = tokenizer.decode(answer_ids[0], skip_special_tokens=True)
    return answer

In [None]:
sampled_indices = df.sample(10).index
sampled_questions = df.loc[sampled_indices, 'Question']
sampled_objects = df.loc[sampled_indices, 'Object']

answers = []
for question, object_name in zip(sampled_questions, sampled_objects):
    answer = ask_question(question, object_name)
    answers.append({"Question": question, "Answer": answer})

for qa in answers:
    print("Question:", qa["Question"])
    print("Answer:", qa["Answer"])
    print()

Question: what does the association of taleju bhawani with the malla king symbolize?
Answer: the association of taleju bhawani with the malla king symbolizes her role as their royal deity, established by king harisimha deva in bhaktapur.

Question: are khadullu lamps presenting in countries outside of nepal?
Answer: yes, khadullu lamps are occasionally gifted as decorative items among nepali expatriate communities and abroad.

Question: what is the procedure of the terraced plinth of the nyatapola temple?
Answer: the staircase of the nyatapola temple involves a circular step that allows visitors to ascend.

Question: which incident is related to king harisimha deva and goddess taleju?
Answer: king harisimha deva brought goddess taleju to the kathmandu valley in the 14th century, establishing her presence there.

Answer: aruna warned vinata not to open the second egg and cursed her to remain a slave until his brother came to rescue her.

Question: what is the role of animal sacrifice in

### Context retrieval based on QA

In [None]:
!pip install llama-index
!pip install llama-index-embeddings-huggingface
!pip install peft
!pip install auto-gptq
!pip install optimum
!pip install bitsandbytes

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex, StorageContext, load_index_from_storage
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

In [None]:
# import any embedding model on HF hub (https://huggingface.co/spaces/mteb/leaderboard)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
# Settings.embed_model = HuggingFaceEmbedding(model_name="thenlper/gte-large") # alternative model

Settings.llm = None             #  in this case, the system will only use an embedding model and not a full-fledged LLM
Settings.chunk_size = 1000      #  input text is being divided into chunks of 256 tokens/characters (depending on the implementation)
Settings.chunk_overlap = 250   # there is an overlap of 25 tokens/characters between consecutive chunks. Overlap helps maintain context continuity between chunks

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

LLM is explicitly disabled. Using MockLLM.


### Load the vector database

In [None]:
import os

# Define paths
PERSIST_DIR = "/content/drive/MyDrive/VQA-Final/miscellanous dataset/RAG_Context/Storage"
DOCUMENTS_DIR = "/content/drive/MyDrive/VQA-Final/miscellanous dataset/RAG_Context/documents"

if os.path.exists(PERSIST_DIR):
    # Load the existing index
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # Load documents and create the index
    documents = SimpleDirectoryReader(DOCUMENTS_DIR).load_data()
    index = VectorStoreIndex.from_documents(documents)
    # Persist index for future use
    index.storage_context.persist(persist_dir=PERSIST_DIR)

## Retriver engine

In [None]:
# set number of docs to retreive
top_k = 1

# configure retriever
retriever = VectorIndexRetriever(
    index=index,                                # retrives the similar documents from created index
    similarity_top_k=top_k,
)

In [None]:
# assemble query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.6)],
)

### Answer Formatter

In [None]:
import re

def format_retrieved_text(retrieved_text):
    """
    Cleans and formats the retrieved text to display only the main content in a readable paragraph format.

    Args:
    retrieved_text (str): The raw text output from the retriever with extra metadata.

    Returns:
    str: Cleaned and formatted paragraph text with proper spacing.
    """
    # Access the 'response' attribute of the Response object to get the actual text content
    text_content = retrieved_text.response

    # Remove metadata patterns (e.g., page labels, file paths) using regex if they appear in the text
    cleaned_text = re.sub(r'page_label:.*\n|file_path:.*\n', '', text_content)

    # Replace multiple whitespace and line breaks with a single space
    formatted_text = re.sub(r'\s+', ' ', cleaned_text).strip()

    return formatted_text

### Retrive based on answers above


In [None]:
# Sample 10 random indices and get corresponding questions and objects
sampled_indices = df.sample(10).index
sampled_questions = df.loc[sampled_indices, 'Question']
sampled_objects = df.loc[sampled_indices, 'Object']

qa_pairs = []

# Generate answers and retrieve context based on each answer
for question, object_name in zip(sampled_questions, sampled_objects):
    # Generate answer based on question and object
    answer = ask_question(question, object_name)

    query_text = f"{object_name} {answer}"
    # Retrieve context based on the generated answer
    resp = query_engine.query(query_text)  # Adjust 'retrieve' as per your vector store library's retrieval method
    formatted_context = format_retrieved_text(resp)

    # Append question, answer, and context to the list
    qa_pairs.append({"Question": question, "Answer": answer, "Context": formatted_context})

# Display the question-answer-context pairs
for qa in qa_pairs:
    print("User:", qa["Question"])
    print("Chat agent:", qa["Answer"])
    print("User: Tell me more about it.")
    print("Chat agent:", qa["Context"])
    print()


User: in what mode do tantric practices incorporate mantra spinning?
Chat agent: in tantric practices, the spinning of mantras is linked to the visualization nadis and chakras.
User: Tell me more about it.
Chat agent: Context information is below. --------------------- practices whereby the Tantric practitioner visualizes mantras revolving around the nadis and especially around the meridian chakras such as the heart and crown. Therefore, prayer wheels are a visual aid for developing one's capacity for these types of Tantric visualizations. The spiritual method for those practicing with a prayer wheel is very specific (with slight variations according to different Buddhist sects). The practitioner most often spins the wheel clockwise, as the direction in which the mantras are written is that of the movement of the sun across the sky. --------------------- Given the context information and not prior knowledge, answer the query. Query: prayer wheel in tantric practices, the spinning of ma