In [None]:
!pip install dotenv langchain_chroma langchain_huggingface langchain_community langchain_google_genai chromadb pypdf

Collecting dotenv
  Downloading dotenv-0.9.9-py2.py3-none-any.whl.metadata (279 bytes)
Collecting langchain_chroma
  Downloading langchain_chroma-0.2.4-py3-none-any.whl.metadata (1.1 kB)
Collecting langchain_huggingface
  Downloading langchain_huggingface-0.3.0-py3-none-any.whl.metadata (996 bytes)
Collecting langchain_community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain_google_genai
  Downloading langchain_google_genai-2.1.7-py3-none-any.whl.metadata (7.0 kB)
Collecting chromadb
  Downloading chromadb-1.0.15-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting pypdf
  Downloading pypdf-5.7.0-py3-none-any.whl.metadata (7.2 kB)
Collecting python-dotenv (from dotenv)
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.

In [None]:
# Importing all the necessary modules
import hashlib
import os

import numpy as np
from dotenv import load_dotenv
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables.passthrough import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings

# Setting environment variables
# load_dotenv() reads a local .env file (if present) into os.environ, so the values do not
# need to be copied back by hand. Assigning os.environ[name] = os.getenv(name) would raise a
# TypeError for any variable that is unset, because os.getenv returns None in that case.
load_dotenv()

# The Gemini chat model needs its API key, so fail early with a clear message.
if not os.getenv("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Add it to your .env file or environment before running this notebook."
    )

# LangSmith tracing and the Hugging Face token are optional: report what is missing instead of crashing.
for optional_var in ("LANGCHAIN_TRACING_V2", "LANGCHAIN_ENDPOINT", "LANGCHAIN_API_KEY", "HF_TOKEN"):
    if not os.getenv(optional_var):
        print(f"{optional_var} is not set; continuing without it.")

# Chat model used to generate the answers (Gemini 2.0 Flash, temperature 0, max_tokens 1024)
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0, max_tokens=1024)

In [None]:
# Loading every PDF as one string, then splitting the strings into overlapping chunks
PDF_DIR = "/content/pdfs"

pdfData = []
# sorted() keeps the chunk order reproducible between runs; the suffix filter skips
# non-PDF directory entries (e.g. a .ipynb_checkpoints folder) that PyPDFLoader cannot parse.
for pdf_name in sorted(os.listdir(PDF_DIR)):
    if not pdf_name.lower().endswith(".pdf"):
        continue
    pages = PyPDFLoader(os.path.join(PDF_DIR, pdf_name)).load()
    # One string per PDF: page texts in order, each followed by a newline.
    pdfData.append("".join(page.page_content + "\n" for page in pages))

# Without any source text the vector store would be empty and every answer meaningless.
if not pdfData:
    raise FileNotFoundError(f"No PDF files found in {PDF_DIR}")

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=200)
data = []  # flat list of text chunks across all PDFs
for pdf_text in pdfData:
    data.extend(text_splitter.split_text(pdf_text))

In [None]:
# Embedding the chunks and storing them for retrieval
embed = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vectorstore = Chroma(
    collection_name="EmbeddingMatrixes",
    embedding_function=embed,
    persist_directory="vdb")

# The collection is persisted in "vdb", so adding the chunks without ids would store a fresh
# copy of every chunk each time this cell is re-run. Keying each chunk by a hash of its text
# makes the insert repeatable (same text -> same id) and drops exact duplicate chunks, which
# would otherwise produce duplicate ids within one call.
# NOTE: entries written by earlier runs with auto-generated ids are not removed; delete the
# "vdb" directory once to start from a clean index.
chunks_by_id = {
    hashlib.sha256(chunk.encode("utf-8", errors="replace")).hexdigest(): chunk
    for chunk in data
}
vectorstore.add_texts(list(chunks_by_id.values()), ids=list(chunks_by_id.keys()))
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})  # return the 3 most similar chunks

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Preparing the template for input and creating the ragchain pipeline
TEMPLATE = """
You are a helpful AI Assistant.
Using only the factual information form the provided context, answer the given question. If possible, elaborate on your response briefly using only the information in the context. Do not show any external knowledge, if the context lacks relevant facts, just say you can't answer.

---

Context:

{context}

---

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(TEMPLATE)


def format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Without this step the list of Document objects is interpolated into the prompt as its
    Python repr (metadata, escaped newlines and all) instead of as plain text.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The incoming query is sent to the retriever (whose hits become the context text) and is
# also passed through unchanged as the question.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)


def get_answer(query):
    """Run the RAG chain for ``query`` and return the model's answer as a string."""
    return rag_chain.invoke(query)

In [None]:
# Functions to calculate cosine similarity and to log outputs
def cos_sim(a, b):
    """Cosine similarity between two vectors.

    Args:
        a: Array-like of numbers.
        b: Array-like of numbers with the same length as ``a``.

    Returns:
        The dot product of the inputs divided by the product of their Euclidean norms.
        Returns 0.0 when either input has zero norm, where the similarity is undefined
        and the division would otherwise yield ``nan``.
    """
    a, b = np.asarray(a), np.asarray(b)
    norm_a, norm_b = np.linalg.norm(a), np.linalg.norm(b)
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return np.dot(a, b) / (norm_a * norm_b)

def log_retrival(query):
    """Score each chunk retrieved for ``query`` against the query itself.

    The retriever is invoked with the query, then the query and every retrieved
    chunk are embedded and compared with ``cos_sim``.

    Returns:
        A list of ``(similarity, chunk_text)`` tuples in retrieval order.
    """
    hits = retriever.invoke(query)
    query_embedding = embed.embed_query(query)
    return [
        (cos_sim(embed.embed_query(hit.page_content), query_embedding), hit.page_content)
        for hit in hits
    ]

In [None]:
def answer_with_logs(query):
    """Print the answer for ``query`` followed by the retrieval log.

    The answer comes from ``get_answer``; each entry from ``log_retrival`` is then
    printed on its own line as a similarity score and the chunk text it belongs to.
    """
    response = get_answer(query)
    scored_chunks = log_retrival(query)
    print("Answer : " + response)
    for score, chunk_text in scored_chunks:
        print("Similarity : ", score, " For the doc : ", chunk_text)

In [None]:
# Demo query about the "Attention is All You Need" paper; prints the answer, then each retrieved chunk with its similarity score.
answer_with_logs("What is the main innovation introduced in the \"Attention is All You Need\" paper?")

Answer : The main innovation introduced in the "Attention is All You Need" paper is scaled dot-product attention, multi-head attention and the parameter-free position representation.
Similarity :  0.5115201040317792  For the doc :  [Tur20] Project Turing. Microsoft research blog, Feb 2020.
[VBL+16] Oriol Vinyals, Charles Blundell, Timothy Lillicrap, Daan Wierstra, et al. Matching Networks for One
Shot Learning. In Advances in neural information processing systems, pages 3630–3638, 2016.
[VSP+17] Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez,Łukasz
Kaiser, and Illia Polosukhin. Attention is all you need. In Advances in neural information processing
systems, 2017.
[WPN+19] Alex Wang, Yada Pruksachatkun, Nikita Nangia, Amanpreet Singh, Julian Michael, Felix Hill, Omer
Levy, and Samuel Bowman. Superglue: A stickier benchmark for general-purpose language understand-
ing systems. In Advances in Neural Information Processing Systems, pages 3261–3275, 

In [None]:
# Demo query about BERT; prints the answer, then each retrieved chunk with its similarity score.
answer_with_logs("How does BERT differ from traditional left-to-right language models?")

Answer : BERT does not use traditional left-to-right or right-to-left language models for pre-training. Instead, it uses two unsupervised tasks.
Similarity :  0.6566741941534275  For the doc :  single sequence. We differentiate the sentences in
two ways. First, we separate them with a special
token ([SEP]). Second, we add a learned embed-
ding to every token indicating whether it belongs
to sentence A or sentence B. As shown in Figure 1,
we denote input embedding as E, the ﬁnal hidden
vector of the special [CLS] token as C ∈RH,
and the ﬁnal hidden vector for the ith input token
as Ti ∈RH.
For a given token, its input representation is
constructed by summing the corresponding token,
segment, and position embeddings. A visualiza-
tion of this construction can be seen in Figure 2.
3.1 Pre-training BERT
Unlike Peters et al. (2018a) and Radford et al.
(2018), we do not use traditional left-to-right or
right-to-left language models to pre-train BERT.
Instead, we pre-train BERT using two unsu

In [None]:
# Demo query about GPT-3 few-shot learning; prints the answer, then each retrieved chunk with its similarity score.
answer_with_logs("Describe the few-shot learning capability of GPT-3 with an example.")

Answer : GPT-3 was evaluated in few-shot settings. In TriviaQA, GPT-3 achieved a score of 71.2 in the few-shot setting.
Similarity :  0.6385795825295677  For the doc :  The remainder of this paper is organized as follows. In Section 2, we describe our approach and methods for training
GPT-3 and evaluating it. Section 3 presents results on the full range of tasks in the zero-, one- and few-shot settings.
Section 4 addresses questions of data contamination (train-test overlap). Section 5 discusses limitations of GPT-3.
Section 6 discusses broader impacts. Section 7 reviews related work and Section 8 concludes.
2 Approach
Our basic pre-training approach, including model, data, and training, is similar to the process described in [RWC+19],
with relatively straightforward scaling up of the model size, dataset size and diversity, and length of training. Our use
of in-context learning is also similar to [RWC+19], but in this work we systematically explore different settings for
learning withi

In [None]:
# Demo query about the CLIP loss function; prints the answer, then each retrieved chunk with its similarity score.
answer_with_logs("What is the loss function used in CLIP and why is it effective?")

Answer : The loss function for CLIP is:

L= −12N∑i=1(log exp(cos(fI(Ii),fT(Ti))/τ)∑Nj=1 exp(cos(fI(Ii),fT(Tj))/τ)+ log exp(cos(fI(Ii),fT(Ti))/τ)∑Nj=1 exp(cos(fI(Ij),fT(Ti))/τ)),

where fI and fT correspond to image and text encoders respectively, cos(·) denotes the cosine similarity between the inputs, and τ is a learnable temperature initialized at 0.07.

The context states that this simple training framework fails to model the semantic information of inputs due to the simplicity of the data structure. This results in inferior performances on tasks that require reasoning ability, e.g., visual question answering and visual commonsense reasoning. Also, the image and text features reside in separate spaces, which makes it difficult to model the interactions between different modalities. Finally, the massive time and resource consumption in the training procedure set restrictions on performing a full pre-training schedule from scratch.
Similarity :  0.4663825749056508  For the doc :  imag

In [None]:
# Demo query about LLaMA training cost; prints the answer, then each retrieved chunk with its similarity score.
answer_with_logs("What approach does LLaMA take to reduce computational cost during training?")

Answer : To improve training speed and reduce memory usage, LLaMA uses an efficient implementation of the causal multi-head attention mechanism. This implementation is available in the xformers library and is inspired by Rabe and Staats (2021).
Similarity :  0.5637893308622195  For the doc :  ferent models are given in Table 2.
2.3 Optimizer
Our models are trained using the AdamW opti-
mizer (Loshchilov and Hutter, 2017), with the fol-
lowing hyper-parameters: β1 = 0.9,β2 = 0.95.
We use a cosine learning rate schedule, such that
the ﬁnal learning rate is equal to 10% of the maxi-
mal learning rate. We use a weight decay of0.1 and
gradient clipping of 1.0. We use 2,000 warmup
0 200 400 600 800 1000 1200 1400
Billion of tokens
1.5
1.6
1.7
1.8
1.9
2.0
2.1
2.2Training loss
LLaMA 7B
LLaMA 13B
LLaMA 33B
LLaMA 65B
Figure 1: Training loss over train tokens for the 7B,
13B, 33B, and 65 models. LLaMA-33B and LLaMA-
65B were trained on 1.4T tokens. The smaller models
were trained on 1.0T tokens. 

In [None]:
# Out-of-scope question: the prompt tells the model to say it can't answer when the context lacks relevant facts.
get_answer("Give me recipe for Briani")

"I'm sorry, but I cannot answer that question with the context provided."

In [18]:
# Colab only: mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [21]:
# Copy the vector-store directory to the mounted Drive.
# NOTE(review): the store was created with the relative path "vdb", so this assumes the notebook's working directory is /content — confirm.
!cp -r "/content/vdb" "/content/drive/MyDrive"