In [101]:
### Basic dependencies and imports
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")

### Data Ingestion - 3 Ways shown below
1. Text file
2. Web Scraping
3. PDF file

In [None]:
### Data Ingestion - Text File Loader
from langchain_community.document_loaders import TextLoader
loader = TextLoader("speech.txt")
text_documents = loader.load()
text_documents  # To see the content of the documents

In [None]:
# Data Ingestion - Web Based Loader
from langchain_community.document_loaders import WebBaseLoader
import bs4

# load,chunk and index the content of the html page

loader = WebBaseLoader(web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
                       bs_kwargs=dict(parse_only=bs4.SoupStrainer(
                           # class names are the class of the div of the html page, which was found by inspecting the page
                           class_=("post-title", "post-content", "post-header")
                       )))

text_documents = loader.load()
text_documents  # To see the content of the documents

In [None]:
### Data Ingestion - PDF File Loader
from langchain_community.document_loaders import PyPDFLoader
loader=PyPDFLoader('attention.pdf')
docs=loader.load()
docs

### Data Transformation - Text Splitter

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
#text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)    # Normal implementation
text_splitter=RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=150)  # Attempt 1 - To increase search accuracy percentage by reducing the chunk size

#NOTE : Reducing the chunk size from 1000 and reducing the chunk overlap from 200 to 150, increased the search accuracy percentage from 59.0010% to 62.6558 %

documents=text_splitter.split_documents(docs)
documents[:5]
#documents

### Vector Embedding and Vector Store Creation

In [None]:
## OpenAI Approach 1 - Vector Embedding And Vector Store - Using OpenAI
from langchain_openai import OpenAIEmbeddings   # For embedding the text using OpenAI
from langchain_community.vectorstores import Chroma # Chroma is a vector db store, provided by langchain_community

#TODO : Research more on various embedding models and vector stores

db = Chroma.from_documents(documents,OpenAIEmbeddings())

In [None]:
## Ollama Approach 1 - Vector Embedding And Vector Store - Using Ollama
from langchain_community.embeddings import OllamaEmbeddings   # For embedding the text using Ollama
from langchain_community.vectorstores import Chroma # Chroma is a vector db store, provided by langchain_community

db = Chroma.from_documents(documents,OllamaEmbeddings())

In [None]:
## Ollama Approach 2 - Vector Embedding And Vector Store - Using Ollama and ecqulidian distance
import os
import chromadb
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

# Create a persistent directory
persist_directory = "chroma_db"
if not os.path.exists(persist_directory):
    os.makedirs(persist_directory)

# Configure Chroma settings
client_settings = chromadb.config.Settings(
    is_persistent=True,
    persist_directory=persist_directory
)

# Initialize Chroma with Ollama embeddings and persistence
db = Chroma.from_documents(
    documents=documents,
    embedding=OllamaEmbeddings(),
    persist_directory=persist_directory,
    client_settings=client_settings,
    collection_name="attention_paper",  # Give your collection a name
)

# Persist the database
db.persist()

In [None]:
## Ollama Approach 3 - Vector Embedding And Vector Store - Using Ollama and cosine distance
import os
import chromadb
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

# Create a persistent directory and collection name
persist_directory = "chroma_db_cosine"
collection_name = "attention_paper_cosine"

if not os.path.exists(persist_directory):
    os.makedirs(persist_directory)

# Configure Chroma settings
client_settings = chromadb.config.Settings(
    is_persistent=True,
    persist_directory=persist_directory
)

# Create Chroma client
client = chromadb.Client(client_settings)

try:
    client.delete_collection(name=collection_name)
    print(f"Existing collection {collection_name} deleted")
except Exception as e:
    print(f"No existing collection {collection_name} to delete: {e}")

# Create collection with specific distance function
#collection = client.get_or_create_collection(
collection = client.create_collection(
    name=collection_name,
    metadata={"hnsw:space": "cosine"}  # Available options: "cosine", "l2", "ip"
)

# Initialize Chroma with Ollama embeddings and persistence
db_cosine = Chroma.from_documents(
    documents=documents,
    embedding=OllamaEmbeddings(),
    persist_directory=persist_directory,
    client_settings=client_settings,
    collection_name=collection_name
)

# Persist the database
db_cosine.persist()
print(f"Collection {collection_name} saved to {persist_directory} with cosine distance function.")

Existing collection attention_paper_cosine deleted
Collection attention_paper_cosine saved to chroma_db_cosine with cosine distance function.


In [102]:
## FAISS Approach 1 - Vector Embedding And Vector Store - FAISS Vector Database
from langchain_community.vectorstores import FAISS  # FAISS is a vector db store, provided by facebook
import os
import shutil

# Create a persistent directory and collection name
persist_directory = "faiss_db"
faiss_index_name = "attention_paper_index"
index_path = os.path.join(persist_directory, faiss_index_name)

# Check and delete existing index
if os.path.exists(index_path):
    try:
        shutil.rmtree(index_path)
        print(f"Existing FAISS index at {index_path} deleted")
    except Exception as e:
        print(f"Error deleting existing index: {e}")
else:
    # Create directory if it doesn't exist
    if not os.path.exists(persist_directory):
        os.makedirs(persist_directory)
        print(f"Created new directory: {persist_directory}")
    
# Use either of the below(Comment out the one not in use):
## For Embeddings using OpenAI( - Paid)
#db_faiss = FAISS.from_documents(documents[:15], OpenAIEmbeddings())

## For Embeddings using Ollama( - Free as its local)
db_faiss = FAISS.from_documents(
    documents=documents,
    embedding=OllamaEmbeddings()
)

# Save the FAISS index
db_faiss.save_local(
    folder_path=persist_directory,
    index_name=faiss_index_name
)
print(f"FAISS index saved to {os.path.join(persist_directory, faiss_index_name)}")

# To load the saved index later:
db_faiss = FAISS.load_local(
    folder_path=persist_directory,
    index_name=faiss_index_name,
    embeddings=OllamaEmbeddings(),
    allow_dangerous_deserialization=True  # Only use if you trust the source of the index
)

FAISS index saved to faiss_db/attention_paper_index


### Data Querying - From Vector Store(DB)

In [None]:
## Approach 1 - Chroma DB - Simple similarity search - query
query = "Who are the authors of attention is all you need?"

#TODO : Research more on various search methods

retireved_results=db.similarity_search(query)
print(retireved_results[0].page_content)

In [None]:
## Approach 2 - Chroma DB - Similarity search with relevance scores - query
query = "Who are the authors of attention is all you need?"

# Fetch top 3 results for the query
results = db_cosine.similarity_search_with_relevance_scores(query, k=3)
for doc, score in results:
    print(f"\nRelevance Score: {score * 100:.4f} %")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")
    print("-" * 80)


Relevance Score: 62.6558 %
Content: In International Conference on Learning Representations, 2017.
[20] Diederik Kingma and Jimmy Ba. Adam: A method for stochastic optimization. In ICLR, 2015.
[21] Oleksii Kuchaiev and Boris Ginsburg. Factorization tricks for LSTM networks. arXiv preprint
arXiv:1703.10722, 2017.
[22] Zhouhan Lin, Minwei Feng, Cicero Nogueira dos Santos, Mo Yu, Bing Xiang, Bowen
Zhou, and Yoshua Bengio. A structured self-attentive sentence embedding. arXiv preprint
arXiv:1703.03130, 2017.
Metadata: {'source': 'attention.pdf', 'page': 10}
--------------------------------------------------------------------------------

Relevance Score: 56.8199 %
Content: Attention Visualizations
Input-Input Layer5
It
is
in
this
spirit
that
a
majority
of
American
governments
have
passed
new
laws
since
2009
making
the
registration
or
voting
process
more
difficult
.
<EOS>
<pad>
<pad>
<pad>
<pad>
<pad>
<pad>
It
is
in
this
spirit
that
a
majority
of
American
governments
have
passed
new
laws
s

In [57]:
## Approach 3 - FAISS DB - Various Similarity search approaches
query = "Who are the authors of attention is all you need?"

# Helper function to normalize scores
def normalize_score(score):
    """Convert FAISS distance score to percentage similarity."""
    # FAISS returns L2 distance, smaller is better
    # Max reasonable L2 distance is around 2.0 for normalized vectors
    max_l2_distance = 2.0
    similarity = max(0, min(100, (1 - (score / max_l2_distance)) * 100))
    return similarity

# Method 1: Basic similarity search with score threshold
results_with_scores = db_faiss.similarity_search_with_score(
    query=query,
    k=3  # Get top 3 results
)

results_with_scores_in_cosine = db_faiss.similarity_search_with_score(
    query=query,
    k=3,
    metric="cosine"  # Use cosine similarity instead of L2
)

print("Method 1: Basic Similarity Search with Scores - L2 distance")
print("-" * 50)
for doc, score in results_with_scores:
    # Lower score means more similar in FAISS
    similarity_percentage = (1 - score) * 100
    print(f"\nSimilarity: {similarity_percentage:.2f}%")
    print(f"Content: {doc.page_content[:200]}...")
    print(f"Metadata: {doc.metadata}")
    print("-" * 50)
    
print("Method 1: Basic Similarity Search with Scores - cosine similarity")
print("-" * 50)
for doc, score in results_with_scores_in_cosine:
    # Lower score means more similar in FAISS
    similarity_percentage = (1 - score) * 100
    print(f"\nSimilarity: {similarity_percentage:.2f}%")
    print(f"Content: {doc.page_content[:200]}...")
    print(f"Metadata: {doc.metadata}")
    print("-" * 50)

# Method 2: MMR Search for diverse results
mmr_results = db_faiss.max_marginal_relevance_search(
    query=query,
    k=3,  # Number of documents to return
    fetch_k=10,  # Number of documents to fetch before filtering
    lambda_mult=0.7  # Diversity factor (0=max diversity, 1=max similarity)
)

print("\nMethod 2: MMR Search (Diverse Results)")
print("-" * 50)
""" for i, doc in enumerate(mmr_results, 1):
    print(f"\nResult {i}:")
    print(f"Content: {doc.page_content[:200]}...")
    print(f"Metadata: {doc.metadata}")
    print("-" * 50) """
# Get similarity scores for MMR results
for doc in mmr_results:
    # Get similarity score for this document
    score = db_faiss.similarity_search_with_score(
        doc.page_content,  # Use document content as query
        k=1  # Get only the closest match
    )[0][1]  # Extract the score from the result
    
    # Convert score to similarity percentage
    similarity_percentage = (1 - score) * 100
    #similarity_percentage = normalize_score(score)
    
    print(f"\nSimilarity: {similarity_percentage:.2f}%")
    print(f"Content: {doc.page_content[:200]}...")
    print(f"Metadata: {doc.metadata}")
    print(f"Diversity Factor (lambda_mult): 0.7")
    print("-" * 50)

# Method 3: Hybrid search with metadata filtering
# Assuming documents have 'section' metadata
filtered_results = db_faiss.similarity_search_with_score(
    query=query,
    k=3,
    filter={"section": "abstract"}  # Optional: filter by metadata if available
)

print("\nMethod 3: Filtered Similarity Search")
print("-" * 50)
for doc, score in filtered_results:
    similarity_percentage = (1 - score) * 100
    print(f"\nSimilarity: {similarity_percentage:.2f}%")
    print(f"Content: {doc.page_content[:200]}...")
    print(f"Metadata: {doc.metadata}")
    print("-" * 50)

Method 1: Basic Similarity Search with Scores - L2 distance
--------------------------------------------------

Similarity: -1070487.89%
Content: In International Conference on Learning Representations, 2017.
[20] Diederik Kingma and Jimmy Ba. Adam: A method for stochastic optimization. In ICLR, 2015.
[21] Oleksii Kuchaiev and Boris Ginsburg. F...
Metadata: {'source': 'attention.pdf', 'page': 10}
--------------------------------------------------

Similarity: -1234675.39%
Content: Attention Visualizations
Input-Input Layer5
It
is
in
this
spirit
that
a
majority
of
American
governments
have
passed
new
laws
since
2009
making
the
registration
or
voting
process
more
difficult
.
<EOS...
Metadata: {'source': 'attention.pdf', 'page': 12}
--------------------------------------------------

Similarity: -1361067.68%
Content: Language Processing, pages 832–841. ACL, August 2009.
[15] Rafal Jozefowicz, Oriol Vinyals, Mike Schuster, Noam Shazeer, and Yonghui Wu. Exploring
the limits of language mode

### Advanced RAG search - Chain and Retreival using LLM

In [103]:
## The retrival methods used above are not that accurate hence we will use LLM chain to get the more accurate results

from langchain_community.llms import Ollama

# Load the Ollama model
llm = Ollama(model="llama2", temperature=0.7)  # Adjust temperature for creativity

# Define the chat prompt template
from langchain_core.prompts import ChatPromptTemplate
prompt_template = ChatPromptTemplate.from_template(
    """Answer the following question based only on provided context.
    Think step by step before providing a detailed answer.
    <context> {context} <context/>
    Question: {input}"""
)

In [104]:
# Create Stuff Document Chain
from langchain.chains.combine_documents import create_stuff_documents_chain
chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt_template
)

In [105]:
# Create the retriever(an interface to a vector store which has all the information)

# Vector store retriever for FAISS db
retriever = db_faiss.as_retriever(search_kwargs={"k": 5})  # Fetch top 5 documents
#retriever = db_faiss.as_retriever()  # Fetch from all documents
retriever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x10ba3fa70>, search_kwargs={'k': 5})

In [106]:
# Finally, creating the retriever chain

from langchain.chains import create_retrieval_chain
retrieval_chain = create_retrieval_chain(retriever,chain)
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x10ba3fa70>, search_kwargs={'k': 5}), config={'run_name': 'retrieve_documents'})
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), config={'run_name': 'format_inputs'})
            | ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='Answer the following question based only on provided context.\n    Think step by step before providing a detailed answer.\n    <context> {context} <context/>\n    Question: {input}'))])
            | Ollama(temperature=0.7)
            | StrOutputParser(), config={'run_na

In [107]:
# Now, query the chain with a question

query1 = "Who are the authors of attention is all you need?"
query2 = "What is a scaled dot product attention?"
query3 = "What are the types of attention?"
query4 = "How much time did each base model training step took?"

# response = retrieval_chain.invoke({"input": query1})
# response = retrieval_chain.invoke({"input": query2})

# INTERESTING : As we have limited the number of documents to 5 by search_kwargs={"k": 5}, it is not able to find the correct number of items to list.
# BUT, if we remove the search_kwargs={"k": 5} it does not nessecarily return the correct number of items
# response = retrieval_chain.invoke({"input": query3})

response = retrieval_chain.invoke({"input": query4})

print("Question:")
print(response['input'])
# print("Context Used:")
# print(response['context'])
print("Answer:")
# To the point answer, BUT CAN CHANGE WITH EACH RUN, and hence accuracy is not guaranteed
print(response['answer'])

Question:
How much time did each base model training step took?
Answer:
Based on the context provided, we can infer that the author is asking for the time taken by each base model training step.

From the text, we know that there were 8 P100 GPUs used for training, and the training took 3.5 days. We can also see that the author mentions that the configuration of the model is listed in the bottom line of Table 3.

Therefore, we can calculate the time taken by each base model training step as follows:

Time taken by each base model training step = 3.5 days / 8 GPUs = 0.4425 days (or approximately 1 hour and 27 minutes) per training step.
