In [1]:
import os
import string

import cohere
import faiss
import numpy as np
import pandas as pd
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from rank_bm25 import BM25Okapi
from sklearn.feature_extraction import _stop_words
from tqdm import tqdm
from transformers import AutoModelForCausalLM


In [2]:
# Credentials must never be stored in the notebook: the key is read from the
# CO_API_KEY environment variable, which has to be set before the kernel starts.
# NOTE(review): the key that used to be hardcoded on this line has been exposed
# in the notebook history and should be revoked and replaced.
api_key = os.environ.get("CO_API_KEY")

In [3]:
# Shared Cohere client; the cells below use it for embed, rerank and chat calls.
co = cohere.Client(api_key=api_key)

In [4]:
# Source document for the whole notebook: an encyclopedia-style summary of the film.
text = """
Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan.
It stars Matthew McConaughey, Anne Hathaway, Jessica Chastain, Bill Irwin, Ellen Burstyn, Matt Damon, and Michael Caine.
Set in a dystopian future where humanity is struggling to survive, the film follows a group of astronauts who travel through a wormhole near Saturn in search of a new home for mankind.

Brothers Christopher and Jonathan Nolan wrote the screenplay, which had its origins in a script Jonathan developed in 2007.
Caltech theoretical physicist and 2017 Nobel laureate in Physics[4] Kip Thorne was an executive producer, acted as a scientific consultant, and wrote a tie-in book, The Science of Interstellar.
Cinematographer Hoyte van Hoytema shot it on 35 mm movie film in the Panavision anamorphic format and IMAX 70 mm.
Principal photography began in late 2013 and took place in Alberta, Iceland, and Los Angeles.
Interstellar uses extensive practical and miniature effects and the company Double Negative created additional digital effects.

Interstellar premiered on October 26, 2014, in Los Angeles.
In the United States, it was first released on film stock, expanding to venues using digital projectors.
The film had a worldwide gross over $677 million (and $773 million with subsequent re-releases), making it the tenth-highest grossing film of 2014.
It received acclaim for its performances, direction, screenplay, musical score, visual effects, ambition, themes, and emotional weight.
It has also received praise from many astronomers for its scientific accuracy and portrayal of theoretical astrophysics. Since its premiere, Interstellar gained a cult following,[5] and now is regarded by many sci-fi experts as one of the best science-fiction films of all time.
Interstellar was nominated for five awards at the 87th Academy Awards, winning Best Visual Effects, and received numerous other accolades
"""


def split_into_sentences(raw_text):
    """Split ``raw_text`` on periods into trimmed, non-empty passages.

    Args:
        raw_text: The document to chunk.

    Returns:
        A list of passages with surrounding spaces/newlines removed. Pieces that
        are empty after trimming (e.g. the remainder after a trailing period)
        are dropped so that no empty passage is embedded or indexed later.
    """
    trimmed = (piece.strip(' \n') for piece in raw_text.split('.'))
    return [piece for piece in trimmed if piece]


# One passage per sentence; these are the retrieval units for every search below.
texts = split_into_sentences(text)

In [5]:
# Embed every passage as a "search_document" so queries can be matched against it later.
embed_response = co.embed(texts=texts, input_type="search_document")
respones = embed_response.embeddings  # original (misspelled) name kept for any later use

# Stack the per-passage vectors into one 2-D array and show its shape.
embeds = np.array(respones)
print(embeds.shape)

(15, 4096)


In [6]:
# Flat L2 index whose dimensionality is the width of the embedding matrix.
dim = embeds.shape[1]
index = faiss.IndexFlatL2(dim)

# The vectors are handed to FAISS as float32.
index.add(embeds.astype(np.float32))

In [7]:
def search(query, num_of_results=3):
    """Return the passages closest to ``query`` in the dense (embedding) index.

    The query is embedded through the module-level Cohere client ``co`` as a
    ``search_query`` and looked up in the module-level FAISS ``index``; the hit
    positions are mapped back to the module-level ``texts`` list.

    Args:
        query: Question or phrase to search for.
        num_of_results: Maximum number of passages to return (at least 1).

    Returns:
        A pandas DataFrame with a ``texts`` column (matched passages) and a
        ``distances`` column (the distance reported by the index for each hit),
        in the order the index returned them. It has fewer than
        ``num_of_results`` rows when the index holds fewer vectors.

    Raises:
        ValueError: If ``num_of_results`` is smaller than 1.
    """
    if num_of_results < 1:
        raise ValueError("num_of_results must be at least 1")

    query_embed = co.embed(
        texts=[query],
        input_type="search_query"
    ).embeddings[0]

    # The index is queried with a (1, dim) float32 matrix.
    query_matrix = np.asarray([query_embed], dtype=np.float32)
    distances, similar_item_ids = index.search(query_matrix, num_of_results)

    # FAISS fills slots it cannot satisfy with id -1. Indexing the passages
    # with -1 would silently return the *last* passage as a fake hit, so those
    # padded slots are removed before the lookup.
    hit_ids = np.asarray(similar_item_ids[0])
    found = hit_ids >= 0

    text_np = np.array(texts)
    results = pd.DataFrame(data={
        "texts": text_np[hit_ids[found]],
        "distances": np.asarray(distances[0])[found]
    })

    return results

In [8]:
# Demo question for the dense (embedding) search.
query = "how precise was the science"

# Ask for the single closest passage only.
result = search(query, 1)

print(result)

                                               texts     distances
0  It has also received praise from many astronom...  10757.379883


In [9]:
def bm25_tokenizer(text):
    """Turn ``text`` into the list of keyword tokens used for BM25.

    The text is lower-cased and split on whitespace; each piece has leading and
    trailing punctuation removed. Pieces that end up empty or that are English
    stop words are discarded.
    """
    stop_words = _stop_words.ENGLISH_STOP_WORDS
    candidates = (word.strip(string.punctuation) for word in text.lower().split())
    return [word for word in candidates if word and word not in stop_words]

In [10]:
# Tokenize every passage (with a progress bar) and build the BM25 index from the result.
tokenized_corpus = [bm25_tokenizer(passage) for passage in tqdm(texts)]

bm25 = BM25Okapi(tokenized_corpus)

100%|██████████| 15/15 [00:00<00:00, 91048.57it/s]


In [11]:
def keyword_search(query, top_k=3, num_candidates=15):
    """Print the best lexical (BM25) matches for ``query``.

    Scores come from the module-level ``bm25`` index, using ``bm25_tokenizer``
    on the query; passages are looked up in the module-level ``texts`` list.

    Args:
        query: Question or phrase to search for.
        top_k: Number of hits to print.
        num_candidates: Number of highest-scoring passages to consider before
            sorting. Values larger than the corpus are reduced to the corpus
            size.

    Returns:
        None. The query and one line per hit (score, passage) are printed.
    """
    print("Input question:", query)

    ##### BM25 search (lexical search) #####
    bm25_scores = bm25.get_scores(bm25_tokenizer(query))

    # np.argpartition raises when asked for more entries than exist, so never
    # request more candidates than there are scored passages.
    num_candidates = min(num_candidates, len(bm25_scores))
    if num_candidates <= 0:
        return

    top_n = np.argpartition(bm25_scores, -num_candidates)[-num_candidates:]
    bm25_hits = sorted(
        ({'corpus_id': idx, 'score': bm25_scores[idx]} for idx in top_n),
        key=lambda x: x["score"],
        reverse=True,
    )
    for hit in bm25_hits[0:top_k]:
        print("\t{:.3f}\t{}".format(hit['score'], texts[hit['corpus_id']].replace("\n", " ")))

In [12]:
# Run the lexical (BM25) search for the demo question; the hits are printed by the function.
keyword_search(query = "how precise was the science")

Input question: how precise was the science
	1.789	Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan
	1.373	Caltech theoretical physicist and 2017 Nobel laureate in Physics[4] Kip Thorne was an executive producer, acted as a scientific consultant, and wrote a tie-in book, The Science of Interstellar
	0.000	It stars Matthew McConaughey, Anne Hathaway, Jessica Chastain, Bill Irwin, Ellen Burstyn, Matt Damon, and Michael Caine


In [13]:
# Let the Cohere rerank endpoint score every passage against the question and
# keep the three best, with the passage text included in the response.
query = "how precise was the science"
results = co.rerank(
    query=query,
    documents=texts,
    top_n=3,
    return_documents=True,
)
results.results

[RerankResponseResultsItem(document=RerankResponseResultsItemDocument(text='It has also received praise from many astronomers for its scientific accuracy and portrayal of theoretical astrophysics'), index=12, relevance_score=0.16981852),
 RerankResponseResultsItem(document=RerankResponseResultsItemDocument(text='The film had a worldwide gross over $677 million (and $773 million with subsequent re-releases), making it the tenth-highest grossing film of 2014'), index=10, relevance_score=0.07004896),
 RerankResponseResultsItem(document=RerankResponseResultsItemDocument(text='Caltech theoretical physicist and 2017 Nobel laureate in Physics[4] Kip Thorne was an executive producer, acted as a scientific consultant, and wrote a tie-in book, The Science of Interstellar'), index=4, relevance_score=0.0043994132)]

In [14]:
# One line per reranked hit: position, relevance score, passage text.
for idx, result in enumerate(results.results):
    print(f"{idx} {result.relevance_score} {result.document.text}")

0 0.16981852 It has also received praise from many astronomers for its scientific accuracy and portrayal of theoretical astrophysics
1 0.07004896 The film had a worldwide gross over $677 million (and $773 million with subsequent re-releases), making it the tenth-highest grossing film of 2014
2 0.0043994132 Caltech theoretical physicist and 2017 Nobel laureate in Physics[4] Kip Thorne was an executive producer, acted as a scientific consultant, and wrote a tie-in book, The Science of Interstellar


In [15]:
def keyword_and_reranking_search(query, top_k=3, num_candidates=10):
    """Print BM25 hits for ``query`` and the same candidates after reranking.

    First the module-level ``bm25`` index selects up to ``num_candidates``
    passages from ``texts``; the best ``top_k`` of them are printed. All
    candidates are then sent to the Cohere rerank endpoint (module-level
    ``co``) and its ``top_k`` results are printed.

    Args:
        query: Question or phrase to search for.
        top_k: Number of hits to print for each stage.
        num_candidates: Number of BM25 candidates handed to the reranker.
            Values larger than the corpus are reduced to the corpus size.

    Returns:
        None. Everything is printed.
    """
    print("Input question:", query)

    ##### BM25 search (lexical search) #####
    bm25_scores = bm25.get_scores(bm25_tokenizer(query))

    # np.argpartition raises when asked for more entries than exist, so never
    # request more candidates than there are scored passages.
    num_candidates = min(num_candidates, len(bm25_scores))
    if num_candidates > 0:
        top_n = np.argpartition(bm25_scores, -num_candidates)[-num_candidates:]
    else:
        top_n = []
    bm25_hits = [{'corpus_id': idx, 'score': bm25_scores[idx]} for idx in top_n]
    bm25_hits = sorted(bm25_hits, key=lambda x: x['score'], reverse=True)

    # The label follows top_k instead of a hardcoded "3".
    print(f"Top-{top_k} lexical search (BM25) hits")
    for hit in bm25_hits[0:top_k]:
        print("\t{:.3f}\t{}".format(hit['score'], texts[hit['corpus_id']].replace("\n", " ")))

    #Add re-ranking
    docs = [texts[hit['corpus_id']] for hit in bm25_hits]
    if not docs:
        # Nothing to rerank; skip the API call.
        return

    print(f"\nTop-{top_k} hits by rank-API ({len(bm25_hits)} BM25 hits re-ranked)")
    results = co.rerank(query=query, documents=docs, top_n=top_k, return_documents=True)
    for hit in results.results:
        print("\t{:.3f}\t{}".format(hit.relevance_score, hit.document.text.replace("\n", " ")))

In [16]:
keyword_and_reranking_search(query = "how precise was the science")

Input question: how precise was the science
Top-3 lexical search (BM25) hits
	1.789	Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan
	1.373	Caltech theoretical physicist and 2017 Nobel laureate in Physics[4] Kip Thorne was an executive producer, acted as a scientific consultant, and wrote a tie-in book, The Science of Interstellar
	0.000	Interstellar uses extensive practical and miniature effects and the company Double Negative created additional digital effects

Top-3 hits by rank-API (10 BM25 hits re-ranked)
	0.004	Caltech theoretical physicist and 2017 Nobel laureate in Physics[4] Kip Thorne was an executive producer, acted as a scientific consultant, and wrote a tie-in book, The Science of Interstellar
	0.004	Set in a dystopian future where humanity is struggling to survive, the film follows a group of astronauts who travel through a wormhole near Saturn in search of a new home for mankind
	0.003	Brothers Christopher and Jonat

In [17]:
# Retrieval-augmented generation that uses Cohere's chat endpoint as the generator.

query = "income generated"

# Step 1 - Retrieval
# Dense (embedding) search only; ideally this would be a hybrid of dense and lexical search.
results = search(query)

# Step 2 - Grounded generation
# The retrieved passages are passed to the chat call as documents.
docs_dict = [{'text': passage} for passage in results['texts']]
response = co.chat(message=query, documents=docs_dict)

# Retrieval + generation => grounded LLM answer (RAG)
print(response.text)

The film grossed over $677 million worldwide, and $773 million with subsequent re-releases.


In [18]:
# # To download any model from the Hugging Face Hub
# # This is the direct way of downloading a model. For access-restricted models you
# # first need to find out how to prove that you have been granted access to them

# from huggingface_hub import snapshot_download

# snapshot_download(
#     repo_id="meta-llama/Llama-3.1-8B-Instruct",
#     local_dir="/home/abhinav/Desktop/AIProjects/NLP/Building_RAG/"
# )

In [19]:
# # The below code is to download using transformers library

# import torch
# from transformers import AutoModelForCausalLM, AutoTokenizer

# model_name = ""

# # The below code is to download a model
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# tokenizer.save_pretrained(f"cache/tokenizer/{model_name}")

# model = AutoModelForCausalLM.from_pretrained(model_name)
# model.save_pretrained(f"cache/model/{model_name}")


# # The below code is to load the model from local directory
# tokenizer = AutoTokenizer.from_pretrained(f"cache/tokenizer/{model_name}")
# model = AutoModelForCausalLM.from_pretrained(f"cache/model/{model_name}")

In [20]:
# # All models downloaded from Hugging Face are stored in the cache directory
# # ~/.cache/huggingface/hub
# # Go to this directory, open the folder of the model you want to load, go to snapshots, and you will find a folder
# # This folder contains the config.json file
# # Copy the path to this folder and paste it into model_name_or_path

# model_name_or_path= "/home/abhinav/.cache/huggingface/hub/models--thenlper--gte-small/snapshots/17e1f347d17fe144873b1201da91788898c639cd"

# model = AutoModelForCausalLM.from_pretrained(model_name_or_path)

In [21]:
# # To use the downloaded model using llama-cpp

# from langchain import LlamaCpp

# # Make sure the model path is correct for your system!
# llm = LlamaCpp(
#     model_path="Phi-3-mini-4k-instruct-fp16.gguf",
#     n_gpu_layers=-1,
#     max_tokens=500,
#     n_ctx=2048,
#     seed=42,
#     verbose=False
# )

In [22]:
# To use a model downloaded from ollama

# from langchain_community.llms import Ollama

# llm = Ollama(model="gemma:2b")
# llm.invoke("tell me about partial functions in python")

In [23]:
# # To use a model using ollama library

# import ollama

# response = ollama.generate(model='gemma:2b',
# prompt='what is a qubit?')
# print(response['response'])

In [24]:
# Load an embedding model from Hugging Face
# The model is taken from the cache directory if it has already been downloaded; otherwise it is downloaded from the web

from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# Embedding Model for converting text to numerical representations
# (identified by its Hugging Face model name)
embedding_model = HuggingFaceEmbeddings(
    model_name='thenlper/gte-small'
)

  embedding_model = HuggingFaceEmbeddings(
2025-01-23 08:22:47.533830: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-23 08:22:47.695335: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-23 08:22:47.695382: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-23 08:22:47.713684: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-23 08:22:47.752041: 

In [25]:
# Use a model that is served locally by Ollama

from langchain_community.llms import Ollama

# `llm` is reused further down as the generator of the RetrievalQA chain.
llm = Ollama(model="gemma:2b")
# Quick smoke test: the returned completion is shown as the cell output.
llm.invoke("tell me about partial functions in python")

"Sure, here's a detailed explanation of partial functions in Python:\n\n**Partial Functions**\n\nPartial functions are functions that take a subset of the arguments of a full function and return a single value. They are commonly used when you have a function that operates on a set of data and you want to apply it to only a subset of that data.\n\n**Syntax**\n\nA partial function is defined using the same syntax as a full function, except that you specify the arguments that you want to pass to the function. For example, the following code defines a partial function called `partial_sum` that takes two arguments, `a` and `b`, and returns the sum of those two arguments:\n\n```python\npartial_sum = lambda a, b: a + b\n```\n\n**Examples**\n\nHere are a few examples of partial functions in Python:\n\n* `partial_sum(2, 3)` returns 5\n* `partial_sum(4, 5, 6)` returns 15\n* `partial_sum(a, b)` returns the same value as `partial_sum(a, b)`\n\n**Uses**\n\nPartial functions can be used for a variet

In [27]:
# Creating a local vector store database

# Build the LangChain FAISS vector store from the passages, using `embedding_model` to embed them.
db = FAISS.from_texts(texts, embedding_model)

In [28]:
# Prompt for the RAG chain. {context} receives the retrieved passages and
# {question} the user's query.
# The template is plain text on purpose: the previous version wrapped it in
# Phi-3 chat control tokens (<|user|>, <|end|>, <|assistant|>), which do not
# belong to the gemma model served through Ollama and would reach it as
# literal noise. NOTE(review): this assumes Ollama applies the served model's
# own chat template to the prompt - confirm for your Ollama setup.
template = """Relevant information:
{context}

Provide a concise answer to the following question using the relevant information provided above:
{question}"""
prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"]
)

# RAG Pipeline
## RetrievalQA:
# A pre-built class in langchain designed to handle Retrieval-Augmented Generation (RAG).
# It combines a retrieval mechanism (fetching relevant data) with a language model for answering questions.


# How to retrieve a custom number of documents or apply a similarity score threshold:
# retriever = db.as_retriever(search_kwargs={"k": 5})  # Retrieve the top 5 documents
# retriever = db.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.5})


# Retriever over the local vector store, with its default search settings.
retriever = db.as_retriever()

# 'stuff' chain: the retrieved passages are placed into the custom prompt for the LLM.
rag = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt},
    verbose=True,
)

In [29]:
# Ask the local RAG chain; the returned dict (query and result) is shown as the cell output.
rag.invoke('Income generated')



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


{'query': 'Income generated',
 'result': 'The film had a worldwide gross over $677 million (and $773 million with subsequent re-releases).'}