In [1]:
import ollama
import time
import os
import json
import numpy as np
from numpy.linalg import norm
from openai import OpenAI
import time

In [2]:
def parse_file(filename):
    """Split a text file into paragraphs.

    The file is read as UTF-8 (a leading BOM, if present, is dropped by the
    ``utf-8-sig`` codec). Consecutive non-blank lines are stripped and joined
    with a single space to form one paragraph; any line that is empty after
    stripping ends the current paragraph.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of paragraph strings, in file order. Empty for a file with no
        non-blank lines.
    """
    paragraphs = []
    pending_lines = []
    with open(filename, encoding="utf-8-sig") as handle:
        for raw_line in handle:
            text = raw_line.strip()
            if text:
                pending_lines.append(text)
                continue
            # Blank line: close the paragraph collected so far, if any.
            if pending_lines:
                paragraphs.append(" ".join(pending_lines))
                pending_lines = []
    # The file may end without a trailing blank line.
    if pending_lines:
        paragraphs.append(" ".join(pending_lines))
    return paragraphs


def save_embeddings(filename, embeddings):
    """Write embeddings to ``embeddings/<filename>.json``.

    Args:
        filename: Cache key; used verbatim as the JSON file's base name. It
            may contain sub-directories, which are created as needed.
        embeddings: JSON-serialisable object to store (the notebook passes a
            list of embedding vectors).

    Raises:
        OSError: If the directory or file cannot be created.
        TypeError: If ``embeddings`` is not JSON-serialisable.
    """
    target_path = f"embeddings/{filename}.json"
    # exist_ok=True avoids the check-then-create race, and using the target's
    # dirname also covers filenames that include a sub-directory.
    os.makedirs(os.path.dirname(target_path), exist_ok=True)
    # dump embeddings to json
    with open(target_path, "w") as f:
        json.dump(embeddings, f)




def load_embeddings(filename):
    """Read previously saved embeddings from ``embeddings/<filename>.json``.

    Args:
        filename: Cache key used as the JSON file's base name.

    Returns:
        The decoded JSON content when the cache file exists, otherwise the
        sentinel ``False``.
    """
    cache_path = f"embeddings/{filename}.json"
    if os.path.exists(cache_path):
        # Cache hit: decode and hand back the stored object.
        with open(cache_path, "r") as handle:
            return json.load(handle)
    # Cache miss is signalled with False rather than an exception.
    return False



def get_embeddings(filename, modelname, chunks):
    """Return one embedding per chunk, using the on-disk cache when valid.

    The cache is looked up by ``filename`` only. A cached result is reused
    only if it is a list with exactly one entry per chunk; otherwise it is
    treated as stale (e.g. the source text was edited) and recomputed, so the
    returned list always lines up index-for-index with ``chunks``.

    NOTE: ``modelname`` is not part of the cache key. After switching models,
    delete the cached JSON file to force re-embedding.

    Args:
        filename: Cache key passed to ``load_embeddings``/``save_embeddings``.
        modelname: Name of the Ollama embedding model.
        chunks: Iterable of text chunks to embed.

    Returns:
        A list of embedding vectors, one per chunk, in chunk order.
    """
    # Materialise so generators work and the length can be compared.
    chunks = list(chunks)

    # check if embeddings are already saved (and still match the chunks)
    cached = load_embeddings(filename)
    if cached is not False and isinstance(cached, list) and len(cached) == len(chunks):
        return cached

    # get embeddings from ollama
    embeddings = [
        ollama.embeddings(model=modelname, prompt=chunk)["embedding"]
        for chunk in chunks
    ]
    # save embeddings
    save_embeddings(filename, embeddings)
    return embeddings

def find_most_similar(needle, haystack):
    """Rank haystack vectors by cosine similarity to ``needle``.

    Args:
        needle: Query vector (sequence of numbers).
        haystack: Sequence of vectors with the same length as ``needle``.

    Returns:
        A list of ``(similarity, index)`` tuples sorted from most to least
        similar, where ``index`` is the vector's position in ``haystack``.
        Ties are ordered by descending index. A pair involving a zero-length
        (all-zero) vector gets similarity ``0.0``.
    """
    needle_norm = norm(needle)

    similarity_scores = []
    for item in haystack:
        denominator = needle_norm * norm(item)
        if denominator == 0:
            # Cosine similarity is undefined (0/0 -> NaN) for a zero vector,
            # and NaN makes the sort order below unpredictable.
            similarity_scores.append(0.0)
        else:
            similarity_scores.append(np.dot(needle, item) / denominator)

    return sorted(zip(similarity_scores, range(len(haystack))), reverse=True)


def rag_function(
    prompt,
    filename="data.txt",
    embed_model="nomic-embed-text",
    chat_model="mistral:latest",
    top_k=5,
):
    """Answer ``prompt`` with retrieval-augmented generation and print the result.

    Steps: split ``filename`` into paragraphs, obtain (possibly cached)
    paragraph embeddings, embed the prompt, pick the ``top_k`` paragraphs most
    similar to the prompt, and send them to the chat model as context in the
    system message. The answer and the elapsed wall time are printed.

    NOTE: ``get_embeddings`` looks up its cache by ``filename`` only, so after
    changing ``embed_model`` the cached embeddings for that file should be
    deleted to avoid comparing vectors from different models.

    Args:
        prompt: The user's question.
        filename: Text file used as the knowledge source (and as cache key).
        embed_model: Ollama model used for paragraph and prompt embeddings.
        chat_model: Ollama model used to generate the answer.
        top_k: Number of most-similar paragraphs supplied as context.

    Returns:
        None. Output is printed.
    """
    SYSTEM_PROMPT = """You are a helpful reading assistant who answers questions
        based on snippets of text provided in context. Answer only using the context provided,
        being as concise as possible. If you're unsure, just say that you don't know.
        Do not give answers that are outside the context given.
        Answer in about 150 words for each prompt.
        Context:
    """
    # perf_counter is monotonic, so the measurement is immune to clock changes.
    start_time = time.perf_counter()

    paragraphs = parse_file(filename)
    embeddings = get_embeddings(filename, embed_model, paragraphs)

    prompt_embedding = ollama.embeddings(model=embed_model, prompt=prompt)["embedding"]
    most_similar_chunks = find_most_similar(prompt_embedding, embeddings)[:top_k]

    # Each item is (similarity, paragraph_index); only the index is needed.
    context = "\n".join(paragraphs[item[1]] for item in most_similar_chunks)

    response = ollama.chat(
        model=chat_model,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT + context},
            {"role": "user", "content": prompt},
        ],
    )

    end_time = time.perf_counter()
    print("\n")
    print(response['message']['content'])
    print("\n")
    print(f"Execution Time {end_time-start_time} ")


In [3]:
# Straightforward factual question.
prompt = "Enlist the achievements of Sachin Tendulkar"
rag_function(prompt)



1. Regarded as one of the greatest batsmen in cricket history.
2. Holds the record for the most runs in both ODI (over 18,000) and Test (over 15,000) cricket.
3. Received numerous awards, including the Arjuna Award (1994), Khel Ratna Award (1997), Padma Shri (1998), and Padma Vibhushan (2008).
4. Elected as a Member of Parliament, Rajya Sabha from 2012 to 2018.
5. First sportsperson to receive the Bharat Ratna, India's highest civilian award.
6. Included in Time magazine's annual list of the most influential people in the world (2010).
7. Awarded the Sir Garfield Sobers Trophy for cricketer of the year at the 2010 International Cricket Council (ICC) Awards.
8. Played his first Test match at age 16 and represented India internationally for over 24 years.
9. Named "Player of the Tournament" at the 2003 World Cup.
10. Part of the team that won the 2011 Cricket World Cup, his first win in six World Cup appearances for India.
11. Included in an all-time Test World XI to mark the 150th ann

In [4]:
# Reasoning RAG: comparison of two players as batsmen.
prompt = "Who is better batsman Sachin or Sourav?"
rag_function(prompt)



 The text does not directly compare Sachin Tendulkar and Sourav Ganguly as batsmen, but it mentions that Tendulkar is hailed as the world's most prolific batsman and holds the all-time highest run-scorer in both ODI and Test cricket. No such accolades are mentioned for Ganguly regarding his batting skills. However, Ganguly was a successful captain and led India to several victories. Therefore, it is not possible to definitively say who is a better batsman based on the provided context.


Execution Time 70.53378677368164 


In [5]:
# Reasoning RAG: comparison of two players as captains.
prompt = "Who is better Captain Dhoni or Sourav?"
rag_function(prompt)



 The text provided does not make a direct comparison between the captaining skills of Mahendra Singh Dhoni and Sourav Ganguly, as it focuses on their achievements instead. However, both are considered successful Indian cricket captains, with Dhoni leading India to victory in three different limited overs ICC tournaments (2007 ICC World Twenty20, 2011 Cricket World Cup, and 2013 ICC Champions Trophy) and Ganguly winning the 2002 ICC Champions Trophy and reaching the final of the 2003 Cricket World Cup.


Execution Time 70.4772744178772 


In [6]:
# Analytical RAG: asks for a numeric difference between two run totals.
prompt = "Difference between total runs scored by Sachin and Dhoni"
rag_function(prompt)



 The total runs scored by Sachin Tendulkar are more than those scored by M.S. Dhoni. While Sachin has scored over 18,000 runs in Test cricket and more than 18,000 runs in One Day Internationals (ODIs), putting his combined total at approximately 36,000 runs, Dhoni's international career total stands at 17,266 runs.


Execution Time 66.23426604270935 


In [7]:
# Analytical RAG: compares the accolades of two players.
prompt = "Who has most accolades Dhoni or sachin?"
rag_function(prompt)



 Based on the provided context, it is not possible to definitively determine which of MSDhoni and Sachin Tendulkar has more accolades as the information only covers some of the awards received by MSDhoni. However, it is known that Tendulkar holds several records in cricket, such as being the all-time highest run-scorer in both ODI and Test cricket and receiving the most player of the match awards in international cricket. MSDhoni, on the other hand, has been awarded India's highest sport honor, the Major Dhyan Chand Khel Ratna Award, as well as several civilian honors from the Indian government and is one of the most successful captains in international cricket, leading India to three different limited overs ICC tournaments. To accurately compare their accolades, a comprehensive comparison considering all awards and records held by both cricketers would be needed.


Execution Time 78.81063556671143 


In [8]:
# Analytical RAG: World Cup comparison across three players.
prompt = "Amongst three, who is the best player in World cups?"
rag_function(prompt)



 The provided context does not explicitly state a winner for the best player in World Cups. However, it mentions that Sachin Tendulkar was part of the Indian team that won the 2011 Cricket World Cup and was named "Player of the Tournament" at the 2003 World Cup. Mahendra Singh Dhoni led India to victory in the 2011 Cricket World Cup, making him a winner of the tournament as a captain, but not as a player. Sourav Ganguly was part of the Indian team that reached the final of the 2003 Cricket World Cup, but did not win it. Therefore, based on the information given, it appears that both Tendulkar and Dhoni have had significant contributions to India's success in World Cups, but no definitive answer can be provided about who is the best player in World cups from this context.


Execution Time 86.46004176139832 


In [9]:
# Analytical RAG: awards comparison across three players.
prompt = "Amongst three, who has most awards?"
rag_function(prompt)



 Among the three cricketers provided in the context (Sachin Tendulkar, Mahendra Singh Dhoni, and Sourav Ganguly), Sachin Tendulkar has received the most awards. He was honored with the Arjuna Award, Khel Ratna Award, Padma Shri, Padma Vibhushan, and Bharat Ratna (India's highest civilian award) from the government of India. Additionally, he was included in Time magazine's list of the most influential people in the world in 2010 and won the Sir Garfield Sobers Trophy for cricketer of the year at the 2010 International Cricket Council (ICC) Awards.


Execution Time 66.34929537773132 


In [10]:
# Summary RAG: length-bounded career summary for one player.
prompt = "Give me summary of Dhoni's career, in less than 200 words?"
rag_function(prompt)



 Mahendra Singh Dhoni is an esteemed Indian cricketer who played as a right-handed batsman and wicket-keeper. He represented the Indian cricket team from 2004 to 2019 and captained the side in limited overs formats from 2007 to 2017, making him the most successful Indian captain. Dhoni led India to victory in significant tournaments including the 2007 ICC World Twenty20, the 2011 Cricket World Cup, and the 2013 ICC Champions Trophy. He is also known for his contributions in the Indian Premier League (IPL), where he played for Chennai Super Kings, leading them to five IPL finals and five victories. Dhoni has scored over 17,000 runs in international cricket, including more than 10,000 runs in ODIs with an average of over 50. He was awarded India's highest sports honor, the Major Dhyan Chand Khel Ratna Award, in 2008 and has also received the Padma Shri (2009) and Padma Bhushan (2018). Sachin Tendulkar is another iconic Indian cricketer who is considered one of the greatest batsmen in h

In [11]:
# Summary RAG: career overview with a target length of about 150 words.
prompt = "In about 150 words, give an overview of Sourav's career"
rag_function(prompt)



 Sourav Ganguly is a former Indian cricketer and current commentator, popularly known as the "Maharaja of Indian Cricket." He served as captain of the Indian national cricket team, leading them to victories in the 2002 ICC Champions Trophy and the finals of the 2003 Cricket World Cup, the 2000 ICC Champions Trophy, and the 2004 Asia Cup. Despite not being among the highest run-scorers, his leadership is considered one of India's most successful. His career spanned over two decades, representing Mumbai domestically and India internationally.


Execution Time 191.7901120185852 


In [None]:
# Analytical RAG: asks for the run totals of all three players.
prompt = "Enlist the total runs scored by all three players"
rag_function(prompt)