In [None]:
! pip install ollama

In [22]:
import ollama
import numpy as np
import os 
import time
import json
from numpy.linalg import norm

In [None]:
### The intention of this exercise is to do RAG using only Python:
### no vector DBs, LangChain, or LlamaIndex.

In [8]:
# Step 1: Parse the text file into chunks (currently only .txt is supported).
# Note: the text is split on blank lines, so each chunk is a single paragraph. The resulting context
# is short and not very informative; try larger, overlapping chunks.

def parse_file(filename):
    """Split a UTF-8 text file into paragraphs.

    Consecutive non-blank lines are stripped and joined with a single space
    to form one paragraph; one or more blank (or whitespace-only) lines
    separate paragraphs. A leading BOM is ignored ("utf-8-sig").

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    list of str
        The paragraphs in file order; empty if the file has no text.
    """
    chunks = []
    pending = []

    with open(filename, encoding="utf-8-sig") as handle:
        for raw_line in handle:
            text = raw_line.strip()
            if text:
                pending.append(text)
                continue
            # Blank line: close the paragraph collected so far, if any.
            if pending:
                chunks.append(" ".join(pending))
                pending = []

    # The file may end without a trailing blank line.
    if pending:
        chunks.append(" ".join(pending))

    return chunks

In [27]:
# Step 2: Create and save embeddings.
# Download the model first with: ollama pull mxbai-embed-large
# Embeddings need not be recreated every time for the same file, so they are cached on disk (see save_embeddings).

def save_embeddings(filename, embeddings):
    """Write ``embeddings`` as JSON to ``embeddings/<filename>.json``.

    Parameters
    ----------
    filename : str
        Cache key, normally the name of the source text file. It may include
        directory components (e.g. ``"data/advs.txt"``); the matching
        sub-directories are created under ``embeddings/``.
    embeddings : list
        JSON-serialisable embeddings to store.
    """
    target = f"embeddings/{filename}.json"
    # Create the full parent chain, not just "embeddings", so a filename with
    # sub-directories works; exist_ok avoids the check-then-create race.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, "w") as f:
        json.dump(embeddings, f)


def load_embeddings(filename):
    """Read cached embeddings from ``embeddings/<filename>.json``.

    Parameters
    ----------
    filename : str
        Cache key used when the embeddings were saved.

    Returns
    -------
    object or bool
        The decoded JSON content if the cache file exists, otherwise ``False``.
        Callers should compare with ``is not False``, since an empty list is a
        valid cached value.
    """
    cache_path = f"embeddings/{filename}.json"
    if os.path.exists(cache_path):
        with open(cache_path, "r") as cache_file:
            return json.load(cache_file)
    # Nothing cached for this key.
    return False


def create_embeddings(filename, text_chunks, modelname):
    """Return one embedding per text chunk, using the on-disk cache when valid.

    Parameters
    ----------
    filename : str
        Cache key passed to ``load_embeddings`` / ``save_embeddings``.
    text_chunks : iterable of str
        Chunks to embed, in the order they will later be indexed.
    modelname : str
        Name of the Ollama embedding model.

    Returns
    -------
    list
        Embeddings aligned index-for-index with ``text_chunks``.

    Notes
    -----
    The cache is keyed by ``filename`` only. It is reused only when it holds
    exactly one vector per chunk; a different count means the source text
    changed, and reusing it would pair vectors with the wrong chunks. A change
    of ``modelname`` alone is not detected.
    """
    # Materialise once so len() works and the chunks can be iterated below.
    text_chunks = list(text_chunks)

    cached = load_embeddings(filename)
    # load_embeddings returns False on a miss; an empty list is a valid hit.
    if cached is not False and len(cached) == len(text_chunks):
        return cached

    embeddings = [
        ollama.embeddings(model=modelname, prompt=chunk)["embedding"]
        for chunk in text_chunks
    ]
    save_embeddings(filename, embeddings)
    return embeddings

In [28]:
# Step 3: Retrieval — compute the cosine similarity between the query embedding and every chunk embedding,
# ranked from most to least similar (the caller keeps the top 5).

def find_most_similar(query, context):
    """Rank the vectors in ``context`` by cosine similarity to ``query``.

    Parameters
    ----------
    query : sequence of float
        The query embedding.
    context : sequence of sequence of float
        Candidate embeddings, each with the same length as ``query``.

    Returns
    -------
    list of (score, index) tuples
        Sorted from most to least similar. ``index`` is the position of the
        vector in ``context``. Equal scores are ordered by descending index.
        An empty ``context`` yields an empty list.

    Notes
    -----
    Cosine similarity is undefined for a zero-length vector (0/0). Such
    pairs are scored 0.0 so that no NaN reaches the sort, where it would
    make the ordering of every other result unreliable.
    """
    query_norm = norm(query)
    similarity_scores = []
    for item in context:
        denominator = query_norm * norm(item)
        if denominator == 0:
            # Zero-norm query or candidate: treat as "no similarity".
            similarity_scores.append(0.0)
        else:
            similarity_scores.append(np.dot(query, item) / denominator)
    return sorted(zip(similarity_scores, range(len(context))), reverse=True)

In [29]:
# System prompt sent to the chat model. process() concatenates the retrieved
# text chunks onto the end of this string, i.e. right after the "Context:" header.
SYSTEM_PROMPT = """

You are a highly knowledgeable and concise reading assistant. Your task is to answer questions based strictly on the context provided. 

Guidelines:
1. **Answer within the context:** Provide responses solely based on the information within the snippets. Do not infer or assume details beyond the provided context.
2. **Be concise:** Offer clear and succinct answers. Aim for brevity while ensuring completeness.
3. **Admit when uncertain:** If the context does not contain the answer or if you're unsure, clearly state, "I don't know based on the provided context."

Context:


"""


In [43]:
def process(
    filename='advs.txt',
    query='Who is Dr. Watson ?',
    embed_model='mxbai-embed-large',
    chat_model='llama3',
    top_k=5,
):
    """Answer ``query`` from ``filename`` with a minimal RAG pipeline.

    Steps: split the file into chunks, embed them (cached on disk), embed the
    query, pick the ``top_k`` most similar chunks, and ask the chat model to
    answer using those chunks as context. Prints the messages sent to the
    model followed by the model's answer; returns nothing.

    Parameters
    ----------
    filename : str
        Text file to use as the knowledge source.
    query : str
        The user's question.
    embed_model : str
        Ollama model used to embed both the chunks and the query.
    chat_model : str
        Ollama model used to generate the answer.
    top_k : int
        Number of best-matching chunks placed in the context.
    """
    text_chunks = parse_file(filename=filename)
    embeddings = create_embeddings(filename, text_chunks, modelname=embed_model)

    # Use the same model as for the chunks so the vectors are comparable.
    query_embedding = ollama.embeddings(model=embed_model, prompt=query)["embedding"]
    most_similar_chunks = find_most_similar(query_embedding, embeddings)[:top_k]

    # Each ranked entry is (score, chunk_index); only the index is needed here.
    context = "\n".join(text_chunks[index] for _, index in most_similar_chunks)
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT + context},
        {"role": "user", "content": query},
    ]

    response = ollama.chat(
        model=chat_model,
        messages=messages,
    )

    print(messages)

    print("\n\n")
    print(response["message"]["content"])

In [44]:
# Run the pipeline with the file and query that process() defines; it prints
# the messages sent to the model and then the model's answer.
process()

[{'role': 'system', 'content': '\n\nYou are a highly knowledgeable and concise reading assistant. Your task is to answer questions based strictly on the context provided. \n\nGuidelines:\n1. **Answer within the context:** Provide responses solely based on the information within the snippets. Do not infer or assume details beyond the provided context.\n2. **Be concise:** Offer clear and succinct answers. Aim for brevity while ensuring completeness.\n3. **Admit when uncertain:** If the context does not contain the answer or if you\'re unsure, clearly state, "I don\'t know based on the provided context."\n\nContext:\n\n\n"What do you make of that, Watson?"\n"This is my friend, Dr. Watson. He has been of most vital use to me in several of my cases, and a lucky chance has made it possible for me to bring him out and associate him with this investigation."\n"Awake, Watson?" he asked.\n"Pray take a seat," said Holmes. "This is my friend and colleague, Dr. Watson, who is occasionally good enou