RAG Pipeline from Scratch Using Ollama Python & Llama2

In [16]:
# Toy knowledge base for the retrieval step: each string is one candidate
# activity suggestion that a user query is later matched against.
corpus_of_documents = [
    "Take a leisurely walk in the park and enjoy the fresh air.",
    "Visit a local museum and discover something new.",
    "Attend a live music concert and feel the rhythm.",
    "Go for a hike and admire the natural scenery.",
    "Have a picnic with friends and share some laughs.",
    "Explore a new cuisine by dining at an ethnic restaurant.",
    "Take a yoga class and stretch your body and mind.",
    "Join a local sports league and enjoy some friendly competition.",
    "Attend a workshop or lecture on a topic you're interested in.",
    "Visit an amusement park and ride the roller coasters."
]

In [13]:
from collections import Counter
import math

# Example query/document pair used to walk through cosine similarity by hand.
user_query = "i am iraninan and i live in USA"
document = "iran is the country for the iraninan and erveryone"

# Lowercase each text, then split on single spaces to get bag-of-words tokens.
query_tokens = user_query.lower().split(" ")
document_tokens = document.lower().split(" ")

# Show both token lists, one per line.
print(query_tokens, document_tokens, sep="\n")


['i', 'am', 'iraninan', 'and', 'i', 'live', 'in', 'usa']
['iran', 'is', 'the', 'country', 'for', 'the', 'iraninan', 'and', 'erveryone']


In [14]:
# Bag-of-words term frequencies for the query and for the document.
query_counter, document_counter = Counter(query_tokens), Counter(document_tokens)
print(query_counter, document_counter, sep="\n")

Counter({'i': 2, 'am': 1, 'iraninan': 1, 'and': 1, 'live': 1, 'in': 1, 'usa': 1})
Counter({'the': 2, 'iran': 1, 'is': 1, 'country': 1, 'for': 1, 'iraninan': 1, 'and': 1, 'erveryone': 1})


In [None]:
# Print every distinct query token next to its frequency.
for token, frequency in query_counter.items():
    print(token, frequency)

i 2
am 1
iraninan 1
and 1
live 1
in 1
usa 1


In [27]:
# Frequency vector of the query: one count per distinct token, taken in the
# Counter's own key order.
lst = list(query_counter.values())

lst

[2, 1, 1, 1, 1, 1, 1]

In [29]:
# Tokens that occur in both the query and the document (key-set intersection).
shared_tokens = query_counter.keys() & document_counter.keys()
for shared_token in shared_tokens:
    print(shared_token)

iraninan
and


In [None]:
# Per-token terms of the dot product: for each token present in both texts,
# multiply its query frequency by its document frequency.
mylist = [
    query_counter[shared] * document_counter[shared]
    for shared in query_counter.keys() & document_counter.keys()
]
mylist

[1, 1]

In [33]:
# Dot product of the two frequency vectors: the sum of the per-token products.
sum(mylist)

2

In [37]:
# Euclidean norm of the document's frequency vector.
document_squared_counts = [count ** 2 for count in document_counter.values()]
document_magnitude = math.sqrt(sum(document_squared_counts))
document_magnitude

3.3166247903554

In [40]:
# Euclidean norm of the query's frequency vector.
query_squared_counts = [count ** 2 for count in query_counter.values()]
query_magnitude = math.sqrt(sum(query_squared_counts))
query_magnitude

3.1622776601683795

In [41]:
# Keep the dot product under a name so the similarity formula can use it.
dot_prod=sum(mylist)

In [43]:
# Cosine similarity: the dot product divided by the product of the two norms.
norm_product = query_magnitude * document_magnitude
similarity = dot_prod / norm_product
similarity

0.19069251784911848

In [None]:
# Second example pair, intended as input for the reusable similarity function.
# NOTE(review): this cell has no execution count, and the similarity recorded
# further down equals the value computed for the earlier pair, so the saved
# output looks stale — re-run the notebook top to bottom to confirm.
user_query="is yoga good for health"
document="yoga is very good for living healthy lifesytle."

In [44]:
def cosine_similarity(query, document):
    """Return the cosine similarity of two texts using bag-of-words counts.

    Both texts are lowercased and split on runs of whitespace; every distinct
    token is one dimension whose value is that token's frequency.

    Args:
        query: Text of the user query.
        document: Text of the candidate document.

    Returns:
        A float in the range 0.0-1.0 (up to floating-point rounding).
        0.0 is returned when either text contains no tokens (empty or
        whitespace-only), because the angle is undefined in that case.
    """
    # split() with no argument collapses repeated whitespace and yields [] for
    # blank text; split(" ") would emit '' tokens that spuriously match each
    # other (making two empty strings look identical).
    query_counter = Counter(query.lower().split())
    document_counter = Counter(document.lower().split())

    # Only tokens present in both texts contribute to the dot product.
    dot_product = sum(
        query_counter[token] * document_counter[token]
        for token in query_counter.keys() & document_counter.keys()
    )

    # Euclidean norms of the two frequency vectors.
    query_magnitude = math.sqrt(sum(count ** 2 for count in query_counter.values()))
    document_magnitude = math.sqrt(sum(count ** 2 for count in document_counter.values()))

    # Guard against division by zero when a text has no tokens.
    magnitude_product = query_magnitude * document_magnitude
    if magnitude_product == 0:
        return 0.0
    return dot_product / magnitude_product

In [45]:

# Score the current user_query/document pair with the function defined above.
cosine_similarity(user_query,document)

0.19069251784911848

In [46]:
def return_response(query, corpus):
    """Return the document in ``corpus`` that is most similar to ``query``.

    Args:
        query: User query text.
        corpus: Iterable of document strings to search.

    Returns:
        The document with the highest cosine similarity to ``query``; when
        several documents tie, the earliest one wins.

    Raises:
        ValueError: If ``corpus`` is empty.
    """
    # Pick from the corpus that was passed in, not from the module-level
    # corpus_of_documents, so the function works for any corpus.
    return max(corpus, key=lambda doc: cosine_similarity(query, doc))

In [50]:
# Retrieve the corpus entry that best matches a sample user request.
user_input = "i like fresh air."
relevant_document = return_response(query=user_input, corpus=corpus_of_documents)
relevant_document

'Take a leisurely walk in the park and enjoy the fresh air.'

In [56]:
import requests
import json

# Define prompt template
prompt = """
You are a bot that makes recommendations for activities. You answer in very short sentences and do not include extra information.
This is the recommended activity: {relevant_document}
The user input is: {user_input}
Compile a recommendation to the user based on the recommended activity and the user input.
"""

# LLaMA2 (Ollama) endpoint
url = 'http://localhost:11434/api/generate'

# Format prompt with actual values
data = {
    "model": "llama2",
    "prompt": prompt.format(user_input=user_input, relevant_document=relevant_document)
}

headers = {'Content-Type': 'application/json'}

# (connect, read) timeouts in seconds, so an unreachable or stalled server
# fails with an error instead of hanging the cell indefinitely.
request_timeout = (10, 300)

# Collect full LLM response
full_response = []

# Using the response as a context manager always releases the connection and,
# unlike a try/finally that closes `response`, does not raise NameError (hiding
# the real error) when the POST itself fails before `response` is assigned.
with requests.post(
    url,
    data=json.dumps(data),
    headers=headers,
    stream=True,
    timeout=request_timeout,
) as response:
    # The body is consumed line by line; each non-empty line is parsed as JSON.
    for line in response.iter_lines():
        if not line:
            continue
        decoded_line = json.loads(line.decode('utf-8'))
        # Append response only if key exists
        if 'response' in decoded_line:
            full_response.append(decoded_line['response'])
        elif 'error' in decoded_line:
            print("❌ LLaMA2 Error:", decoded_line['error'])
            break
        else:
            print("⚠️ Unexpected response format:", decoded_line)

# Print the full response
print("🤖", ''.join(full_response))


🤖  Great! Based on your input, I recommend taking a yoga class to help you relax and stretch your body and mind. It's a great way to reduce stress and improve your overall well-being.
