In [1]:
# Toy retrieval corpus: ten one-sentence activity suggestions.
# The same list literal is declared again in the later pipeline cells of this notebook.
corpus_of_documents = [
    "Take a leisurely walk in the park and enjoy the fresh air.",
    "Visit a local museum and discover something new.",
    "Attend a live music concert and feel the rhythm.",
    "Go for a hike and admire the natural scenery.",
    "Have a picnic with friends and share some laughs.",
    "Explore a new cuisine by dining at an ethnic restaurant.",
    "Take a yoga class and stretch your body and mind.",
    "Join a local sports league and enjoy some friendly competition.",
    "Attend a workshop or lecture on a topic you're interested in.",
    "Visit an amusement park and ride the roller coasters."
]

In [2]:
corpus_of_documents

['Take a leisurely walk in the park and enjoy the fresh air.',
 'Visit a local museum and discover something new.',
 'Attend a live music concert and feel the rhythm.',
 'Go for a hike and admire the natural scenery.',
 'Have a picnic with friends and share some laughs.',
 'Explore a new cuisine by dining at an ethnic restaurant.',
 'Take a yoga class and stretch your body and mind.',
 'Join a local sports league and enjoy some friendly competition.',
 "Attend a workshop or lecture on a topic you're interested in.",
 'Visit an amusement park and ride the roller coasters.']

In [3]:
# ! uv pip install latexify-py

In [4]:
import latexify

# Vector-form cosine similarity: dot(A, B) / (||A|| * ||B||) over components 0..n-1.
# There is no guard against a zero norm, so an all-zero A or B raises ZeroDivisionError
# when the function is actually called.
# Documented with `#` comments rather than a docstring so the function body stays a
# single return expression for the latexify decorator (presumably it works from the
# function's source -- confirm before adding a docstring).
# NOTE: the name `cosine_similarity` is rebound further down in this notebook by a
# text-based cosine_similarity(query, document) with a different signature.
@latexify.function
def cosine_similarity(A, B, n):
    return (
        # Numerator: dot product of A and B.
        sum(A[i] * B[i] for i in range(n)) /
        # Denominator: product of the two Euclidean norms.
        ((sum(A[i]**2 for i in range(n)))**0.5 * (sum(B[i]**2 for i in range(n)))**0.5)
    )


cosine_similarity

<latexify.ipython_wrappers.LatexifiedFunction at 0x24960a3fca0>

In [8]:
# Example query for the manual token-counting walkthrough in the following cells.
# NOTE: `user_query` is rebound to a different string by the pipeline cells further down.
user_query="i am an indian and i live in india"

In [10]:
# Example document for the manual token-counting walkthrough.
# NOTE(review): "eveyone" looks like a typo for "everyone"; it is left untouched because
# it is runtime data and the recorded cell outputs below echo it.
document="india is a country for the indians and for eveyone"

In [11]:
from collections import Counter 
import math 

In [15]:
# Lowercase the query and split it on whitespace; punctuation is not handled here.
query_tokens = user_query.lower().split()

In [16]:
query_tokens

['i', 'am', 'an', 'indian', 'and', 'i', 'live', 'in', 'india']

In [17]:
# Same tokenization as for the query: lowercase, then whitespace split.
document_token=document.lower().split()

In [28]:
document_token

['india',
 'is',
 'a',
 'country',
 'for',
 'the',
 'indians',
 'and',
 'for',
 'eveyone']

In [18]:
# Term frequencies of the query: token -> number of occurrences.
query_counter = Counter(query_tokens)
# Bare expression so the notebook displays the Counter.
query_counter

Counter({'i': 2,
         'am': 1,
         'an': 1,
         'indian': 1,
         'and': 1,
         'live': 1,
         'in': 1,
         'india': 1})

In [19]:
# Term frequencies of the document: token -> number of occurrences.
document_counter=Counter(document_token)
# Bare expression so the notebook displays the Counter.
document_counter

Counter({'for': 2,
         'india': 1,
         'is': 1,
         'a': 1,
         'country': 1,
         'the': 1,
         'indians': 1,
         'and': 1,
         'eveyone': 1})

In [24]:
query_counter.keys()

dict_keys(['i', 'am', 'an', 'indian', 'and', 'live', 'in', 'india'])

In [21]:
# Term counts of the query, in the same first-seen token order as query_counter.keys().
# Using .values() replaces the manual append loop and avoids leaking the loop
# variable `token` into the notebook namespace.
lst = list(query_counter.values())

In [22]:
lst

[2, 1, 1, 1, 1, 1, 1, 1]

In [49]:
import math
import json
import requests

# Sample corpus of documents: ten one-sentence activity suggestions that the
# retrieval functions defined in this cell score against the user's query.
corpus_of_documents = [
    "Take a leisurely walk in the park and enjoy the fresh air.",
    "Visit a local museum and discover something new.",
    "Attend a live music concert and feel the rhythm.",
    "Go for a hike and admire the natural scenery.",
    "Have a picnic with friends and share some laughs.",
    "Explore a new cuisine by dining at an ethnic restaurant.",
    "Take a yoga class and stretch your body and mind.",
    "Join a local sports league and enjoy some friendly competition.",
    "Attend a workshop or lecture on a topic you're interested in.",
    "Visit an amusement park and ride the roller coasters."
]

def tokenize(text):
    """Lowercase ``text`` and split it into punctuation-free word tokens.

    The text is split on whitespace and ASCII punctuation is stripped from
    both ends of every piece. A sentence-final word such as ``"mind."``
    therefore yields the token ``"mind"`` and can match the same word in a
    query. Punctuation inside a word (``"you're"``) is kept, and pieces made
    up only of punctuation are dropped.

    Args:
        text: The string to tokenize.

    Returns:
        A list of lowercase tokens in their original order; an empty list
        for empty or whitespace-only input.
    """
    import string  # local import keeps this definition self-contained

    stripped = (piece.strip(string.punctuation) for piece in text.lower().split())
    return [token for token in stripped if token]

def word_frequency(tokens):
    """Build a token -> occurrence-count dictionary.

    Args:
        tokens: An iterable of hashable tokens.

    Returns:
        A dict whose keys appear in first-seen order and whose values are
        the number of times each token occurs in ``tokens``.
    """
    counts = {}
    for word in tokens:
        if word in counts:
            counts[word] += 1
        else:
            counts[word] = 1
    return counts

def cosine_similarity(query, document):
    """Score how similar two texts are using cosine similarity of term counts.

    Both texts are passed through ``tokenize`` and ``word_frequency``; the
    resulting count dictionaries are treated as sparse vectors.

    Args:
        query: The query text.
        document: The document text to compare against.

    Returns:
        The cosine of the angle between the two count vectors as a float,
        or ``0.0`` when either text produces no tokens.
    """
    q_counts = word_frequency(tokenize(query))
    d_counts = word_frequency(tokenize(document))

    # Only tokens present in both texts contribute to the dot product.
    shared = q_counts.keys() & d_counts.keys()
    numerator = sum(q_counts[word] * d_counts[word] for word in shared)

    # Euclidean norms of the two count vectors.
    q_norm = math.sqrt(sum(count ** 2 for count in q_counts.values()))
    d_norm = math.sqrt(sum(count ** 2 for count in d_counts.values()))
    denominator = q_norm * d_norm

    # An empty text has norm 0; report "no similarity" instead of dividing by zero.
    if denominator == 0:
        return 0.0
    return numerator / denominator

def retrieve_relevant_document(query, corpus):
    """Return the corpus entry with the highest cosine similarity to ``query``.

    Args:
        query: The query text.
        corpus: An indexable sequence of document strings.

    Returns:
        The best-scoring document; on ties the earliest one in ``corpus``.

    Raises:
        ValueError: If ``corpus`` is empty (raised by ``max``).
    """
    scores = []
    for candidate in corpus:
        scores.append(cosine_similarity(query, candidate))
    best_score = max(scores)
    # list.index returns the first position, so ties go to the earliest document.
    return corpus[scores.index(best_score)]

def generate_response(query, relevant_document, model="llama3.2:1b", timeout=(5, 300)):
    """Ask a locally served model to phrase a recommendation.

    Builds a prompt from the retrieved activity and the user's input, POSTs
    it to ``http://localhost:11434/api/generate`` with streaming enabled and
    concatenates the ``"response"`` field of every streamed JSON line.

    Args:
        query: The user's input text.
        relevant_document: The activity text selected by the retriever.
        model: Model name sent in the request payload.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.post`` so a stalled server cannot block forever.

    Returns:
        The generated text, or the fixed fallback message
        ``"Sorry, I couldn't generate a response at this time."`` when the
        request fails, the server reports an error, or a streamed line
        cannot be parsed. The underlying error is printed.
    """
    prompt = (
        "You are a bot that makes recommendations for activities. "
        "Answer in short sentences. Do not include extra information. "
        f"Recommended activity: {relevant_document}\n"
        f"User input: {query}\n"
        "Compile a recommendation based on the activity and user input."
    )

    url = "http://localhost:11434/api/generate"
    payload = {"model": model, "prompt": prompt}

    pieces = []
    try:
        # The context manager closes the streamed connection on every path.
        # The previous `if response:` check skipped close() for 4xx/5xx
        # responses because a Response is falsy when its status is an error.
        with requests.post(url, json=payload, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            for line in response.iter_lines():
                if not line:
                    continue  # skip keep-alive blank lines
                chunk = json.loads(line.decode("utf-8"))
                if "error" in chunk:
                    # Surface the server's own message instead of a bare KeyError.
                    raise RuntimeError(chunk["error"])
                pieces.append(chunk.get("response", ""))
    except (requests.RequestException, ValueError, RuntimeError) as e:
        print(f"Error during API call: {e}")
        return "Sorry, I couldn't generate a response at this time."

    return "".join(pieces)

def rag_pipeline(user_query, corpus):
    """Run retrieval followed by generation for a single query.

    Args:
        user_query: The user's input text.
        corpus: The documents to search.

    Returns:
        Whatever ``generate_response`` returns for the query and the
        document chosen by ``retrieve_relevant_document``.
    """
    best_match = retrieve_relevant_document(user_query, corpus)
    return generate_response(user_query, best_match)

# Example usage
# Runs the retrieve-then-generate pipeline once and prints the returned text.
# NOTE: this rebinds the notebook-level `user_query` assigned in an earlier cell.
if __name__ == "__main__":
    user_query = "i like to do yoga"
    response = rag_pipeline(user_query, corpus_of_documents)
    print(response)

Try taking a morning yoga class with a gentle instructor to help you wake up feeling refreshed.

Alternatively, consider attending a private yoga session at home with a certified teacher for personalized guidance and relaxation.

You could also join a local yoga group or community center for social interaction and a sense of belonging.

Lastly, look into online yoga classes or videos that cater to beginners, allowing you to practice from the comfort of your own space.


In [None]:
import math
import json
import requests

# Sample corpus of documents: ten one-sentence activity suggestions that the
# retrieval functions defined in this cell score against the user's query.
corpus_of_documents = [
    "Take a leisurely walk in the park and enjoy the fresh air.",
    "Visit a local museum and discover something new.",
    "Attend a live music concert and feel the rhythm.",
    "Go for a hike and admire the natural scenery.",
    "Have a picnic with friends and share some laughs.",
    "Explore a new cuisine by dining at an ethnic restaurant.",
    "Take a yoga class and stretch your body and mind.",
    "Join a local sports league and enjoy some friendly competition.",
    "Attend a workshop or lecture on a topic you're interested in.",
    "Visit an amusement park and ride the roller coasters."
]

def tokenize(text):
    """Lowercase ``text`` and split it into punctuation-free word tokens.

    The text is split on whitespace and ASCII punctuation is stripped from
    both ends of every piece, so a sentence-final ``"mind."`` becomes
    ``"mind"``. Punctuation inside a word (``"you're"``) is kept, and pieces
    made up only of punctuation are dropped.

    Dry run:
        "i like to do yoga"
            -> ['i', 'like', 'to', 'do', 'yoga']
        "Take a yoga class and stretch your body and mind."
            -> ['take', 'a', 'yoga', 'class', 'and', 'stretch', 'your',
                'body', 'and', 'mind']

    Args:
        text: The string to tokenize.

    Returns:
        A list of lowercase tokens in their original order; an empty list
        for empty or whitespace-only input.
    """
    import string  # local import keeps this definition self-contained

    lowered = text.lower()
    tokens = []
    for piece in lowered.split():
        token = piece.strip(string.punctuation)
        if token:
            tokens.append(token)
    return tokens

def word_frequency(tokens):
    """Tally how many times each token occurs.

    Dry run:
        ['i', 'like', 'to', 'do', 'yoga']
            -> {'i': 1, 'like': 1, 'to': 1, 'do': 1, 'yoga': 1}

    Args:
        tokens: An iterable of hashable tokens.

    Returns:
        A dict mapping each distinct token (in first-seen order) to its count.
    """
    tally = {}
    for word in tokens:
        try:
            tally[word] += 1
        except KeyError:
            # First time this token is seen.
            tally[word] = 1
    return tally

def cosine_similarity(query, document):
    """Cosine similarity between the term-count vectors of two texts.

    Dry run with query "i like to do yoga" and document
    "Take a yoga class and stretch your body and mind.":
        * the query has 5 distinct tokens, each occurring once
          -> norm sqrt(5) ~= 2.236
        * the document has 9 distinct tokens, with 'and' occurring twice
          -> norm sqrt(8 * 1 + 2**2) = sqrt(11) ~= 3.317
        * the only shared token is 'yoga' -> dot product 1 * 1 = 1
        * similarity = 1 / (2.236 * 3.317) ~= 0.135

    Args:
        query: The query text.
        document: The document text to compare against.

    Returns:
        The similarity as a float, or ``0.0`` if either text has no tokens.
    """
    q_counts = word_frequency(tokenize(query))
    d_counts = word_frequency(tokenize(document))

    # Dot product: walk the query's tokens and keep those the document also has.
    overlap = sum(
        q_count * d_counts[word]
        for word, q_count in q_counts.items()
        if word in d_counts
    )

    # Euclidean norm of each count vector.
    q_norm = math.sqrt(sum(value ** 2 for value in q_counts.values()))
    d_norm = math.sqrt(sum(value ** 2 for value in d_counts.values()))

    # A text without tokens has norm 0; avoid dividing by zero.
    if q_norm == 0 or d_norm == 0:
        return 0.0
    return overlap / (q_norm * d_norm)

def retrieve_relevant_document(query, corpus):
    """Pick the document in ``corpus`` that scores highest against ``query``.

    Dry run with query "i like to do yoga" and the sample corpus: only
    "Take a yoga class and stretch your body and mind." shares a token
    ('yoga') with the query, so index 6 wins and that sentence is returned.

    Args:
        query: The query text.
        corpus: An indexable sequence of document strings.

    Returns:
        The best-scoring document; on ties the earliest one in ``corpus``.

    Raises:
        ValueError: If ``corpus`` is empty (raised by ``max``).
    """
    scores = [cosine_similarity(query, candidate) for candidate in corpus]
    # max() over positions returns the first position holding the top score.
    best_position = max(range(len(scores)), key=scores.__getitem__)
    return corpus[best_position]

def generate_response(query, relevant_document, model="gemma3:4b", timeout=(5, 300)):
    """Ask a locally served model to phrase a recommendation.

    Builds a prompt from the retrieved activity and the user's input, POSTs
    it to ``http://localhost:11434/api/generate`` with streaming enabled and
    concatenates the ``"response"`` field of every streamed JSON line.

    Dry run: query = "i like to do yoga", relevant_document = "Take a yoga
    class and stretch your body and mind." The prompt embeds both strings,
    and the streamed pieces are joined into a single reply.

    Args:
        query: The user's input text.
        relevant_document: The activity text selected by the retriever.
        model: Model name sent in the request payload. Earlier revisions of
            this cell used "llama3.2:1b" and "deepseek-r1:1.5b".
            NOTE(review): the `ollama list` output recorded at the end of
            this notebook does not show "gemma3:4b"; make sure the model is
            available or pass one that is.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.post`` so a stalled server cannot block forever.

    Returns:
        The generated text, or a string starting with
        ``"Error generating response: "`` when the request fails, the server
        answers with a non-200 status or an error payload, or a streamed
        line cannot be parsed.
    """
    prompt = (
        "You are a bot that makes recommendations for activities. "
        "Answer in short sentences. Do not include extra information. "
        f"Recommended activity: {relevant_document}\n"
        f"User input: {query}\n"
        "Compile a recommendation based on the activity and user input."
    )

    url = "http://localhost:11434/api/generate"
    payload = {"model": model, "prompt": prompt}

    pieces = []
    try:
        # The context manager closes the streamed connection on every path.
        with requests.post(url, json=payload, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                raise requests.HTTPError(
                    f"API request failed with status {response.status_code}"
                )

            for line in response.iter_lines():
                if not line:
                    continue  # skip keep-alive blank lines
                chunk = json.loads(line.decode("utf-8"))
                if "error" in chunk:
                    # Report the server's own message instead of a bare KeyError.
                    raise RuntimeError(chunk["error"])
                pieces.append(chunk.get("response", ""))
    except (requests.RequestException, ValueError, RuntimeError) as e:
        return f"Error generating response: {str(e)}"

    return "".join(pieces)

def rag_pipeline(user_query, corpus):
    """Retrieve the best-matching document, then generate a reply from it.

    Dry run: for user_query = "i like to do yoga" and the sample corpus, the
    retriever selects "Take a yoga class and stretch your body and mind.",
    and that sentence is handed to ``generate_response`` with the query.

    Args:
        user_query: The user's input text.
        corpus: The documents to search.

    Returns:
        The string produced by ``generate_response``.
    """
    return generate_response(
        user_query,
        retrieve_relevant_document(user_query, corpus),
    )

# Example usage
# Runs the retrieve-then-generate pipeline once and prints the returned text.
# NOTE: this rebinds the notebook-level `user_query` assigned in an earlier cell.
if __name__ == "__main__":
    user_query = "i like to do yoga"
    response = rag_pipeline(user_query, corpus_of_documents)
    print(response)
    # Illustrative output only (this cell has no recorded run; model wording varies):
    #   "Great! Try a yoga class to stretch your body and mind."
    # If the API call fails, the printed text starts with "Error generating response: ".

In [47]:
! ollama list

NAME                       ID              SIZE      MODIFIED    
deepseek-r1:1.5b           e0979632db5a    1.1 GB    9 days ago     
nomic-embed-text:latest    0a109f422b47    274 MB    2 weeks ago    
llama3.2:1b                baf6a787fdff    1.3 GB    2 weeks ago    
