In [10]:
from collections import Counter
import math
import requests
import json

In [6]:
corpus = [
    "The Eiffel Tower is located in Paris, France.",
    "Python is a popular programming language for data science.",
    "The Great Wall of China is one of the Seven Wonders of the World.",
    "Albert Einstein developed the theory of relativity.",
    "The Amazon rainforest produces 20% of the Earth's oxygen.",
    "JavaScript is used to make websites interactive.",
    "The Pacific Ocean is the largest ocean on Earth.",
    "Marie Curie was the first woman to win a Nobel Prize.",
    "The Sun is about 93 million miles away from Earth.",
    "The Pyramids of Giza were built as tombs for the pharaohs.",
    "Water boils at 100°C at sea level.",
    "Tokyo is the capital city of Japan.",
    "Photosynthesis converts sunlight into chemical energy in plants.",
    "Google was founded by Larry Page and Sergey Brin.",
    "The human brain contains around 86 billion neurons.",
    "The Internet connects billions of devices worldwide.",
    "Saturn is the planet with the most visible rings.",
    "The speed of light is approximately 299,792 kilometers per second.",
    "Artificial Intelligence is transforming industries globally.",
    "Mount Everest is the tallest mountain on Earth."
]

In [8]:
queries = [
    "Where is the Eiffel Tower located?",
    "Who created the theory of relativity?",
    "Which planet has the most visible rings?",
    "What do plants use to make food from sunlight?"
]

In [9]:
def cosine_similarity(query, document):
    # Tokenize and convert to lowercase
    query_tokens = query.lower().split(" ")
    document_tokens = document.lower().split(" ")

    # Create Counters for query and document
    query_counter = Counter(query_tokens)
    document_counter = Counter(document_tokens)

    # Calculate dot product
    dot_product = sum(query_counter[token] * document_counter[token] for token in query_counter.keys() & document_counter.keys())

    # Calculate magnitudes
    query_magnitude = math.sqrt(sum(query_counter[token] ** 2 for token in query_counter))
    document_magnitude = math.sqrt(sum(document_counter[token] ** 2 for token in document_counter))

    # Calculate cosine similarity
    similarity = dot_product / (query_magnitude * document_magnitude) if query_magnitude * document_magnitude != 0 else 0

    return similarity

In [13]:
def return_response(query, corpus):
    similarities = []
    for doc in corpus:
        similarity = cosine_similarity(query, doc)
        similarities.append(similarity)
    return corpus[similarities.index(max(similarities))]

In [15]:
relevant_document=return_response(queries[0],corpus)

In [25]:
relevant_document

['The Eiffel Tower is located in Paris, France.',
 'Python is a popular programming language for data science.',
 'The Great Wall of China is one of the Seven Wonders of the World.',
 'Albert Einstein developed the theory of relativity.',
 "The Amazon rainforest produces 20% of the Earth's oxygen.",
 'JavaScript is used to make websites interactive.',
 'The Pacific Ocean is the largest ocean on Earth.',
 'Marie Curie was the first woman to win a Nobel Prize.',
 'The Sun is about 93 million miles away from Earth.',
 'The Pyramids of Giza were built as tombs for the pharaohs.',
 'Water boils at 100°C at sea level.',
 'Tokyo is the capital city of Japan.',
 'Photosynthesis converts sunlight into chemical energy in plants.',
 'Google was founded by Larry Page and Sergey Brin.',
 'The human brain contains around 86 billion neurons.',
 'The Internet connects billions of devices worldwide.',
 'Saturn is the planet with the most visible rings.',
 'The speed of light is approximately 299,792 ki

In [37]:
user_input = queries[2]
relevant_document = corpus
full_response = []

In [40]:
full_response = []
prompt = """
You are a bot that makes recommendations for activities. You answer in very short sentences and do not include extra information.
This is the recommended activity: {relevant_document}
The user input is: {user_input}
Compile a recommendation to the user based on the recommended activity and the user input.
write your ouptupt seperate and the output you got from the relevant doucment seperate and give another output with mixed thoughts
"""

url = 'http://localhost:11434/api/generate'


data = {
    "model": "gemma3",
    "prompt": prompt.format(user_input=user_input, relevant_document=relevant_document)
}

headers = {'Content-Type': 'application/json'}

response = requests.post(url, data=json.dumps(data), headers=headers, stream=True)


try:
    for line in response.iter_lines():
        # filter out keep-alive new lines
        if line:
            decoded_line = json.loads(line.decode('utf-8'))
            # print(decoded_line['response'])  # uncomment to results, token by token
            full_response.append(decoded_line['response'])
finally:
    response.close()
    
    
print(''.join(full_response))

Saturn has the most visible rings. 

**Document Output:**
Saturn is the planet with the most visible rings.

**Mixed Thoughts Output:**
Saturn's rings are amazing.
