In [ ]:
# Followed tutorial: https://learnbybuilding.ai/tutorials/rag-from-scratch#introducing-our-concept-retrieval-augmented-generation 

#### Corpus: Collection of documents

In [2]:
# Toy document store: each entry is one short activity suggestion that the
# retrieval step can return.
corpus = [
    "Take a leisurely walk in the park and enjoy the fresh air.",
    "Visit a local museum and discover something new.",
    "Attend a live music concert and feel the rhythm.",
    "Go for a hike and admire the natural scenery.",
    "Have a picnic with friends and share some laughs.",
    "Explore a new cuisine by dining at an ethnic restaurant.",
    "Take a yoga class and stretch your body and mind.",
    "Join a local sports league and enjoy some friendly competition.",
    "Attend a workshop or lecture on a topic you're interested in.",
    "Visit an amusement park and ride the roller coasters."
]

#### Similarity Search

**Algorithm:** 
- Jaccard similarity: the size of the intersection divided by the size of the union of the two "sets" of words.



In [3]:
def jaccard_similarity(query, document) -> float:
    """Return the Jaccard similarity between the word sets of two strings.

    Both strings are lower-cased and split on runs of whitespace, so
    repeated, leading or trailing spaces never produce an empty-string
    "word" that would count as a spurious match. Punctuation is left
    attached to words (e.g. "air." and "air" are different tokens).

    Args:
        query: The user query text.
        document: The candidate document text.

    Returns:
        len(intersection) / len(union) of the two word sets, in [0.0, 1.0].
        Returns 0.0 when neither string contains any word.
    """
    query_words = set(query.lower().split())
    document_words = set(document.lower().split())

    union = query_words | document_words
    if not union:
        # Nothing to compare; avoid dividing by zero.
        return 0.0

    return len(query_words & document_words) / len(union)

In [4]:
def get_response(user_query, fn_corpus=corpus):
    """Return the corpus entry most similar to ``user_query``.

    Every entry of ``fn_corpus`` is scored against the query with
    ``jaccard_similarity``; the entry with the highest score is returned.
    When several entries tie, the earliest one wins.
    """
    scores = [jaccard_similarity(user_query, entry) for entry in fn_corpus]
    best_position = scores.index(max(scores))
    return fn_corpus[best_position]

In [5]:
user_input = "to do in leisure"

# Retrieve the corpus entry with the highest Jaccard similarity to the query.
output = get_response(user_input, corpus)
print(output)

Take a leisurely walk in the park and enjoy the fresh air.


##### A problem with similarity search is that it looks for the closest document, not the context.

In [6]:
# Same query as before with a negation added, to compare which document is retrieved.
user_input = "to not to do in leisure"

output = get_response(user_input, corpus)
print(output)

Take a leisurely walk in the park and enjoy the fresh air.


#### Adding an LLM

In [7]:
import requests
import json
import os
import asyncio

# Hugging Face access token, read from the environment so it is never
# hard-coded in the notebook. None when HF_KEY is not set.
auth_key_hf = os.getenv("HF_KEY")

# Hosted inference endpoint for the model used in this notebook.
API_URL = "https://api-inference.huggingface.co/models/google/gemma-2b-it"
# Only attach the Authorization header when a token is configured; formatting
# a missing token would otherwise send the literal text "Bearer None".
headers = {"Authorization": f"Bearer {auth_key_hf}"} if auth_key_hf else {}

In [8]:
def query(payload, timeout=120):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

    This is a plain synchronous function: ``requests.post`` blocks and
    returns a ``Response`` object, which cannot be awaited, and the cells in
    this notebook call ``query(...)`` directly without ``await``.

    Args:
        payload: JSON-serialisable request body sent to the endpoint.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error, so a stalled connection cannot hang the kernel
            indefinitely.

    Returns:
        The response body parsed from JSON.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    return response.json()

In [16]:
# Send a bare text prompt (no retrieved document) to the hosted model.
# NOTE(review): the Hugging Face Inference API documents `wait_for_model` under an
# "options" object rather than at the top level of the payload — confirm it is honoured here.
output = query({
    "inputs": "Can you please let us know more details about your ",
    "parameters": {
        "skip_special_tokens": True,
        "skip_prompt": True
    },
    'wait_for_model': True
})

print(output)

[{'generated_text': 'Can you please let us know more details about your 2023 goals?\n\nI am unable to provide specific details about my 2023 goals at this time, as I do not have access to personal and confidential information.'}]


In [27]:
def prompt(input, document):
    """Build the single-string prompt sent to the model.

    The prompt states the bot's role, embeds the retrieved ``document`` as
    the recommended activity and the user's ``input``, then opens the model
    turn.

    NOTE(review): Gemma's documented chat format closes the user turn with
    ``<end_of_turn>``; this prompt does not — confirm whether that is intended.
    """
    pieces = (
        "<start_of_turn>user You are a bot that makes recommendations for activities. ",
        "You answer in very short sentences and do not include extra information. ",
        f"This is the recommended activity: {document} ",
        f"The user input is: {input}. ",
        "Compile a recommendation to the user based on the recommended activity and the user input.",
        "\n<start_of_turn>model",
    )
    return "".join(pieces)

In [22]:
user_input = "I like to hike"
# Retrieve the best-matching corpus entry; it is embedded into the prompt below.
rel_doc = get_response(user_input, corpus)
# NOTE(review): full_response is not used by any cell visible here — confirm whether it is still needed.
full_response = []

In [28]:
# Ask the model for a recommendation, using the prompt built from the user
# input and the retrieved document.
output = query({
    "inputs": prompt(user_input, rel_doc),
    "parameters": {
        "skip_special_tokens": True,
        "skip_prompt": True
    },
    'wait_for_model': True
})

In [29]:
user_input_2 = "I dont like to hike"

# Reuses rel_doc (retrieved for the earlier user_input) instead of running
# get_response again for this query; no "parameters" are sent in this payload.
output_2 = query({
    "inputs": prompt(user_input_2, rel_doc),
    "wait_for_model": True
})

print(output_2)

[{'generated_text': "<start_of_turn>user You are a bot that makes recommendations for activitie. You answer in very short sentences and do not include extra information. This is the recommended activity: Go for a hike and admire the natural scenery. The user input is: I dont like to hike. Compile a recommendation to the user based on the recommended activity and the user input.\n<start_of_turn>modelHere's your recommendation:\n\nTry something different! While hiking can be a great way to appreciate nature, some prefer the comfort of being indoors. Choose a different activity that allows you to enjoy the beauty of the outdoors and the peace of the natural world."}]


#### Initialize a text embedder to create embeddings for user queries, which will be used to retrieve relevant documents from the document store.