In [1]:
# Load the knowledge base: every line of the text file becomes one entry.
# The encoding is pinned to UTF-8 (assumes the file is saved as UTF-8) so that
# non-ASCII characters such as curly apostrophes decode the same way on every
# platform instead of depending on the locale's default encoding.
with open('cat-facts.txt', 'r', encoding='utf-8') as file:
    dataset = file.readlines()

print(f'Loaded {len(dataset)} entries')


Loaded 150 entries


In [2]:
import ollama

# Model used to embed both the stored chunks and the user's query.
EMBEDDING_MODEL = 'hf.co/CompendiumLabs/bge-base-en-v1.5-gguf'
# Model used to generate the chatbot's answer.
LANGUAGE_MODEL = 'hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF'

# In-memory vector store, filled by add_chunk_to_database().
# Each element in the VECTOR_DB will be a tuple (chunk, embedding)
# The embedding is a list of floats, for example: [0.1, 0.04, -0.34, 0.21, ...]
VECTOR_DB = []

def add_chunk_to_database(chunk):
    """Embed ``chunk`` and append the ``(chunk, embedding)`` pair to VECTOR_DB."""
    response = ollama.embed(model=EMBEDDING_MODEL, input=chunk)
    # Only one input is sent, so the first returned embedding is the one we want.
    VECTOR_DB.append((chunk, response['embeddings'][0]))

In [3]:
# Rebuild the vector database from scratch so that re-running this cell does
# not append every chunk a second time (add_chunk_to_database only appends).
VECTOR_DB.clear()
total_chunks = len(dataset)
for i, chunk in enumerate(dataset):
    # Entries come from readlines(), so strip the trailing newline/whitespace.
    add_chunk_to_database(chunk.strip())
    print(f'Added chunk {i + 1}/{total_chunks} to the database')

Added chunk 1/150 to the database
Added chunk 2/150 to the database
Added chunk 3/150 to the database
Added chunk 4/150 to the database
Added chunk 5/150 to the database
Added chunk 6/150 to the database
Added chunk 7/150 to the database
Added chunk 8/150 to the database
Added chunk 9/150 to the database
Added chunk 10/150 to the database
Added chunk 11/150 to the database
Added chunk 12/150 to the database
Added chunk 13/150 to the database
Added chunk 14/150 to the database
Added chunk 15/150 to the database
Added chunk 16/150 to the database
Added chunk 17/150 to the database
Added chunk 18/150 to the database
Added chunk 19/150 to the database
Added chunk 20/150 to the database
Added chunk 21/150 to the database
Added chunk 22/150 to the database
Added chunk 23/150 to the database
Added chunk 24/150 to the database
Added chunk 25/150 to the database
Added chunk 26/150 to the database
Added chunk 27/150 to the database
Added chunk 28/150 to the database
Added chunk 29/150 to the dat

In [4]:
# Peek at the first three (chunk, embedding) pairs to sanity-check the database.
first_entries = VECTOR_DB[:3]
print(first_entries)

[('On average, cats spend 2/3 of every day sleeping. That means a nine-year-old cat has been awake for only three years of its life.', [-0.035970155, -0.022440959, 0.046182215, -0.08066643, 0.036877893, -0.013563703, 0.07768966, 0.05330549, -0.01427995, -0.0018077219, -0.020548094, -0.0063343393, -0.05817062, 0.013200637, -0.045735165, 0.04127654, 0.086985126, 0.011763422, -0.03238907, -0.029227162, 0.0053657885, 0.026087534, -0.026039077, -0.0013672186, 0.05169447, -0.013776624, -0.010711662, -0.0033302167, 0.0034236843, -0.012261344, 0.0374168, -0.03157604, -0.03354208, 0.007364446, 0.025911601, -0.03590277, -0.01390067, -0.03479717, 0.01369166, 0.03293051, -0.034021743, -0.012981715, -0.019302616, 0.014980921, -0.029776828, -0.017774286, -0.002222332, -0.013286415, 0.02611608, 0.016452605, -0.032209843, 0.00114516, 0.0044678138, -0.0055274135, -0.019717569, 0.039547388, 0.010589137, -0.0028480054, -0.007264143, 0.024238866, 0.043130234, 0.0061858585, 0.008503005, -0.015804902, -0.01

In [5]:
def cosine_similarity(a, b):
    """Return the cosine similarity of two numeric vectors.

    Args:
        a: Sequence of numbers.
        b: Sequence of numbers; paired element-wise with ``a`` via ``zip``,
            so extra trailing elements of the longer input do not contribute
            to the dot product.

    Returns:
        ``dot(a, b) / (|a| * |b|)``, or ``0.0`` when either vector has zero
        magnitude (including empty input), where the ratio is undefined.
    """
    dot_product = sum(x * y for x, y in zip(a, b))
    norm_a = sum(x ** 2 for x in a) ** 0.5
    norm_b = sum(x ** 2 for x in b) ** 0.5
    denominator = norm_a * norm_b
    # A zero-magnitude vector has no direction; report "no similarity"
    # instead of raising ZeroDivisionError.
    if denominator == 0:
        return 0.0
    return dot_product / denominator

In [6]:
def retrieve(chunk, top_n=3):
    """Return the ``top_n`` stored chunks most similar to the query text.

    Args:
        chunk: Query text to embed and compare against every VECTOR_DB entry.
        top_n: Number of leading results to keep after sorting.

    Returns:
        A list of ``(stored_chunk, similarity)`` tuples, sorted by descending
        cosine similarity to the query.
    """
    response = ollama.embed(model=EMBEDDING_MODEL, input=chunk)
    query_embedding = response['embeddings'][0]

    # Score every stored chunk against the query embedding.
    scored = [
        (stored_chunk, cosine_similarity(query_embedding, stored_embedding))
        for stored_chunk, stored_embedding in VECTOR_DB
    ]

    # Highest similarity first; the sort is stable, so ties keep insertion order.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]


# Ask the user for a question and fetch the most similar stored chunks for it.
input_query = input('Ask me a question: ')
retrieved_knowledge = retrieve(input_query)

print('Retrieved knowledge:')
for fact, score in retrieved_knowledge:
    print(f' - (similarity: {score:.2f}) {fact}')



# Build the system prompt: fixed instructions followed by one " - <chunk>"
# bullet per retrieved chunk.
# The bullets are joined outside the f-string because a backslash ('\n') inside
# an f-string replacement field is a SyntaxError on Python versions before 3.12.
context_lines = '\n'.join(f' - {chunk}' for chunk, _similarity in retrieved_knowledge)
instruction_prompt = f'''You are a helpful chatbot.
Use only the following pieces of context to answer the question. Don't make up any new information:
{context_lines}
'''



# Send the context-bearing system prompt together with the user's question.
# stream=True is requested, and the result is consumed piece by piece below.
messages = [
    {'role': 'system', 'content': instruction_prompt},
    {'role': 'user', 'content': input_query},
]
stream = ollama.chat(model=LANGUAGE_MODEL, messages=messages, stream=True)

# Print each streamed piece as it arrives, without adding newlines between them.
print('Chatbot response:')
for part in stream:
    print(part['message']['content'], end='', flush=True)


Retrieved knowledge:
 - (similarity: 0.82) A cat can travel at a top speed of approximately 31 mph (49 km) over a short distance.
 - (similarity: 0.69) A cat’s heart beats nearly twice as fast as a human heart, at 110 to 140 beats a minute.
 - (similarity: 0.67) Cats are extremely sensitive to vibrations. Cats are said to detect earthquake tremors 10 or 15 minutes before humans can.
Chatbot response:
A cat can travel approximately 31 mph (49 km) over a short distance.