# Testing the API

First of, you need to run the following command on terminal to start running the api on the local server:

uvicorn main:api --reload

In [1]:
url_to_index = 'https://en.wikipedia.org/wiki/Brazil'
query1 = "What is the population of Brazil?"
query2 = "when was the Treaty of Tordesillas?"
query3 = "When did Pedro Álvares Cabral land in Brazil?"
query4 = "Who was Pedro Alvares Cabral?"
query5 = "How many states does Brazil have?"
query6 = "What is the capital of Brazil?"
followup_query11 = "and how big is its territory?"
followup_query12 = "and when was the first settlement established?"

url_to_index2 = "https://en.wikipedia.org/wiki/France"
query21 = "What is capital of France?"
followup_query21 = "how many people live there?"
followup_query22 = "and who is the president?"
followup_query23 = "Which was the last quesiont I asked?"

In [2]:
import requests

BASE_URL = "http://127.0.0.1:8000"  # Make sure FastAPI is running

# Test Indexing
index_response = requests.post(f"{BASE_URL}/index_url/", params={"url": url_to_index})

print(index_response.json())  # Should return "URL indexed successfully"

{'message': 'URL indexed successfully'}


In [3]:
# Test Asking
ask_response = requests.get(f"{BASE_URL}/ask/", params={"url": url_to_index, "question": query1})

print(ask_response.json())  # Should return the answer

{'answer': 'The population of Brazil is approximately 210.86 million.'}


In [4]:
# Test retrieval 
retrieval_response = requests.get(f"{BASE_URL}/get_retrieval_text_and_similarity/", params={"url": url_to_index, "question": query1})

print(retrieval_response.json()) # Should return the best mathcing paragraph and its cossine similarity

{'context': 'According to the latest official projection, it is estimated that Brazil’s population was 210,862,983 on July 1, 2022—an adjustment of 3.9% from the initial figure of 203 million reported by the 2022 census.[354] The population of Brazil, as recorded by the 2008 PNAD, was approximately 190 million[355] (22.31 inhabitants per square kilometer or 57.8/sq\xa0mi), with a ratio of men to women of 0.95:1[356] and 83.75% of the population defined as urban.[357] The population is heavily concentrated in the Southeastern (79.8\xa0million inhabitants) and Northeastern (53.5\xa0million inhabitants) regions, while the two most extensive regions, the Center-West and the North, which together make up 64.12% of the Brazilian territory, have a total of only 29.1\xa0million inhabitants.\n', 'cossine_similarity': '[[0.7869417]]'}


In [6]:
# Test chat
user_id = "defaultuser"
chat_response = requests.get(f"{BASE_URL}/chat/", params={"url": url_to_index, "question": query1,
                                                           "user_id": user_id})

print(chat_response.json())  # Should return the answer

{'answer': 'Approximately 211 million people.'}


In [7]:
# Test followup questions
followup11_response = requests.get(f"{BASE_URL}/chat/", params={"url": url_to_index, "question": followup_query11,
                                                           "user_id": user_id})
print(followup11_response.json())  # Should return the answer

{'answer': "The size of Brazil's territory is approximately 8.5 million square kilometers (3.3 million square miles)."}


In [8]:
# Test followup questions
followup12_response = requests.get(f"{BASE_URL}/chat/", params={"url": url_to_index, "question": followup_query12,
                                                           "user_id": user_id})
print(followup12_response.json())  # Should return the answer

{'answer': 'The first settlement was established in 1532.'}


In [9]:
# Test get chat history
chat_history_response = requests.get(f"{BASE_URL}/get_chat_history/", params={"user_id": user_id, "url": url_to_index})

print(chat_history_response.json())  # Should return the chat history # we just keep the last 10 messages

{'chat_history': "Chatbot:Approximately 211 million people.\nUser:and how big is its territory?\nChatbot:The size of Brazil's territory is approximately 8.5 million square kilometers (3.3 million square miles).\nUser:and when was the first settlement established?\nChatbot:The first settlement was established in 1532."}


In [7]:
from fastapi import FastAPI, Query
import requests
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer, CrossEncoder
import faiss
import numpy as np
from typing import Dict

In [8]:
baseurl = 'https://en.wikipedia.org/wiki/Brazil'
query = "What is the population of Brazil?"
# query = "when was the Treaty of Tordesillas?"
# query = "When did Pedro Álvares Cabral land in Brazil?"
# query = "Who was Pedro Alvares Cabral?"
# query = "How many states does Brazil have?"
# query = "What is the capital of Brazil?"
# followup_query = "and when was the first settlement established?"
followup_query = "and how big is its territory?"

# baseurl = "https://en.wikipedia.org/wiki/France"
# query = "What is capital of France?"
# followup_query = "how many people live there?"
# followup_query2 = "and who is the president?"
# followup_query3 = "Which was the last quesiont I asked?"

In [9]:
# Load the embedding model
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# model = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2")
# model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")  # Reranker model

# Storage for indexed sites
index_storage: Dict[str, Dict] = {}

#chat history dict
chat_history: Dict[str,list] = {}

In [10]:
def index_url2(url: str):
    """Indexes the extracted text by creating embeddings and storing them in FAISS."""
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        text = [p.get_text() for p in soup.find_all("p")]
        # print(text)
        sentences = text#.split("\n")
        # print(sentences)

        embeddings = model.encode(sentences, convert_to_numpy=True)
        norm_embeddings = np.linalg.norm(embeddings,axis=1,keepdims=True)
        emb = embeddings / norm_embeddings
        d = emb.shape[1]
        faiss_index = faiss.IndexFlatIP(d)
        faiss_index.add(emb)

        index_storage[url] = {
            "faiss_index": faiss_index,
            "sentences": sentences,
            "embeddings": embeddings
        }

        return {"message": "URL indexed successfully"}
    except requests.RequestException as e:
        return {"error": f"Failed to fetch URL: {e}"}

In [11]:
index_url2(baseurl)
# index_storage[baseurl]['embeddings'].shape

{'message': 'URL indexed successfully'}

In [12]:
if baseurl not in index_storage:
        print("error : URL not indexed. Please index it first.")

query_embedding = model.encode([query],convert_to_numpy=True)
query_embedding_norm = np.linalg.norm(query_embedding, axis=1, keepdims=True)
query_emb = query_embedding / query_embedding_norm
faiss_index = index_storage[baseurl]["faiss_index"]
sentences = index_storage[baseurl]["sentences"]

In [13]:
cossim, idxs = faiss_index.search(query_embedding, k=3)

In [14]:
cossim, idxs

(array([[0.7869417 , 0.6669434 , 0.65723085]], dtype=float32),
 array([[96,  1, 97]]))

In [15]:
for i in idxs[0]:
    print(sentences[i])
    print('\n')

According to the latest official projection, it is estimated that Brazil’s population was 210,862,983 on July 1, 2022—an adjustment of 3.9% from the initial figure of 203 million reported by the 2022 census.[354] The population of Brazil, as recorded by the 2008 PNAD, was approximately 190 million[355] (22.31 inhabitants per square kilometer or 57.8/sq mi), with a ratio of men to women of 0.95:1[356] and 83.75% of the population defined as urban.[357] The population is heavily concentrated in the Southeastern (79.8 million inhabitants) and Northeastern (53.5 million inhabitants) regions, while the two most extensive regions, the Center-West and the North, which together make up 64.12% of the Brazilian territory, have a total of only 29.1 million inhabitants.



Brazil,[b] officially the Federative Republic of Brazil,[c] is the largest and easternmost country in South America. It is the world's fifth-largest country by area and the seventh largest by population, with over 212 million pe

In [18]:
# Rerank results using the CrossEncoder
pairs = [(query, passage) for passage in [sentences[i] for i in idxs[0]]]
rerank_scores = reranker.predict(pairs)
pairs, rerank_scores

([('What is the population of Brazil?',
   'According to the latest official projection, it is estimated that Brazil’s population was 210,862,983 on July 1, 2022—an adjustment of 3.9% from the initial figure of 203 million reported by the 2022 census.[354] The population of Brazil, as recorded by the 2008 PNAD, was approximately 190 million[355] (22.31 inhabitants per square kilometer or 57.8/sq\xa0mi), with a ratio of men to women of 0.95:1[356] and 83.75% of the population defined as urban.[357] The population is heavily concentrated in the Southeastern (79.8\xa0million inhabitants) and Northeastern (53.5\xa0million inhabitants) regions, while the two most extensive regions, the Center-West and the North, which together make up 64.12% of the Brazilian territory, have a total of only 29.1\xa0million inhabitants.\n'),
  ('What is the population of Brazil?',
   "Brazil,[b] officially the Federative Republic of Brazil,[c] is the largest and easternmost country in South America. It is the

In [19]:
# Sort passages by reranker score (higher is better)
sorted_passages = [x for _, x in sorted(zip(rerank_scores, [sentences[i] for i in idxs[0]]), reverse=True)]
sorted_passages

['According to the latest official projection, it is estimated that Brazil’s population was 210,862,983 on July 1, 2022—an adjustment of 3.9% from the initial figure of 203 million reported by the 2022 census.[354] The population of Brazil, as recorded by the 2008 PNAD, was approximately 190 million[355] (22.31 inhabitants per square kilometer or 57.8/sq\xa0mi), with a ratio of men to women of 0.95:1[356] and 83.75% of the population defined as urban.[357] The population is heavily concentrated in the Southeastern (79.8\xa0million inhabitants) and Northeastern (53.5\xa0million inhabitants) regions, while the two most extensive regions, the Center-West and the North, which together make up 64.12% of the Brazilian territory, have a total of only 29.1\xa0million inhabitants.\n',
 "Brazil,[b] officially the Federative Republic of Brazil,[c] is the largest and easternmost country in South America. It is the world's fifth-largest country by area and the seventh largest by population, with ov

In [24]:
from llama_cpp import Llama
import os

MODEL_PATH = "models/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
# MODEL_PATH = "models/mistral-7b-instruct-v0.2.Q4_K_S.gguf"
# MODEL_PATH = 'models/llama-2-7b.Q4_K_S.gguf'
# MODEL_PATH = "models/tinyllama-1.1b-chat-v1.0.Q5_K_S.gguf"
# MODEL_PATH = 'models/phi-2.Q5_K_M.gguf'
# MODEL_PATH = 'models/InstructLM-1.3B.Q8_0.gguf'

# Load the model once when the API starts
llm = None
if os.path.exists(MODEL_PATH):
    llm = Llama(model_path=MODEL_PATH,n_ctx=2048,verbose=False,seed=42)
    print("✅ LLM Loaded Successfully")
else:
    print("❌ Model file not found! Download it to 'models/'")

llama_init_from_model: n_ctx_per_seq (2048) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
ggml_metal_init: skipping kernel_get_rows_bf16                     (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32                   (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_1row              (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_l4                (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_bf16                  (not supported)
ggml_metal_init: skipping kernel_mul_mv_id_bf16_f32                (not supported)
ggml_metal_init: skipping kernel_mul_mm_bf16_f32                   (not supported)
ggml_metal_init: skipping kernel_mul_mm_id_bf16_f32                (not supported)
ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h64           (not supported)
ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h80           (not supported)
ggml_metal_init: skipping kernel_flash_attn_ext_b

✅ LLM Loaded Successfully


In [21]:
context = "\n".join(sorted_passages[:1])
print(context)

According to the latest official projection, it is estimated that Brazil’s population was 210,862,983 on July 1, 2022—an adjustment of 3.9% from the initial figure of 203 million reported by the 2022 census.[354] The population of Brazil, as recorded by the 2008 PNAD, was approximately 190 million[355] (22.31 inhabitants per square kilometer or 57.8/sq mi), with a ratio of men to women of 0.95:1[356] and 83.75% of the population defined as urban.[357] The population is heavily concentrated in the Southeastern (79.8 million inhabitants) and Northeastern (53.5 million inhabitants) regions, while the two most extensive regions, the Center-West and the North, which together make up 64.12% of the Brazilian territory, have a total of only 29.1 million inhabitants.



In [26]:
# prompt = f"Based exclusively on the context given answer in ONE phrase: {query}. \n Context:\n{context}"
prompt = f"""Based only on the context given, answer in ONE phrase: {query}.
  
  If the answer is not in the context, please respond with 'I don't have enough information'.
  
  Context:\n{context}
  """

In [27]:
response = llm(prompt,max_tokens=128, temperature=0.0)['choices'][0]['text']
response

'\nAnswer: The population of Brazil is approximately 210.86 million.'

In [28]:
def chat2(url:str, question:str, user_id:str = 'default_user', number_stored_queries:int=5):
    """Handle follow-up questions using chat memory"""
    if url not in index_storage:
        return {"error": "URL not indexed. Please index it first."}
    
    query_embedding = model.encode([question],convert_to_numpy=True)
    query_embedding_norm = np.linalg.norm(query_embedding, axis=1, keepdims=True)
    query_emb = query_embedding / query_embedding_norm
    faiss_index = index_storage[url]["faiss_index"]
    sentences = index_storage[url]["sentences"]
    
    cossine_similarity, I = faiss_index.search(query_emb, k=1)
    context = "\n".join(sentences[i] for i in I[0])

    if user_id not in chat_history:
        chat_history[user_id] = {}
    if url not in chat_history[user_id]:
        chat_history[user_id][url] = []
    
    chat_history[user_id][url].append(f"User : {question}")

    past_conversation = "\n".join(chat_history[user_id][url][-number_stored_queries:])

    print(past_conversation)

    prompt = f"""Based on the context and on the past conversation given, answer in ONE phrase: {question}.
    
    Context:\n{context}

    Last questions:\n{past_conversation}
    """

    response = llm(prompt,max_tokens=128,temperature=0.0)['choices'][0]['text']

    chat_history[user_id][url].append(f"LLM : {response}")
    
    return {"answer": response}#
            #'cossim': cossine_similarity[0][0].item(),
            #'most_similar_paragraph': sentences[I[0][0]]}


In [32]:
chat2(baseurl, followup_query12 ,user_id='andre')

User : What is the population of Brazil?
LLM : 
    Bot: The population of Brazil was approximately 210.86 million as of July 1, 2022.
User : and how big is its territory?
LLM : 
Answer: Brazil is the fifth largest country by territory, covering approximately 8.5 million square kilometers (3.3 million square miles).
User : and when was the first settlement established?


{'answer': '\nAnswer: The first settlement in Brazil was established in 1532.'}

In [33]:
chat_history['andre'][baseurl]

['User : What is the population of Brazil?',
 'LLM : \n    Bot: The population of Brazil was approximately 210.86 million as of July 1, 2022.',
 'User : and how big is its territory?',
 'LLM : \nAnswer: Brazil is the fifth largest country by territory, covering approximately 8.5 million square kilometers (3.3 million square miles).',
 'User : and when was the first settlement established?',
 'LLM : \nAnswer: The first settlement in Brazil was established in 1532.']

In [None]:
def ask(url: str, question: str):
    """Finds the most relevant sentence based on the question using FAISS."""
    if url not in index_storage:
        return {"error": "URL not indexed. Please index it first."}
    
    question_embedding = model.encode([question], convert_to_numpy=True)
    faiss_index = index_storage[url]["faiss_index"]
    sentences = index_storage[url]["sentences"]
    
    _, I = faiss_index.search(question_embedding, k=2)
    best_match = sentences[I[0][0]] if I[0][0] < len(sentences) else "No relevant answer found."
    
    return {"answer": best_match}

In [6]:
_, I=ask(url=baseurl,question='what is the population of Brazil?')

ValueError: not enough values to unpack (expected 2, got 1)

In [44]:
llm = Llama.from_pretrained(
    repo_id = 'TheBloke/Llama-2-7B-GGUF',
    filename = 'llama-2-7b.Q4_K_S.gguf',
    local_dir = 'models',
    verbose = True
)

llama_model_load_from_file_impl: using device Metal (Apple M3) - 9836 MiB free
llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from models/mistral-7b-instruct-v0.2.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.

In [None]:
# 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
# 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