# Testing the API

In [1]:
from fastapi import FastAPI
import requests
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from typing import Dict

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
baseurl = 'https://en.wikipedia.org/wiki/Brazil'
query = "What is the population of Brazil?"
# query = "when was the Treaty of Tordesillas?"
# query = "When did Pedro Álvares Cabral land in Brazil?"
# query = "How many states does Brazil have?"
# query = "What is the capital of Brazil?"

In [3]:
# Load the embedding model
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# model = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2")
# model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Storage for indexed sites
index_storage: Dict[str, Dict] = {}

In [4]:
def index_url(url: str):
    """Indexes the extracted text by creating embeddings and storing them in FAISS."""
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        text = [p.get_text() for p in soup.find_all("p")]
        # print(text)
        sentences = text#.split("\n")
        # print(sentences)

        embeddings = model.encode(sentences, convert_to_numpy=True)
        norm_embeddings = np.linalg.norm(embeddings,axis=1,keepdims=True)
        emb = embeddings / norm_embeddings
        d = emb.shape[1]
        faiss_index = faiss.IndexFlatIP(d)
        faiss_index.add(emb)

        index_storage[url] = {
            "faiss_index": faiss_index,
            "sentences": sentences,
            "embeddings": embeddings
        }

        return {"message": "URL indexed successfully"}
    except requests.RequestException as e:
        return {"error": f"Failed to fetch URL: {e}"}

In [5]:
index_url(baseurl)
# index_storage[baseurl]['embeddings'].shape

{'message': 'URL indexed successfully'}

In [6]:
if baseurl not in index_storage:
        print("error : URL not indexed. Please index it first.")

query_embedding = model.encode([query],convert_to_numpy=True)
query_embedding_norm = np.linalg.norm(query_embedding, axis=1, keepdims=True)
query_emb = query_embedding / query_embedding_norm
faiss_index = index_storage[baseurl]["faiss_index"]
sentences = index_storage[baseurl]["sentences"]

In [7]:
cossim, idxs = faiss_index.search(query_embedding, k=1)

In [8]:
cossim, idxs

(array([[0.7869417]], dtype=float32), array([[96]]))

In [9]:
for i in idxs[0]:
    print(sentences[i])
    print('\n')

According to the latest official projection, it is estimated that Brazil’s population was 210,862,983 on July 1, 2022—an adjustment of 3.9% from the initial figure of 203 million reported by the 2022 census.[354] The population of Brazil, as recorded by the 2008 PNAD, was approximately 190 million[355] (22.31 inhabitants per square kilometer or 57.8/sq mi), with a ratio of men to women of 0.95:1[356] and 83.75% of the population defined as urban.[357] The population is heavily concentrated in the Southeastern (79.8 million inhabitants) and Northeastern (53.5 million inhabitants) regions, while the two most extensive regions, the Center-West and the North, which together make up 64.12% of the Brazilian territory, have a total of only 29.1 million inhabitants.





In [13]:
from llama_cpp import Llama
import os

# MODEL_PATH = "models/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
MODEL_PATH = "models/mistral-7b-instruct-v0.2.Q4_K_S.gguf"
# MODEL_PATH = "models/tinyllama-1.1b-chat-v1.0.Q5_K_S.gguf"
# MODEL_PATH = 'models/phi-2.Q5_K_M.gguf'
# MODEL_PATH = 'models/InstructLM-1.3B.Q8_0.gguf'

# Load the model once when the API starts
llm = None
if os.path.exists(MODEL_PATH):
    llm = Llama(model_path=MODEL_PATH,n_ctx=2048,verbose=False)
    print("✅ LLM Loaded Successfully")
else:
    print("❌ Model file not found! Download it to 'models/'")

llama_init_from_model: n_ctx_per_seq (2048) < n_ctx_train (32768) -- the full capacity of the model will not be utilized
ggml_metal_init: skipping kernel_get_rows_bf16                     (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32                   (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_1row              (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_l4                (not supported)
ggml_metal_init: skipping kernel_mul_mv_bf16_bf16                  (not supported)
ggml_metal_init: skipping kernel_mul_mv_id_bf16_f32                (not supported)
ggml_metal_init: skipping kernel_mul_mm_bf16_f32                   (not supported)
ggml_metal_init: skipping kernel_mul_mm_id_bf16_f32                (not supported)
ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h64           (not supported)
ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h80           (not supported)
ggml_metal_init: skipping kernel_flash_attn_ext_b

✅ LLM Loaded Successfully


In [14]:
context = "\n".join(sentences[i] for i in idxs[0])
print(context)

According to the latest official projection, it is estimated that Brazil’s population was 210,862,983 on July 1, 2022—an adjustment of 3.9% from the initial figure of 203 million reported by the 2022 census.[354] The population of Brazil, as recorded by the 2008 PNAD, was approximately 190 million[355] (22.31 inhabitants per square kilometer or 57.8/sq mi), with a ratio of men to women of 0.95:1[356] and 83.75% of the population defined as urban.[357] The population is heavily concentrated in the Southeastern (79.8 million inhabitants) and Northeastern (53.5 million inhabitants) regions, while the two most extensive regions, the Center-West and the North, which together make up 64.12% of the Brazilian territory, have a total of only 29.1 million inhabitants.



In [15]:
prompt = f"Based exclusively on the context given answer in ONE phrase: {query}. \n Context:\n{context}"

In [16]:
response = llm(prompt,max_tokens=128)['choices'][0]['text']
response

'Answer: The population of Brazil was approximately 210.86 million on July 1, 2022.'

In [None]:
def ask(url: str, question: str):
    """Finds the most relevant sentence based on the question using FAISS."""
    if url not in index_storage:
        return {"error": "URL not indexed. Please index it first."}
    
    question_embedding = model.encode([question], convert_to_numpy=True)
    faiss_index = index_storage[url]["faiss_index"]
    sentences = index_storage[url]["sentences"]
    
    _, I = faiss_index.search(question_embedding, k=2)
    best_match = sentences[I[0][0]] if I[0][0] < len(sentences) else "No relevant answer found."
    
    return {"answer": best_match}

In [6]:
_, I=ask(url=baseurl,question='what is the population of Brazil?')

ValueError: not enough values to unpack (expected 2, got 1)

In [50]:
# llm = Llama.from_pretrained(
#     repo_id = 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF',
#     filename = 'mistral-7b-instruct-v0.2.Q4_K_M.gguf',
#     local_dir = 'models',
#     verbose = True
# )

In [None]:
# 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
# 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