In [1]:
pip install sentence-transformers faiss-cpu requests python-dotenv numpy


Collecting sentence-transformers
  Using cached sentence_transformers-5.1.0-py3-none-any.whl.metadata (16 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp313-cp313-win_amd64.whl.metadata (5.2 kB)
Collecting requests
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting python-dotenv
  Using cached python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting numpy
  Using cached numpy-2.3.2-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Downloading transformers-4.56.0-py3-none-any.whl.metadata (40 kB)
Collecting tqdm (from sentence-transformers)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Using cached torch-2.8.0-cp313-cp313-win_amd64.whl.metadata (30 kB)
Collecting scikit-learn (from sentence-transformers)
  Using cached scikit_learn-1.7.1-cp313-cp313-win_amd64.whl.metadata (11 kB)
Collecting scipy (from sentence


[notice] A new release of pip is available: 24.2 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import os
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
import json
import requests
from dotenv import load_dotenv

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Populate the process environment (python-dotenv) before any setting is read.
load_dotenv()

# Groq chat-completions endpoint and the model requested from it.
API_URL = "https://api.groq.com/openai/v1/chat/completions"
llm_model = "llama-3.3-70b-versatile"

# API key for the Authorization header; None when the variable is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

In [4]:
""" Step 3: Load the Embedding Model """
# Sentence-embedding model; the same instance encodes the documents and,
# later, each incoming query.
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# Vector width reported by the model (typically 384); the FAISS index below
# is constructed with this dimension.
embedding_dim = embedding_model.get_sentence_embedding_dimension()

In [5]:
""" Step 4: Prepare Your Documents """
# Tiny in-memory corpus. retrieve_documents() maps search result ids back
# into this list by position, so the order here must not change after indexing.
documents = [
    # 0
    "The capital of France is Paris.",
    # 1
    "Python is a popular programming language.",
    # 2
    "Machine learning is a subset of artificial intelligence.",
    # 3
    "The Eiffel Tower is in Paris.",
    # 4
    "Groq provides fast AI inference.",
]

In [6]:
""" Step 5: Generate Embeddings for Documents """
# Encode the whole corpus in one call and materialise it as a float32 array,
# which is what gets handed to the FAISS index in the next cell.
doc_embeddings = np.array(embedding_model.encode(documents), dtype='float32')


In [7]:
""" Step 6: Set Up FAISS Vector Database """
# Flat L2 index sized to the embedding dimension.
index = faiss.IndexFlatL2(embedding_dim)
# Add every document vector. retrieve_documents() indexes `documents` with the
# ids returned by index.search, which relies on the vectors being added in the
# same order as `documents`.
index.add(doc_embeddings)

In [8]:
""" Step 7: Define Retrieval and Response Functions """

def retrieve_documents(query, top_k=3):
    """Return the documents whose embeddings are closest to ``query``.

    Args:
        query: Text to embed with ``embedding_model`` and search for.
        top_k: Maximum number of documents to return. Values larger than the
            number of indexed vectors are clamped to that number; values
            <= 0 (or an empty index) yield an empty list.

    Returns:
        A list of at most ``top_k`` strings taken from ``documents``, in the
        order in which ``index.search`` returned their ids.
    """
    # FAISS pads the result with -1 when asked for more neighbours than it
    # holds. Unfiltered, documents[-1] would silently return the *last*
    # document, so clamp k up front and drop any invalid id afterwards.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return []

    query_embedding = np.asarray(embedding_model.encode([query]), dtype='float32')
    _distances, indices = index.search(query_embedding, k)

    return [documents[i] for i in indices[0] if 0 <= i < len(documents)]

def generate_response(query, retrieved_docs, temperature=0.4, timeout=30):
    """Ask the Groq chat-completions API to answer ``query`` from the given context.

    Args:
        query: The user's question.
        retrieved_docs: Iterable of context strings; each becomes one
            ``- ...`` bullet in the prompt.
        temperature: Sampling temperature forwarded to the API.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        The stripped text of the first choice on success. On any failure
        (missing API key, transport error, non-200 status, unexpected
        response body) a human-readable error string is returned instead of
        raising.
    """
    # Fail fast instead of sending "Bearer None" and getting an opaque 401.
    if not GROQ_API_KEY:
        return "API request failed: GROQ_API_KEY is not set"

    context = "\n".join(f"- {doc}" for doc in retrieved_docs)

    # Prompt text is kept verbatim; it is part of the model's input.
    prompt = f"""
    You are a concise and accurate assistant. Use the provided context to answer the query directly and clearly. 
    If the context doesn't contain relevant information, then simply say: **Can't provide a valid ans**.

    Context:
    {context}

    Query: {query}

    Answer:
    """

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": llm_model,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        "temperature": temperature
    }

    try:
        # json= lets requests serialise the body; timeout bounds the wait.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as e:
        return f"API request failed: {str(e)}"

    if response.status_code != 200:
        return f"Error {response.status_code}: {response.text}"

    try:
        return response.json()['choices'][0]['message']['content'].strip()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
        # 200 with a body that is not JSON or lacks choices[0].message.content
        return f"Unexpected API response format: {e!r}"

In [9]:
""" Example Usage """
if __name__ == "__main__":
    # Demo run: none of the sample documents mention Django, so this exercises
    # the "context is not relevant" path of the prompt.
    query = "What is Django?"

    # Step 1 - fetch the two nearest documents for the query.
    retrieved = retrieve_documents(query, top_k=2)
    print(f"Retrieved Documents: {retrieved}")

    # Step 2 - let the LLM answer using only that context.
    response = generate_response(query, retrieved)
    print(f"Generated Response: {response}")


Retrieved Documents: ['Python is a popular programming language.', 'Machine learning is a subset of artificial intelligence.']
Generated Response: **Can't provide a valid answer**
