## 1. Install and Import Libraries

In [None]:
# Install the necessary libraries
!pip install -q sentence-transformers faiss-cpu groq

In [None]:
# Import the required libraries
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from groq import Groq

import os

## 2. Create the Knowledge Base

In [None]:
knowledge_base_dir = "/content/drive/MyDrive/Jupiter_Files"  # Folder that holds the .txt source documents
knowledge_base = []  # One string per document, in sorted-filename order

# os.listdir() returns entries in arbitrary order. Sorting keeps each
# document at the same list position on every run, so positions in
# knowledge_base are reproducible.
for filename in sorted(os.listdir(knowledge_base_dir)):
    if filename.endswith(".txt"):
        filepath = os.path.join(knowledge_base_dir, filename)
        with open(filepath, 'r', encoding='utf-8') as f:
            knowledge_base.append(f.read())

# Print the knowledge base to verify it loaded correctly
# print(knowledge_base)

## 3. Create Embeddings and the Search Index

In [None]:
# Sentence-transformer model that turns text into embedding vectors
model = SentenceTransformer('all-MiniLM-L6-v2')

# Embed every document of the knowledge base in one call
knowledge_base_embeddings = model.encode(knowledge_base, convert_to_tensor=True)

# The index dimensionality is taken from the width of the embedding matrix
embedding_dim = knowledge_base_embeddings.shape[1]

# The tensor is moved to the CPU and converted to a NumPy array before
# it is handed to FAISS
document_vectors = knowledge_base_embeddings.cpu().numpy()

# Build a flat L2 index and store the document vectors in it
index = faiss.IndexFlatL2(embedding_dim)
index.add(document_vectors)

## 4. Define the Search Function

In [None]:
def search(query, k=1):
    """
    Takes a user's query and returns the most relevant articles.

    Args:
        query: The user's question as a string.
        k: Maximum number of articles to return (default 1).

    Returns:
        A list of article texts taken from ``knowledge_base``, in the order
        the FAISS index reports them. The list holds fewer than ``k`` items
        when the index cannot supply ``k`` neighbours.
    """
    # Embed the query with the same model that embedded the knowledge base
    query_embedding = model.encode([query], convert_to_tensor=True)

    # Search the FAISS index for the most similar embeddings
    _, indices = index.search(query_embedding.cpu().numpy(), k)

    # FAISS fills unused result slots with -1 when fewer than k neighbours
    # exist. Indexing the list with -1 would silently return the LAST
    # article as a bogus match, so those padding entries are dropped.
    return [knowledge_base[i] for i in indices[0] if i >= 0]

## 5. Set Up the LLM

In [None]:
# Read the Groq API key from Colab's userdata store under the name 'GROQ_API_KEY'
from google.colab import userdata
GROQ_API_KEY = userdata.get('GROQ_API_KEY')

# NOTE: the key comes from userdata above, so it does not need to be
# pasted into this notebook.

# Initialize the Groq client
client = Groq(api_key=GROQ_API_KEY)

def get_completion(prompt, model_name="llama3-8b-8192", temperature=0):
    """
    Sends a prompt to the Groq API and returns the model's response.

    Args:
        prompt: Text sent to the model as a single user message.
        model_name: Groq model identifier to query. Defaults to the model
            this notebook was written against.
            NOTE(review): confirm this id is still served by Groq; pass a
            different one here if it is not.
        temperature: Sampling temperature forwarded to the API (default 0).

    Returns:
        The content of the first choice in the response, or a fixed apology
        string if anything in the request or response handling raises.
    """
    try:
        response = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and return a
        # fallback string instead of raising to the caller.
        print(f"An error occurred: {e}")
        return "Sorry, there was an error connecting to the service."

## 6. Define the RAG System

In [None]:
def rag_system(query):
    """
    Answer a question by retrieving supporting text and prompting the LLM.

    The query is passed to ``search``; the returned articles are joined with
    single spaces and embedded, together with the query, in an instruction
    prompt that is sent through ``get_completion``.

    Args:
        query: The user's question as a string.

    Returns:
        Whatever ``get_completion`` returns for the assembled prompt.
    """
    # Step 1: retrieve the matching article(s) and flatten them into one string
    context = ' '.join(search(query))

    # Step 2: place the context and the question inside the instruction template
    llm_prompt = f"""
    **Instructions:**
    - You are a helpful assistant that answers questions based ONLY on the context provided below.
    - Do NOT use any external or prior knowledge.
    - If the answer is not found in the context, you must explicitly state: "I cannot answer that question based on the provided documents."
    - Answer in bullet points if answer is long.


    Context:
    {context}

    Question:
    {query}

    Answer:
    """

    # Step 3: let the LLM produce the answer
    answer = get_completion(llm_prompt)
    return answer

## 7. Ask a Question

In [None]:
# Interactive loop that lets you chat with the RAG system.
# Type 'exit' (surrounding whitespace and letter case are ignored) to stop.
print("RAG System is ready. Ask a question or type 'exit' to stop.")
print("-" * 50)

while True:
    try:
        # Strip so that input such as "exit " still matches the exit command
        user_query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: leave the loop
        # with the normal farewell instead of a traceback.
        print("\nBot: Goodbye!")
        break

    # Nothing was typed; ask again rather than sending an empty question
    if not user_query:
        continue

    if user_query.lower() == 'exit':
        print("Bot: Goodbye!")
        break

    answer = rag_system(user_query)
    print(f"Bot: {answer}\n")