In [None]:
import os

import faiss
import numpy as np
import openai

# STEP 1: Set API Key
# Take the key from the OPENAI_API_KEY environment variable when it is set and
# non-empty; otherwise ask for it interactively so it is never hard-coded here.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if openai.api_key is None or openai.api_key == "":
    import getpass
    openai.api_key = getpass.getpass(prompt="Enter your OpenAI API Key: ")

In [None]:
# STEP 2: Load local .txt files from a folder
def load_documents_from_folder(folder_path):
    """Read every ``.txt`` file located directly inside ``folder_path``.

    Files are visited in sorted filename order so the returned list (and any
    chunk indices derived from it) is the same on every run; ``os.listdir``
    alone gives no ordering guarantee. Entries whose name ends in ``.txt`` but
    that are not regular files (e.g. a directory) are skipped instead of
    crashing ``open``.

    Args:
        folder_path: Directory to scan. Sub-directories are not searched.

    Returns:
        list[str]: The full text of each matching file, decoded as UTF-8.

    Raises:
        OSError: If ``folder_path`` cannot be listed or a file cannot be opened.
    """
    documents = []
    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)
        if not filename.endswith(".txt") or not os.path.isfile(file_path):
            continue
        with open(file_path, "r", encoding="utf-8") as f:
            documents.append(f.read())
    return documents

# Directory holding the .txt source documents, relative to the working directory.
folder_path = "./docs"
docs_list = load_documents_from_folder(folder_path=folder_path)

In [None]:
# STEP 3: Chunk each document
def split_text(text, chunk_size=1000):
    """Split ``text`` into consecutive, non-overlapping character chunks.

    Args:
        text: String to split.
        chunk_size: Maximum number of characters per chunk; must be positive.

    Returns:
        list[str]: Chunks in their original order. Every chunk has exactly
        ``chunk_size`` characters except possibly the last one. An empty
        ``text`` yields ``[]``.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative. Without this check a
            negative size would silently produce no chunks at all.
    """
    if chunk_size <= 0:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

# Flatten the per-document chunk lists into a single list, keeping document order.
chunks = [piece for doc in docs_list for piece in split_text(doc)]

In [None]:
# STEP 4: Generate embeddings for each chunk
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding for ``text`` from the OpenAI embeddings endpoint.

    Args:
        text: Value passed as ``input`` to ``openai.Embedding.create``.
        model: Embedding model name. Defaults to the model this notebook
            previously hard-coded, so existing calls behave the same. Vectors
            that will be compared with each other (indexed chunks and the
            query) should be produced with the same model.

    Returns:
        The ``embedding`` field of the first item in the response's ``data``
        list (presumably a list of floats - confirm against the API reference).
    """
    response = openai.Embedding.create(model=model, input=text)
    return response["data"][0]["embedding"]

# One embedding per chunk, in the same order as `chunks` (one API call each).
chunk_embeddings = list(map(get_embedding, chunks))

In [None]:
# --- STEP 5: Build FAISS index ---
# Fail early with a clear message: an empty corpus would otherwise surface as an
# obscure shape error when reading the embedding dimension below.
if not chunk_embeddings:
    raise ValueError(
        "No chunk embeddings to index; check that the documents folder "
        "contains non-empty .txt files."
    )

# Stack the per-chunk vectors into one (n_chunks, dimension) float32 matrix and
# read the dimension from it, so this cell depends only on `chunk_embeddings`
# and runs on a fresh kernel.
embeddings = np.array(chunk_embeddings, dtype="float32")
dimension = embeddings.shape[1]

# Flat (exhaustive) L2-distance index; row i of the index corresponds to chunks[i].
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

In [None]:
# --- STEP 6: Embed user question and search ---
question = "agent memory"
question_embedding = np.array(get_embedding(question)).astype("float32")
top_k = 3
# The query is wrapped in an outer array so `search` receives a one-row 2-D batch.
distances, indices = index.search(np.array([question_embedding]), top_k)

# --- STEP 7: Retrieve top documents ---
# When the index holds fewer than `top_k` vectors, FAISS fills the missing
# result slots with -1. Indexing `chunks[-1]` would silently return the last
# chunk as a bogus hit, so negative ids are skipped.
retrieved_chunks = [chunks[i] for i in indices[0] if i >= 0]

In [None]:
# --- STEP 8: Grade relevance using GPT ---
def grade_relevance(document, question):
    """Ask the chat model whether ``document`` is relevant to ``question``.

    Sends a grader system prompt followed by a user message containing the
    document and the question, with temperature 0.

    Returns:
        The model's reply text with surrounding whitespace stripped. The
        prompt asks for 'yes' or 'no', but the reply is returned as-is and is
        not validated.
    """
    grader_instructions = (
        "You are a grader checking if a document is relevant to the user's question.\n"
        "If the document contains keyword(s) or semantic meaning related to the question, "
        "say 'yes'. Else, say 'no'."
    )
    conversation = [
        {"role": "system", "content": grader_instructions},
        {"role": "user", "content": f"Document:\n{document}\n\nQuestion: {question}"},
    ]

    completion = openai.ChatCompletion.create(
        messages=conversation,
        model="gpt-4o",
        temperature=0,
    )
    first_choice = completion["choices"][0]
    verdict = first_choice["message"]["content"]
    return verdict.strip()

# Show the grader's verdict for each retrieved chunk, numbered from 1.
print("\n--- Relevance Grading ---")
for position, chunk in enumerate(retrieved_chunks, start=1):
    verdict = grade_relevance(chunk, question)
    print(f"\nChunk {position}:\n{verdict}")

In [None]:
# --- STEP 9: Answer generation using GPT ---
def generate_answer(question, context_chunks):
    """Answer ``question`` with the chat model, using ``context_chunks`` as context.

    The chunks are joined with blank lines and embedded, together with the
    question, in a single user message sent with temperature 0.

    Args:
        question: The user's question.
        context_chunks: Iterable of strings to place in the prompt's context
            section.

    Returns:
        The model's reply text with surrounding whitespace stripped.
    """
    joined_context = "\n\n".join(context_chunks)
    user_prompt = (
        "You are a helpful assistant. Use the following context to answer the user's question.\n"
        "\n"
        "Context:\n"
        f"{joined_context}\n"
        "\n"
        f"Question: {question}\n"
        "Answer:"
    )
    conversation = [{"role": "user", "content": user_prompt}]

    completion = openai.ChatCompletion.create(
        messages=conversation,
        model="gpt-3.5-turbo",
        temperature=0,
    )
    reply_text = completion["choices"][0]["message"]["content"]
    return reply_text.strip()

# Generate the final answer from the retrieved chunks and display it.
print("\n--- Final Answer ---")
answer = generate_answer(question=question, context_chunks=retrieved_chunks)
print(answer)