In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# 🔎 AGI Voice Agent - RAG Pipeline Build (Notebook 11)

### Objective:
Implement Retrieval-Augmented Generation (RAG) to:
- Retrieve knowledge from the 73 datasets
- Enhance the AGI agent’s reasoning with relevant context
- Generate more accurate and informed responses






📌 1. Install Required Libraries

!pip install langchain sentence-transformers faiss-cpu transformers




📌 2. Load Libraries

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
import pandas as pd




📌 3. Prepare Embeddings

embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Example dataset
df = pd.read_parquet('processed/ai_reasoning_features.parquet')
documents = df['question'].fillna('').tolist()

# Create embeddings
doc_embeddings = embedding_model.embed_documents(documents)




📌 4. Create FAISS Vector Store

db = FAISS.from_texts(documents, embedding_model)
db.save_local("vectorstore/agi_voice_agent_faiss")




📌 5. Load Vector Store for Retrieval

db = FAISS.load_local("vectorstore/agi_voice_agent_faiss", embedding_model)




📌 6. Retrieve Relevant Context

def retrieve_context(query, k=3):
    results = db.similarity_search(query, k=k)
    return "\n".join([r.page_content for r in results])

query = "Explain the theory of consciousness"
context = retrieve_context(query)
print("Retrieved Context:\n", context)




📌 7. Connect to Language Model for Generation

generator = pipeline('text-generation', model='gpt2', max_length=200)
llm = HuggingFacePipeline(pipeline=generator)

def generate_rag_response(query):
    context = retrieve_context(query)
    prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"
    response = llm(prompt)
    return response[0]['generated_text']

# Example
response = generate_rag_response(query)
print("RAG Response:\n", response)




SyntaxError: invalid character '’' (U+2019) (3315430429.py, line 6)