<a href="https://colab.research.google.com/github/MeenakshiRajpurohit/CMPE-252-AI-and-Data-Engineering/blob/main/RAG_IMPLEMENTATION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
# Install required packages
!pip install -q langchain langchain-core langchain-text-splitters langchain-community langchain-huggingface huggingface-hub sentence-transformers faiss-cpu transformers torch

# Import libraries
import os
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEndpoint  # Use HuggingFaceEndpoint instead
from google.colab import userdata

# Resolve the Hugging Face API token: try the Colab secret store first and
# fall back to an interactive prompt if the secret lookup raises.
try:
    huggingface_api_key = userdata.get('HUGGINGFACEHUB_API_TOKEN')
except Exception:
    # Catch Exception instead of using a bare `except:` so that
    # KeyboardInterrupt / SystemExit still stop the cell rather than being
    # swallowed and converted into a token prompt.
    from getpass import getpass
    huggingface_api_key = getpass('Enter your Hugging Face API token: ')

# Also export the token through the process environment under the same name
# as the secret.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = huggingface_api_key

# Build a tiny polar-bear corpus and write it to data/my_document.txt.
# The text starts with a newline and has one sentence per line.
sample_text = (
    "\n"
    "Polar bears are facing significant threats due to climate change.\n"
    "The melting of Arctic sea ice is reducing their hunting grounds.\n"
    "Polar bears primarily hunt seals from sea ice platforms.\n"
    "Without adequate ice, polar bears struggle to find food.\n"
    "Scientists consider polar bears to be vulnerable to extinction.\n"
    "The loss of sea ice habitat is the primary danger to polar bears.\n"
    "Conservation efforts are underway to protect polar bear populations.\n"
)

# Make sure the target directory exists before writing into it
os.makedirs('data', exist_ok=True)
with open('data/my_document.txt', 'w') as sample_file:
    sample_file.write(sample_text)

# Ingestion pipeline: file -> documents -> chunks -> embeddings -> FAISS index

# Read the sample file back in through the text loader
loader = TextLoader(file_path='data/my_document.txt')
documents = loader.load()
doc_count = len(documents)
print(f"✓ Loaded {doc_count} document(s)")

# Break the documents into chunks (chunk_size=500, chunk_overlap=50)
splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=500)
document_chunks = splitter.split_documents(documents)
chunk_count = len(document_chunks)
print(f"✓ Split into {chunk_count} chunks")

# Set up the sentence-transformers embedding model
print("Loading embeddings model...")
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
print("✓ Embeddings loaded")

# Embed the chunks and index them in a FAISS vector store
print("Creating vector store...")
vector_store = FAISS.from_documents(
    documents=document_chunks,
    embedding=embeddings,
)
print("✓ Vector store created")

# Expose the index through the retriever interface used by ask_question
retriever = vector_store.as_retriever()

# Build the LLM client with HuggingFaceEndpoint
print("Initializing LLM...")
llm_settings = {
    "repo_id": "google/flan-t5-large",
    "task": "text2text-generation",
    # The API token is handed to the client explicitly
    "huggingfacehub_api_token": huggingface_api_key,
    # Given as a top-level argument rather than inside model_kwargs
    "temperature": 0.7,
    # Spelled max_new_tokens (not max_length)
    "max_new_tokens": 512,
}
llm = HuggingFaceEndpoint(**llm_settings)
print("✓ LLM initialized")

# Minimal retrieve-then-generate helper
def ask_question(query):
    """Answer ``query`` using retrieved context and the LLM.

    Uses the module-level ``retriever`` to fetch documents for the query,
    joins their ``page_content`` with newlines into a context string, and
    sends a "Context / Question / Answer" prompt to the module-level ``llm``.

    Args:
        query: The question text; passed to the retriever and embedded in
            the prompt.

    Returns:
        Whatever ``llm.invoke`` returns for the assembled prompt.
    """
    # Fetch the documents first, then flatten them into one context string
    retrieved = retriever.invoke(query)
    context = "\n".join(doc.page_content for doc in retrieved)

    # Plain Context/Question/Answer layout, kept simple for the T5 model
    return llm.invoke(f"Context: {context}\n\nQuestion: {query}\n\nAnswer:")

# Run one example question end to end
query = "Are polar bears in danger?"
banner = "=" * 50


def _print_response(text):
    """Print ``text`` under a RESPONSE header framed by banner lines."""
    print(banner)
    print("RESPONSE:")
    print(banner)
    print(text)


print(f"\n{banner}")
print(f"Query: {query}")
print(banner)
print("Generating response...\n")

try:
    # Primary path: retrieval + the LLM client
    response = ask_question(query)
    _print_response(response)
except Exception as err:
    print(f"Error occurred: {err}")
    print("\nTrying alternative approach...")

    # Fallback path: run a local transformers pipeline with flan-t5-base
    from transformers import pipeline

    print("Loading local model...")
    qa_pipeline = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        max_length=512
    )

    # Same retrieval and prompt layout as ask_question
    relevant_docs = retriever.invoke(query)
    context = "\n".join(doc.page_content for doc in relevant_docs)
    prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"

    # Take the generated text of the first result from the pipeline output
    generations = qa_pipeline(prompt)
    response = generations[0]['generated_text']

    _print_response(response)

Enter your Hugging Face API token: ··········
✓ Loaded 1 document(s)
✓ Split into 1 chunks
Loading embeddings model...


                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.


✓ Embeddings loaded
Creating vector store...
✓ Vector store created
Initializing LLM...
✓ LLM initialized

Query: Are polar bears in danger?
Generating response...

Error occurred: InferenceClient.text_generation() got an unexpected keyword argument 'max_length'

Trying alternative approach...
Loading local model...


Device set to use cuda:0


RESPONSE:
polar bears are facing significant threats due to climate change
