# RAG Chatbot for Portfolio
This notebook implements a Retrieval-Augmented Generation (RAG) chatbot for a portfolio site: it uses FAISS for vector search over the sections of a PDF and a Groq-hosted LLM to generate the responses.

In [None]:
# Install required packages (run once); %pip installs into the running kernel's environment
# %pip install pdfplumber faiss-cpu numpy sentence-transformers langchain-groq

In [None]:
import os
import pdfplumber
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from langchain_groq import ChatGroq

In [None]:
# Configuration
# The Groq API key is read from the environment instead of being written into
# the notebook, so the secret never ends up in the saved file or in version
# control. Create a key at https://console.groq.com and export it before
# starting Jupyter, e.g.:  export GROQ_API_KEY=...
groq_api_key = os.environ.get('GROQ_API_KEY')
if not groq_api_key:
    # Fail here with an actionable message rather than at the first LLM call.
    raise RuntimeError(
        'GROQ_API_KEY is not set. Create a key at https://console.groq.com '
        'and export it as an environment variable before running this notebook.'
    )

# Chat model client used for every generation call in this notebook.
llm = ChatGroq(
    temperature=0.3,
    groq_api_key=groq_api_key,
    model_name='llama-3.3-70b-versatile'
)

# PDF that is parsed into sections and indexed for retrieval.
pdf_file_path = 'DATA.pdf'

In [None]:
# Read and Process PDF
def load_pdf_text(pdf_path):
    """Return the concatenated text of every page in the PDF at ``pdf_path``.

    Pages for which ``extract_text()`` yields nothing (``None`` or an empty
    string) are skipped; the text of each remaining page is followed by a
    newline.
    """
    with pdfplumber.open(pdf_path) as document:
        # The generator is consumed inside the ``with`` block, while the PDF
        # is still open.
        extracted = (page.extract_text() for page in document.pages)
        chunks = [text + '\n' for text in extracted if text]
    return ''.join(chunks)

def structure_text(text):
    """Split ``text`` into sections keyed by their ``'# '`` heading line.

    A section starts at a line beginning with ``'# '``; the rest of that line
    (stripped) is the title and everything up to the next heading (stripped)
    is the content. Text that appears before the first heading is treated as
    a section whose title is its first line.

    Args:
        text: Full document text with ``'# '``-prefixed heading lines.

    Returns:
        dict mapping section title -> section content. Sections with an empty
        title or empty content are dropped. When the same title occurs more
        than once, the contents are joined with a blank line instead of the
        later section overwriting the earlier one.
    """
    # The very first heading has no preceding newline, so the '\n# ' split
    # below would leave its '# ' marker attached to the title.
    if text.startswith('# '):
        text = text[2:]

    documents = {}
    for section in text.split('\n# '):
        heading, _, body = section.partition('\n')
        title = heading.strip()
        content = body.strip()
        if not (title and content):
            continue
        if title in documents:
            # Keep both bodies; overwriting would silently lose content.
            documents[title] += '\n\n' + content
        else:
            documents[title] = content
    return documents

# Extract the raw text from the PDF, then group it into titled sections.
pdf_text = load_pdf_text(pdf_file_path)
structured_data = structure_text(pdf_text)

# Quick sanity check on how many sections were found.
print(f'Structured sections extracted: {len(structured_data)}')

In [None]:
# Create FAISS Index
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Parallel lists: document_keys[i] is the title of document_texts[i], and row i
# of the index corresponds to that same section.
document_keys = list(structured_data.keys())
document_texts = [structured_data[key] for key in document_keys]

# Without any sections there is nothing to index, and reading the embedding
# dimension from shape[1] below would fail with an unhelpful error instead.
if not document_texts:
    raise ValueError(
        'No sections found in structured_data; check that the PDF text '
        "contains '# ' heading lines followed by content."
    )

doc_embeddings = embedder.encode(document_texts, convert_to_numpy=True)
# FAISS works on C-contiguous float32 matrices; this is a no-op when the
# encoder already returns that layout.
doc_embeddings = np.ascontiguousarray(doc_embeddings, dtype=np.float32)

embedding_dim = doc_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)  # exact (brute-force) L2 search
index.add(doc_embeddings)
print('FAISS index built with', index.ntotal, 'documents.')

In [None]:
# Build RAG Prompt
# Upper bound, in characters, on the retrieved context inserted into the prompt.
MAX_CONTEXT_LENGTH = 5000

def build_prompt(query, top_k=5):
    """Build the LLM prompt for ``query`` from the most similar indexed sections.

    The query is embedded with the module-level ``embedder``, the nearest
    sections are looked up in the module-level FAISS ``index``, and their
    ``title:\\ntext`` blocks are joined into the context (truncated to
    ``MAX_CONTEXT_LENGTH`` characters).

    Args:
        query: The user's question.
        top_k: Maximum number of sections to retrieve. Values larger than the
            number of indexed sections are clamped; values <= 0 yield an
            empty context.

    Returns:
        The complete prompt string (instructions, context, question).
    """
    # Never request more neighbours than the index holds: FAISS pads missing
    # results with the label -1, which Python indexing would silently resolve
    # to the *last* document and inject it (possibly several times).
    k = max(0, min(top_k, index.ntotal))

    retrieved_sections = []
    if k > 0:
        query_embedding = embedder.encode(query, convert_to_numpy=True)
        # search() expects a 2-D float32 batch, here a batch of one query.
        query_matrix = np.asarray([query_embedding], dtype=np.float32)
        _distances, indices = index.search(query_matrix, k)
        retrieved_sections = [
            document_keys[i] + ':\n' + document_texts[i]
            for i in indices[0]
            if i >= 0  # defensive: skip any "no result" padding
        ]

    # Slicing is a no-op when the context is already short enough.
    context = '\n\n'.join(retrieved_sections)[:MAX_CONTEXT_LENGTH]

    prompt = f'''You are Tejas's personal AI assistant embedded on his portfolio website.

FORMATTING RULES:
- Use bullet points to list items - avoid long paragraphs
- Use relevant emojis to make responses engaging
- Keep each bullet point short and scannable
- Start with a brief 1-line intro, then use bullets for details

Context:
{context}

Question: {query}
Answer:'''
    return prompt

In [None]:
# Test the RAG Chatbot
query = 'Tell me about Tejas projects'
prompt = build_prompt(query, top_k=5)

# Show only the first 500 characters of the prompt to keep the output short.
print('=== Prompt Sent to Model ===')
print(f'{prompt[:500]} ...')

# Send the full prompt to the model and display its answer.
response = llm.invoke(prompt)
print(f'\n=== Generated Answer ===\n{response.content}')

In [None]:
# Interactive Chat Loop
def chat():
    """Run an interactive question/answer loop on the console.

    Each non-empty line typed by the user is turned into a prompt with
    ``build_prompt`` and sent to ``llm``; the model's answer is printed.
    The loop ends when the user types ``quit``, ``exit`` or ``q`` (case and
    surrounding whitespace are ignored), or when input ends (EOF / Ctrl-C).

    Returns:
        None.
    """
    print("Tejas's Portfolio Chatbot (type 'quit' to exit)")
    print("="*50)

    while True:
        try:
            # Strip so that inputs such as 'quit ' are still recognised.
            query = input('\nYou: ').strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt ends the chat without a traceback.
            print('\nGoodbye!')
            break

        if query.lower() in ('quit', 'exit', 'q'):
            print('Goodbye!')
            break
        if not query:
            # Nothing was asked; do not spend an LLM call on an empty question.
            continue

        prompt = build_prompt(query)
        response = llm.invoke(prompt)
        print(f'\nAssistant: {response.content}')

# Uncomment to run interactive chat
# chat()