<a href="https://colab.research.google.com/github/NandhiniAnne/nandhiniresumechatbot/blob/main/Resume_Chatbot_%26_Semantic_Search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
# --- AI-Powered Multi-Resume Chatbot ---
# This script launches a Gradio web interface to analyze multiple resumes.
# It uses semantic search to find relevant information and the Gemini API
# to provide high-quality, synthesized answers to user questions.

# --- Step 1: Import necessary libraries ---
import gradio as gr
from sentence_transformers import SentenceTransformer
import faiss
import fitz  # PyMuPDF
import json
import os
import asyncio
import aiohttp
import numpy as np
from dotenv import load_dotenv

# --- Step 2: Securely load API Key ---
# load_dotenv() reads a .env file (if one can be found) into the process
# environment; the key is then looked up from that environment.
# Expected .env content: GEMINI_API_KEY="YOUR_SECRET_KEY"
load_dotenv()
# Resolves to None when the variable is not set.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")


# --- 3. Core Functions: Parsing, Indexing, Searching, and AI Answering ---

def load_and_parse_resume_by_line(pdf_path, filename):
    """Load one PDF and split its text into heading-delimited sections.

    The text of every page is concatenated and scanned line by line. A line
    whose stripped, upper-cased text exactly matches a known section heading
    starts a new chunk; every other non-blank line is appended to the chunk
    currently being built. Text that appears before the first heading forms
    its own leading chunk.

    Args:
        pdf_path: Path of the PDF file to open.
        filename: Display name stored as the 'source' of every chunk.

    Returns:
        A list of ``{'source': filename, 'content': text}`` dicts, one per
        section longer than 20 characters. An empty list is returned when
        the PDF cannot be read (the error is printed, not raised).
    """
    try:
        # The context manager closes the document even when text extraction
        # fails part-way through, so the file handle is never leaked.
        with fitz.open(pdf_path) as doc:
            full_text = "".join(page.get_text() for page in doc)
    except Exception as e:
        # Best-effort boundary: one unreadable file must not abort the batch.
        print(f"Error reading PDF {filename}: {e}")
        return []

    # A frozenset gives O(1) membership tests for the per-line heading check.
    headings = frozenset({
        'OBJECTIVE', 'EDUCATION', 'SKILLS', 'PROJECTS', 'CERTIFICATIONS',
        'EXTRACURRICULAR ACTIVITIES', 'ADDITIONAL INFORMATION', 'WORK EXPERIENCE',
        'EXPERIENCE', 'EMPLOYMENT HISTORY', 'PUBLICATIONS', 'SUMMARY', 'CONTACT',
    })
    chunks = []
    current_chunk_lines = []

    for line in full_text.split('\n'):
        cleaned_line = line.strip()
        if not cleaned_line:
            # Blank lines carry no content and never match a heading.
            continue

        if cleaned_line.upper() in headings:
            # A heading closes the chunk in progress and opens a new one.
            if current_chunk_lines:
                chunks.append("\n".join(current_chunk_lines))
            current_chunk_lines = [cleaned_line]
        else:
            current_chunk_lines.append(cleaned_line)

    # Flush whatever was collected after the last heading.
    if current_chunk_lines:
        chunks.append("\n".join(current_chunk_lines))

    # Very short chunks (e.g. a heading with no body) are dropped.
    return [{'source': filename, 'content': chunk} for chunk in chunks if len(chunk) > 20]

def build_semantic_index(chunks_with_source, model):
    """Build a FAISS L2 index over the 'content' text of each chunk.

    Args:
        chunks_with_source: List of dicts that each carry a 'content' string.
        model: Embedding model exposing ``encode(list_of_texts)``.

    Returns:
        A ``faiss.IndexFlatL2`` holding one vector per chunk, in the same
        order as ``chunks_with_source``, or ``None`` when there is nothing
        to index.
    """
    if not chunks_with_source:
        # Return a single None (not a tuple) so that the empty case has the
        # same shape as the normal return value and `index is None` checks
        # in callers behave as intended.
        return None
    text_chunks = [chunk['content'] for chunk in chunks_with_source]
    embeddings = model.encode(text_chunks)
    # The index is fed float32 vectors; its dimensionality is taken from the
    # second axis of the embedding matrix.
    embeddings_np = np.array(embeddings).astype('float32')
    index = faiss.IndexFlatL2(embeddings_np.shape[1])
    index.add(embeddings_np)
    return index

def search(query, model, index, all_chunks_with_source, top_k=3):
    """Return the chunk dicts whose embeddings are closest to the query.

    Args:
        query: The user's question as plain text.
        model: Embedding model exposing ``encode(list_of_texts)``.
        index: Index exposing ``search(vectors, k)``, or ``None``.
        all_chunks_with_source: Chunk dicts in the order they were indexed.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunk dicts, in the order reported by the index.
        An empty list when there is no index or nothing to look up.
    """
    if index is None or not all_chunks_with_source:
        return []

    # Never ask for more neighbours than there are chunks.
    k = min(top_k, len(all_chunks_with_source))
    if k <= 0:
        return []

    query_embedding = model.encode([query])
    query_embedding_np = np.array(query_embedding).astype('float32')
    _distances, indices = index.search(query_embedding_np, k)

    # FAISS reports -1 for result slots it could not fill; indexing the chunk
    # list with -1 would silently return the last chunk, so such ids (and any
    # id outside the list) are skipped.
    chunk_count = len(all_chunks_with_source)
    return [
        all_chunks_with_source[int(i)]
        for i in indices[0]
        if 0 <= int(i) < chunk_count
    ]

async def answer_question_with_gemini(question, context_chunks):
    """Ask the Gemini generateContent endpoint to answer from resume context.

    The retrieved chunks are concatenated into one context block, followed by
    the question. The request is attempted up to three times; HTTP 429 and
    network/timeout errors trigger an exponential back-off (1s, 2s) before
    the next attempt, while any other non-200 status fails immediately.

    Args:
        question: The user's question.
        context_chunks: List of ``{'source': ..., 'content': ...}`` dicts.

    Returns:
        The generated answer text, or a human-readable message describing
        why no answer could be produced. This function does not raise for
        API or network failures.
    """
    if not GEMINI_API_KEY:
        return "ERROR: Gemini API key is missing. Please create a .env file and add your GEMINI_API_KEY."

    if not context_chunks:
        return "No relevant information found across the uploaded resumes."

    context = "\n\n---\n\n".join([f"CONTEXT from resume '{c['source']}':\n{c['content']}" for c in context_chunks])
    system_prompt = "You are an expert HR assistant. Answer questions strictly based on the provided resume context. If information is missing, state that clearly. For lists like skills, use bullet points."
    user_prompt = f"{context}\n\nQUESTION: {question}"

    api_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-05-20:generateContent"
    # The key travels in a header rather than the query string so it does not
    # end up in URLs that get logged or echoed in error messages.
    headers = {'Content-Type': 'application/json', 'x-goog-api-key': GEMINI_API_KEY}
    payload = {
        "contents": [{"parts": [{"text": user_prompt}]}],
        "systemInstruction": {"parts": [{"text": system_prompt}]},
        "generationConfig": {"temperature": 0.1, "topP": 0.9, "maxOutputTokens": 800}
    }

    max_retries = 3
    delay = 1
    async with aiohttp.ClientSession() as session:
        for attempt in range(max_retries):
            is_last_attempt = attempt == max_retries - 1
            try:
                async with session.post(api_url, json=payload, headers=headers) as response:
                    if response.status == 200:
                        result = await response.json()
                        # `or` fallbacks cover both a missing key and an empty
                        # list/dict, either of which would otherwise make the
                        # [0] lookups below raise.
                        candidate = (result.get('candidates') or [{}])[0]
                        parts = (candidate.get('content') or {}).get('parts') or [{}]
                        text = parts[0].get('text')
                        if text:
                            return text
                        return f"AI returned an empty response. Reason: {candidate.get('finishReason', 'Unknown Error')}"

                    if response.status != 429:
                        error_text = await response.text()
                        print(f"API Error: {response.status} - {error_text}")
                        return f"Error: API request failed with status: {response.status}. Check your API key and billing."

                    if not is_last_attempt:
                        print(f"Rate limited. Retrying in {delay}s...")
            except (aiohttp.ClientError, asyncio.TimeoutError) as e:
                # Timeouts are not ClientError subclasses in every case, so
                # they are listed explicitly to keep the no-raise contract.
                print(f"Network Error (Attempt {attempt+1}/{max_retries}): {e}")

            # Back off only when another attempt will actually follow.
            if not is_last_attempt:
                await asyncio.sleep(delay)
                delay *= 2

    return "Sorry, the AI service is currently unavailable. Please try again later."

# --- 4. Main Application Logic & Gradio UI ---
class MultiResumeChatbot:
    """Holds the embedding model, the parsed resume chunks and their index."""

    def __init__(self):
        """Load the sentence-embedding model and start with an empty index."""
        print("Loading embedding model...")
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        # Chunk dicts ({'source', 'content'}) in the order they were indexed.
        self.all_chunks_with_source = []
        # Search index over the chunks; None until resumes have been indexed.
        self.faiss_index = None
        print("Model loaded. Ready to launch UI.")

    def index_resumes(self, file_paths):
        """Parse the uploaded files and rebuild the search index from scratch.

        Args:
            file_paths: Uploaded files, given either as objects with a
                ``.name`` attribute holding the path or as plain path strings.

        Returns:
            A status message describing the outcome.
        """
        if not file_paths:
            return "Please upload at least one resume file."

        # Discard the old index together with the old chunks. Clearing only
        # the chunk list would leave a stale index behind when parsing fails,
        # and a later query would then look up ids in an empty list.
        self.all_chunks_with_source = []
        self.faiss_index = None

        for temp_file in file_paths:
            # Accept both upload wrappers exposing `.name` and bare paths.
            path = getattr(temp_file, "name", temp_file)
            filename = os.path.basename(path)
            print(f"Parsing '{filename}'...")
            chunks = load_and_parse_resume_by_line(path, filename)
            self.all_chunks_with_source.extend(chunks)

        if not self.all_chunks_with_source:
            return "Could not parse any text from the provided files. Check PDF format."

        print(f"Building index for {len(self.all_chunks_with_source)} chunks...")
        self.faiss_index = build_semantic_index(self.all_chunks_with_source, self.embedding_model)
        return f"✅ Successfully indexed {len(file_paths)} resumes. Ready for questions."

    async def query_and_answer(self, question):
        """Retrieve the most relevant chunks for a question and ask Gemini.

        Args:
            question: The user's question.

        Returns:
            The answer text, or a prompt to index resumes first when no
            index exists yet.
        """
        if self.faiss_index is None:
            return "Please upload and index resumes first."

        print(f"Searching for context for question: '{question}'")
        relevant_chunks = search(question, self.embedding_model, self.faiss_index, self.all_chunks_with_source)

        print("Context found. Asking Gemini...")
        return await answer_question_with_gemini(question, relevant_chunks)

def create_chatbot_interface():
    """Build the Gradio Blocks UI and wire it to a MultiResumeChatbot.

    The left column handles upload and indexing; the right column holds the
    chat history, the question box (disabled until an index exists) and a
    clear button.

    Returns:
        The configured ``gr.Blocks`` app, not yet launched.
    """
    chatbot_instance = MultiResumeChatbot()

    with gr.Blocks(theme=gr.themes.Soft(), title="Multi-Resume Chatbot") as demo:
        gr.Markdown("<h1>🤖 AI-Powered Multi-Resume Chatbot</h1>")
        gr.Markdown("Upload one or more resumes, click 'Index Resumes', then ask your questions below.")

        with gr.Row():
            with gr.Column(scale=1):
                file_uploader = gr.File(label="Upload Resumes", file_count="multiple", file_types=[".pdf"])
                # Button's `icon` argument takes an image path or URL, so the
                # emoji is placed in the label text instead.
                index_button = gr.Button("📈 Index Resumes", variant="primary")
                status_output = gr.Markdown(value="*No resumes indexed yet.*")

            with gr.Column(scale=2):
                chatbot = gr.Chatbot(label="Chat History", height=550)
                msg = gr.Textbox(label="Your Question", placeholder="e.g., Compare the skills of the candidates...", interactive=False)
                clear = gr.Button("🗑️ Clear Chat")

        def handle_indexing(files):
            """Index the uploaded files and enable the question box on success."""
            if not files:
                return "Please upload files first.", gr.Textbox(interactive=False)
            status = chatbot_instance.index_resumes(files)
            # Only unlock the question box when an index actually exists;
            # a failed parse must not invite questions that cannot be answered.
            ready = chatbot_instance.faiss_index is not None
            return status, gr.Textbox(interactive=ready)

        async def user(user_message, history):
            """Append the question to the history and clear the input box."""
            # History may be None right after the chat has been cleared.
            return "", (history or []) + [[user_message, None]]

        async def bot(history):
            """Fill in the answer for the most recent question in the history."""
            user_message = history[-1][0]
            bot_message = await chatbot_instance.query_and_answer(user_message)
            history[-1][1] = bot_message
            return history

        index_button.click(handle_indexing, inputs=[file_uploader], outputs=[status_output, msg])

        # First echo the question into the chat, then compute the answer.
        msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
            bot, chatbot, chatbot
        )

        def clear_chat():
            """Reset both the chat history and the question box."""
            return None, ""

        clear.click(clear_chat, None, [chatbot, msg], queue=False)

    return demo

# --- 5. Launch the Application ---
if __name__ == "__main__":
    if GEMINI_API_KEY:
        # Key is available: build the UI and serve it.
        try:
            interface = create_chatbot_interface()
            interface.launch(share=True, debug=True)
        except Exception as e:
            print(f"An error occurred: {e}")
    else:
        # Without a key the app is not started; explain how to provide one.
        print("=" * 50)
        print("🔴 WARNING: GEMINI_API_KEY not found in .env file!")
        print("Please create a file named .env and add your key.")
        print("Example .env content: GEMINI_API_KEY=\"AIzaSy...\"")
        print("=" * 50)





Please create a file named .env and add your key.
Example .env content: GEMINI_API_KEY="AIzaSy..."
