# In [None]:
# ========================================
# 🔍 AI-POWERED RESUME SCREENER
# An Intelligent Candidate Evaluation System using Deep Learning and Vector Similarity Search
# ========================================

print("🚀 Starting AI-Powered Resume Screener Setup...")
print("📋 Features: PDF upload, Semantic search, AI analysis, Q&A system")
print("🔧 Technologies: FAISS, Transformers, Gradio, RAG Pipeline")
print("-" * 60)

# ========================================
# STEP 1: INSTALL REQUIRED PACKAGES
# ========================================
# The `!pip ...` lines below are notebook shell escapes, so this section only
# works when the file is executed as a notebook cell, not as a plain script.
# Every package except `requests` is pinned to an exact version.
# NOTE(review): the pins were chosen independently of each other -- confirm
# they are mutually compatible and match the library APIs used further down
# (in particular the Gradio component arguments).
print("📦 Installing required packages...")

!pip install gradio==3.40.0
!pip install faiss-cpu==1.7.4
!pip install PyPDF2==3.0.1
!pip install transformers==4.21.0
!pip install sentence-transformers==2.2.2
!pip install numpy==1.24.0
!pip install requests

# This message is printed unconditionally; the exit status of the pip
# commands above is not checked.
print("✅ All packages installed successfully!")
print("-" * 60)

# ========================================
# STEP 2: IMPORT LIBRARIES
# ========================================
print("📚 Importing required libraries...")

import gradio as gr
import PyPDF2
import faiss
import numpy as np
import requests
import json
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import io
import tempfile
import os
from typing import List, Dict, Tuple, Optional
import logging

# Configure logging: INFO level on the root logger, plus the module-level
# `logger` that the classes and helpers below write their errors to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

print("✅ Libraries imported successfully!")
print("-" * 60)

# ========================================
# STEP 3: NVIDIA API CONFIGURATION
# ========================================
print("🔑 Configuring NVIDIA API...")

# ⚠️ IMPORTANT: supply your actual NVIDIA API key.
# Preferred: export it as the NVIDIA_API_KEY environment variable so the secret
# never has to be written into the notebook. The literal on the right is only
# used when that variable is unset or empty, so replacing it by hand still
# works exactly as before.
NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY") or "your-nvidia-api-key-here"  # 🔑 Or add your API key here

# Verify API key is set (i.e. it is no longer the placeholder value)
if NVIDIA_API_KEY == "your-nvidia-api-key-here":
    print("⚠️ WARNING: Please set your NVIDIA API key above!")
    print("Replace 'your-nvidia-api-key-here' with your actual API key")
else:
    print("✅ NVIDIA API key configured successfully!")

print("-" * 60)

# ========================================
# STEP 4: LOAD MACHINE LEARNING MODELS
# ========================================
print("🤖 Loading machine learning models...")

# Embedding model: turns resume text and search queries into vectors.
print("Loading embedding model...")
embedding_model = SentenceTransformer(
    model_name_or_path="sentence-transformers/all-MiniLM-L6-v2",
)
print("✅ Embedding model loaded")

# Abstractive summarizer used for the per-candidate summary.
print("Loading summarization model...")
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
print("✅ Summarization model loaded")

# Extractive question-answering model used by the query tab.
print("Loading Q&A model...")
qa_model = pipeline(
    task="question-answering",
    model="distilbert-base-uncased-distilled-squad",
)
print("✅ Q&A model loaded")

print("🎉 All models loaded successfully!")
print("-" * 60)

# ========================================
# STEP 5: DEFINE RESUME INDEX CLASS
# ========================================
print("🗂️ Defining ResumeIndex class...")

class ResumeIndex:
    """FAISS-based resume indexing system for semantic similarity search.

    Each resume is embedded as one vector and added to a flat L2 index. The
    resume text and candidate name are kept in two parallel lists; position
    ``i`` in those lists corresponds to row ``i`` returned by the index.
    """

    # Used when the embedding model does not report its own vector size
    # (384 is the all-MiniLM-L6-v2 embedding dimension).
    DEFAULT_DIMENSION = 384

    def __init__(self, embedding_model):
        """Create an empty index sized for ``embedding_model``.

        Args:
            embedding_model: Object exposing ``encode(list_of_str)`` that
                returns a 2-D array. If it also exposes
                ``get_sentence_embedding_dimension()``, that value sizes the
                index; otherwise ``DEFAULT_DIMENSION`` is used.
        """
        self.embedding_model = embedding_model

        # Ask the model for its dimension instead of assuming 384, so a
        # different embedding model does not break `index.add`.
        get_dim = getattr(embedding_model, "get_sentence_embedding_dimension", None)
        reported = get_dim() if callable(get_dim) else None
        self.dimension = int(reported) if reported else self.DEFAULT_DIMENSION

        self.index = faiss.IndexFlatL2(self.dimension)
        self.resumes = []  # Resume texts, parallel to index rows
        self.candidate_names = []  # Candidate names, parallel to index rows

    def add_resume(self, text: str, candidate_name: str) -> None:
        """Embed ``text`` and append it to the index under ``candidate_name``.

        Raises:
            Exception: Any error from encoding or indexing is logged and
                re-raised. The text/name lists are only appended to after the
                vector was added, so they stay aligned with the index.
        """
        try:
            # Generate embedding (one row, since a single text is passed)
            embedding = self.embedding_model.encode([text])

            # Add to FAISS index
            self.index.add(embedding.astype('float32'))

            # Store resume data at the same position as the new index row
            self.resumes.append(text)
            self.candidate_names.append(candidate_name)

            logger.info(f"Added resume for {candidate_name}")

        except Exception as e:
            logger.error(f"Error adding resume: {e}")
            raise

    def search(self, query: str, k: int = 5) -> List[Tuple[str, str, float]]:
        """Return up to ``k`` resumes closest to ``query``.

        Args:
            query: Free-text search query.
            k: Maximum number of results; capped at the number of indexed
                resumes. Non-positive values yield an empty list.

        Returns:
            ``(candidate_name, resume_text, similarity)`` tuples in the order
            produced by the index. ``similarity`` is a plain ``float`` computed
            as ``1 / (1 + distance)``. On any error the problem is logged and
            an empty list is returned.
        """
        try:
            # Never request more neighbours than exist; this also covers the
            # empty-index and k <= 0 cases without touching the models.
            k = min(k, self.index.ntotal)
            if k <= 0:
                return []

            # Generate query embedding
            query_embedding = self.embedding_model.encode([query])

            # Search in FAISS index
            distances, indices = self.index.search(query_embedding.astype('float32'), k)

            # Format results
            results = []
            for distance, idx in zip(distances[0], indices[0]):
                if idx == -1:  # -1 marks "no result" for this slot
                    continue
                position = int(idx)
                # Convert distance to similarity and to a builtin float so
                # callers are not handed numpy scalar types.
                similarity = float(1 / (1 + distance))
                results.append((
                    self.candidate_names[position],
                    self.resumes[position],
                    similarity,
                ))

            return results

        except Exception as e:
            logger.error(f"Error searching: {e}")
            return []

    def get_stats(self) -> Dict:
        """Return the resume count, vector dimension and a copy of the names."""
        return {
            "total_resumes": self.index.ntotal,
            "dimension": self.dimension,
            "candidates": list(self.candidate_names)
        }

print("✅ ResumeIndex class defined")

# ========================================
# STEP 6: DEFINE RAG PIPELINE CLASS
# ========================================
print("🔄 Defining RAGPipeline class...")

class RAGPipeline:
    """Combine resume retrieval with summarization and extractive Q&A."""

    def __init__(self, resume_index, summarizer, qa_model):
        # Collaborators are injected; this class only orchestrates them.
        self.resume_index = resume_index
        self.summarizer = summarizer
        self.qa_model = qa_model

    def summarize_resume(self, resume_text: str) -> str:
        """Summarize the first 1000 characters of ``resume_text``.

        Returns the model's ``summary_text``, or the fixed fallback string if
        anything goes wrong (the error is logged).
        """
        try:
            # Only the leading 1000 characters are handed to the summarizer.
            clipped = resume_text[:1000]
            outputs = self.summarizer(
                clipped,
                max_length=150,
                min_length=50,
                do_sample=False,
            )
            first = outputs[0]
            return first['summary_text']
        except Exception as e:
            logger.error(f"Error in summarization: {e}")
            return "Unable to generate summary"

    def answer_question(self, question: str, context: str) -> str:
        """Answer ``question`` from the first 500 characters of ``context``.

        Returns the model's ``answer`` field, or the fixed fallback string if
        anything goes wrong (the error is logged).
        """
        try:
            # Only the leading 500 characters are used as context.
            window = context[:500]
            prediction = self.qa_model(question=question, context=window)
            return prediction['answer']
        except Exception as e:
            logger.error(f"Error in Q&A: {e}")
            return "Unable to generate answer"

    def query_resumes(self, query: str) -> str:
        """Retrieve the top 3 resumes for ``query`` and answer it on each one.

        Returns a Markdown-flavoured report with one numbered entry per hit,
        a fixed message when nothing matches, or a fixed error message if an
        exception occurs (the error is logged).
        """
        try:
            matches = self.resume_index.search(query, k=3)
            if not matches:
                return "No relevant resumes found in the database."

            # Collect the pieces and join once instead of growing a string.
            pieces = [f"Found {len(matches)} relevant candidates:\n\n"]
            for rank, (name, resume_text, similarity) in enumerate(matches, 1):
                pieces.append(f"**{rank}. {name}** (Similarity: {similarity:.2f})\n")
                # The query itself is used as the question for this resume.
                answer = self.answer_question(query, resume_text)
                pieces.append(f"• {answer}\n\n")
            return "".join(pieces)
        except Exception as e:
            logger.error(f"Error in query processing: {e}")
            return "Error processing query"

print("✅ RAGPipeline class defined")

# ========================================
# STEP 7: DEFINE UTILITY FUNCTIONS
# ========================================
print("🛠️ Defining utility functions...")

def extract_text_from_pdf(file_path: str) -> str:
    """Return the concatenated text of every page in the PDF at ``file_path``.

    Pages that yield no text are skipped. Failures are reported in-band: the
    return value starts with ``"Error"`` when nothing could be extracted or
    when reading the file raised (the exception is logged).
    """
    try:
        with open(file_path, 'rb') as handle:
            reader = PyPDF2.PdfReader(handle)
            # Extract while the file is still open.
            page_texts = [page.extract_text() for page in reader.pages]

        # One newline after each non-empty page, then trim the outer whitespace.
        combined = "".join(f"{chunk}\n" for chunk in page_texts if chunk).strip()
        if combined:
            return combined
        return "Error: No text could be extracted from the PDF"

    except Exception as e:
        logger.error(f"Error extracting PDF text: {e}")
        return f"Error extracting text: {str(e)}"

def process_resume(file_obj, candidate_name: str = "") -> str:
    """Extract the text of an uploaded resume.

    Args:
        file_obj: The upload in any of the shapes the UI may deliver: a path
            (``str`` or ``os.PathLike``), an object with a ``name`` attribute
            holding a path, or a readable binary stream. ``None`` means no
            upload.
        candidate_name: Accepted for interface compatibility; not used here.

    Returns:
        The extracted text, ``"No file uploaded"`` for ``None``, or a string
        starting with ``"Error"`` when extraction or file handling failed.
    """
    temp_path = None  # Set only when this call created a temporary file
    try:
        if file_obj is None:
            return "No file uploaded"

        if isinstance(file_obj, (str, os.PathLike)):
            # Check real paths first: pathlib objects also have `.name`, but
            # there it is only the basename, not a usable path.
            file_path = os.fspath(file_obj)
        elif hasattr(file_obj, 'name'):
            # File wrappers expose the on-disk location as `.name`.
            file_path = file_obj.name
        elif hasattr(file_obj, 'read'):
            # Stream without a path: spool it to disk so the PDF reader can
            # open it by name.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                temp_path = tmp_file.name
                tmp_file.write(file_obj.read())
            file_path = temp_path
        else:
            # Last resort: treat the object's string form as a path.
            file_path = str(file_obj)

        # Extract text
        return extract_text_from_pdf(file_path)

    except Exception as e:
        logger.error(f"Error processing resume: {e}")
        return f"Error processing file: {str(e)}"

    finally:
        # Remove the temporary copy on every exit path, including failures
        # while writing it.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove temporary file {temp_path}: {cleanup_error}")

print("✅ Utility functions defined")

# ========================================
# STEP 8: INITIALIZE SYSTEM
# ========================================
print("⚙️ Initializing AI Resume Screener system...")

# Wire the shared singletons: the vector index first, then the pipeline that
# queries it with the summarization and Q&A models.
resume_index = ResumeIndex(embedding_model=embedding_model)
rag_pipeline = RAGPipeline(
    resume_index=resume_index,
    summarizer=summarizer,
    qa_model=qa_model,
)

print("✅ System initialized successfully!")
print("-" * 60)

# ========================================
# STEP 9: DEFINE GRADIO INTERFACE FUNCTIONS
# ========================================
print("🎨 Defining Gradio interface functions...")

def upload_and_index_resume(file, candidate_name):
    """Handle resume upload and indexing.

    Args:
        file: The uploaded file as delivered by the UI, or ``None``.
        candidate_name: Optional display name. Blank or ``None`` falls back
            to an auto-generated ``Candidate_<n>`` name.

    Returns:
        A status message for the UI: a prompt when no file was given, a
        ``❌``-prefixed message on failure, or a ``✅`` confirmation followed by
        a preview of the first 500 extracted characters.
    """
    try:
        if file is None:
            return "Please select a file to upload."

        # Tolerate a missing name and ignore surrounding whitespace so the
        # stored name matches what the user sees.
        name = (candidate_name or "").strip()
        if not name:
            name = f"Candidate_{len(resume_index.candidate_names) + 1}"

        # Candidates are later looked up by name, and that lookup returns the
        # first match only. Give repeated names a numeric suffix so every
        # indexed resume stays reachable.
        taken = set(resume_index.candidate_names)
        if name in taken:
            base = name
            suffix = 2
            while f"{base} ({suffix})" in taken:
                suffix += 1
            name = f"{base} ({suffix})"

        # Process the resume
        text = process_resume(file, name)

        # Extraction reports failures in-band via these prefixes.
        if text.startswith(("Error", "No file")):
            return f"❌ {text}"

        # Add to index
        resume_index.add_resume(text, name)

        return f"✅ Successfully uploaded and indexed resume for {name}\n\nExtracted text preview:\n{text[:500]}..."

    except Exception as e:
        return f"❌ Error: {str(e)}"

def search_candidates(query):
    """Run a semantic search for ``query`` and format up to five hits.

    Returns a prompt for blank input, a fixed message when nothing matches,
    a formatted result list otherwise, or a ``❌``-prefixed message if an
    exception occurs.
    """
    try:
        if not query.strip():
            return "Please enter a search query."

        hits = resume_index.search(query, k=5)
        if not hits:
            return "No matching candidates found. Please try a different query or upload more resumes."

        divider = "-" * 50 + "\n\n"
        sections = [f"🔍 Found {len(hits)} matching candidates:\n\n"]
        for rank, (name, resume_text, similarity) in enumerate(hits, 1):
            # One entry per hit: heading, score, 200-character preview, divider.
            sections.append(
                f"**{rank}. {name}**\n"
                f"📊 Similarity Score: {similarity:.3f}\n"
                f"📄 Resume Preview: {resume_text[:200]}...\n"
                f"{divider}"
            )
        return "".join(sections)

    except Exception as e:
        return f"❌ Error: {str(e)}"

def get_resume_summary(candidate_name):
    """Summarize the stored resume of ``candidate_name``.

    If the name is not indexed, the reply lists the available names (or
    ``None`` when the index is empty). Any exception is turned into a
    ``❌``-prefixed message.
    """
    try:
        names = resume_index.candidate_names
        try:
            # First matching position; the resume text sits at the same offset.
            position = names.index(candidate_name)
        except ValueError:
            available = ", ".join(names) if names else "None"
            return f"Candidate not found. Available candidates: {available}"

        summary = rag_pipeline.summarize_resume(resume_index.resumes[position])
        return f"📋 Summary for {candidate_name}:\n\n{summary}"

    except Exception as e:
        return f"❌ Error: {str(e)}"

def query_system(question):
    """Send ``question`` through the RAG pipeline and wrap the reply for the UI.

    Blank input yields a prompt; any exception is turned into a
    ``❌``-prefixed message.
    """
    try:
        if question.strip():
            answer = rag_pipeline.query_resumes(question)
            return f"🤖 AI Response:\n\n{answer}"
        return "Please enter a question."

    except Exception as e:
        return f"❌ Error: {str(e)}"

def get_system_stats():
    """Format the index statistics (count, dimension, candidate list) for the UI.

    Any exception is turned into a ``❌``-prefixed message.
    """
    try:
        stats = resume_index.get_stats()

        lines = [
            "📊 **System Statistics**\n\n",
            f"📄 Total Resumes: {stats['total_resumes']}\n",
            f"🔢 Vector Dimension: {stats['dimension']}\n\n",
        ]

        candidates = stats['candidates']
        if candidates:
            lines.append("👥 **Indexed Candidates:**\n")
            # Numbered list, one candidate per line, starting at 1.
            lines.extend(
                f"{number}. {candidate}\n"
                for number, candidate in enumerate(candidates, 1)
            )
        else:
            lines.append("No candidates indexed yet.\n")

        return "".join(lines)

    except Exception as e:
        return f"❌ Error: {str(e)}"

print("✅ Gradio interface functions defined")

# ========================================
# STEP 10: CREATE GRADIO INTERFACE
# ========================================
print("🎨 Creating Gradio web interface...")

def create_interface():
    """Create the Gradio web interface.

    Builds a ``gr.Blocks`` app with five tabs (upload, search, summary, Q&A,
    statistics) and wires each button to the matching module-level handler.

    Returns:
        The assembled ``gr.Blocks`` object; the caller is responsible for
        launching it.
    """

    with gr.Blocks(title="AI Resume Screener", theme=gr.themes.Soft()) as interface:

        gr.Markdown("""
        # 🔍 AI-Powered Resume Screener
        
        **An intelligent system for resume screening using AI and vector similarity search**
        
        ### 🚀 Features:
        - Upload and index PDF resumes
        - Semantic similarity search
        - AI-powered resume analysis
        - Intelligent question answering
        
        ---
        """)

        with gr.Tabs():

            # Upload Tab
            with gr.TabItem("📤 Upload Resume"):
                gr.Markdown("### Upload and Index Resume")

                with gr.Row():
                    with gr.Column():
                        # `type` is deliberately left at the library default.
                        # NOTE(review): the accepted `type` values appear to
                        # differ between Gradio 3.x ("file"/"binary") and 4.x
                        # ("filepath"/"binary") -- confirm against the pinned
                        # release. The upload handler accepts both a path
                        # string and a file wrapper with `.name`, so either
                        # default works.
                        file_input = gr.File(
                            label="Select PDF Resume",
                            file_types=[".pdf"]
                        )
                        name_input = gr.Textbox(
                            label="Candidate Name (optional)",
                            placeholder="Enter candidate name"
                        )
                        upload_btn = gr.Button("📤 Upload & Index", variant="primary")

                    with gr.Column():
                        upload_output = gr.Textbox(
                            label="Upload Results",
                            lines=10,
                            interactive=False
                        )

                upload_btn.click(
                    upload_and_index_resume,
                    inputs=[file_input, name_input],
                    outputs=upload_output
                )

            # Search Tab
            with gr.TabItem("🔍 Search Candidates"):
                gr.Markdown("### Search for Candidates")

                with gr.Row():
                    with gr.Column():
                        search_input = gr.Textbox(
                            label="Search Query",
                            placeholder="e.g., Python developer with machine learning experience",
                            lines=3
                        )
                        search_btn = gr.Button("🔍 Search", variant="primary")

                    with gr.Column():
                        search_output = gr.Textbox(
                            label="Search Results",
                            lines=12,
                            interactive=False
                        )

                search_btn.click(
                    search_candidates,
                    inputs=search_input,
                    outputs=search_output
                )

            # Summary Tab
            with gr.TabItem("📋 Resume Summary"):
                gr.Markdown("### Get Resume Summary")

                with gr.Row():
                    with gr.Column():
                        # Starts empty; "Refresh Candidates" fills it from the
                        # index on demand.
                        candidate_dropdown = gr.Dropdown(
                            label="Select Candidate",
                            choices=[],
                            interactive=True
                        )
                        summary_btn = gr.Button("📋 Get Summary", variant="primary")
                        refresh_btn = gr.Button("🔄 Refresh Candidates")

                    with gr.Column():
                        summary_output = gr.Textbox(
                            label="Resume Summary",
                            lines=10,
                            interactive=False
                        )

                def refresh_candidates():
                    """Push the current candidate names into the dropdown."""
                    # Return an update payload rather than a new component
                    # instance. NOTE(review): returning a component from a
                    # handler is believed to be unsupported on the pinned
                    # Gradio 3.40.0 -- confirm. A copy of the list is passed
                    # so the UI never shares the index's own list object.
                    return gr.update(choices=list(resume_index.candidate_names))

                refresh_btn.click(
                    refresh_candidates,
                    outputs=candidate_dropdown
                )

                summary_btn.click(
                    get_resume_summary,
                    inputs=candidate_dropdown,
                    outputs=summary_output
                )

            # Q&A Tab
            with gr.TabItem("🤖 AI Query"):
                gr.Markdown("### Ask Questions About Resumes")

                with gr.Row():
                    with gr.Column():
                        question_input = gr.Textbox(
                            label="Your Question",
                            placeholder="e.g., Which candidates have experience with Python?",
                            lines=3
                        )
                        query_btn = gr.Button("🤖 Ask AI", variant="primary")

                        gr.Markdown("""
                        **Example Questions:**
                        - Which candidates have Python experience?
                        - Who has worked with machine learning?
                        - Find candidates with leadership experience
                        - Which candidates have a PhD?
                        """)

                    with gr.Column():
                        query_output = gr.Textbox(
                            label="AI Response",
                            lines=12,
                            interactive=False
                        )

                query_btn.click(
                    query_system,
                    inputs=question_input,
                    outputs=query_output
                )

            # Stats Tab
            with gr.TabItem("📊 Statistics"):
                gr.Markdown("### System Statistics")

                with gr.Row():
                    with gr.Column():
                        stats_btn = gr.Button("📊 Get Statistics", variant="primary")

                    with gr.Column():
                        stats_output = gr.Textbox(
                            label="System Stats",
                            lines=10,
                            interactive=False
                        )

                stats_btn.click(
                    get_system_stats,
                    outputs=stats_output
                )

        gr.Markdown("""
        ---
        
        ### 📝 Instructions:
        1. **Upload**: Start by uploading PDF resumes in the Upload tab
        2. **Search**: Use semantic search to find candidates matching specific criteria
        3. **Summary**: Get AI-generated summaries of individual resumes
        4. **Query**: Ask natural language questions about your candidate database
        5. **Stats**: Monitor system statistics and indexed candidates
        
        ### ⚠️ Important Notes:
        - Only PDF files are supported
        - Make sure to set your NVIDIA API key in Step 3 above
        - The system uses CPU-compatible FAISS for vector search
        - First run may take longer due to model downloads
        """)

    return interface

print("✅ Gradio interface created")

# ========================================
# STEP 11: LAUNCH APPLICATION
# ========================================
print("🚀 Launching AI Resume Screener...")

# Build the Blocks app, then start serving it.
app = create_interface()

# Colab-oriented launch: request a public share link, listen on all
# interfaces, fixed port 7860, surface handler errors in the UI, and keep
# the startup messages.
# NOTE(review): no authentication is configured here, so anyone holding the
# share link can reach the uploaded resumes -- confirm this is acceptable.
app.launch(share=True, server_name="0.0.0.0", server_port=7860, show_error=True, quiet=False)

print("🎉 Application launched successfully!")
print("📱 Access the interface using the public URL shown above")
print("🔗 The public URL will be displayed below")
print("-" * 60)
print("✨ Setup Complete! Your AI Resume Screener is ready to use!")