In [None]:
!pip install PyPDF2 python-docx
print("Ready to analyze resumes!")

In [None]:
import pickle
import os
import re
from sklearn.metrics.pairwise import cosine_similarity
import PyPDF2
import docx
import warnings

# Suppress every warning for the rest of the session so library notices do
# not clutter the cell output.
# NOTE(review): with no category argument this filter hides all warnings,
# including deprecation notices from the imported libraries; consider
# narrowing it.
warnings.filterwarnings('ignore')
print("Libraries imported successfully!")

In [None]:
print("Loading trained model...")

# Load the pickled model bundle from disk.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load a model file that you created yourself or otherwise trust.
with open('models/resume_scanner_model.pkl', 'rb') as f:
    model_data = pickle.load(f)

# Unpack the pieces the rest of the notebook relies on:
#   vectorizer     - object whose .transform() is used for text similarity
#                    (presumably a fitted TF-IDF vectorizer - confirm against
#                    the training notebook)
#   skill_database - collection of skill names scanned for in resume text
#   training_stats - dict of training metadata; 'vocabulary_size' is read below
vectorizer = model_data['vectorizer']
skill_database = model_data['skill_database']
training_stats = model_data['training_stats']

print("Model loaded successfully!")
print(f"\nModel Info:")
print(f"  Skills in database: {len(skill_database):,}")
print(f"  TF-IDF vocabulary: {training_stats['vocabulary_size']:,} terms")

In [None]:
def read_pdf(file_path):
    """Extract the text of every page of a PDF file.

    Page texts are joined with a newline so that the last word on one page
    and the first word on the next are not fused into a single token (which
    would hide skills and contact details that sit at a page boundary).

    Args:
        file_path: Path to the PDF file.

    Returns:
        The extracted text. If the file cannot be opened or parsed, the error
        is printed and whatever text was collected before the failure is
        returned (an empty string if nothing was read).
    """
    page_texts = []
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                # extract_text() may yield None/"" for image-only pages.
                page_text = page.extract_text() or ""
                if page_text:
                    page_texts.append(page_text)
    except Exception as e:
        # Best effort: report the problem but keep the pages read so far.
        print(f"Error reading PDF: {e}")
    return "\n".join(page_texts)

def read_docx(file_path):
    """Return the paragraph text of a DOCX file, one paragraph per line.

    Any failure while opening or reading the document is printed and an
    empty string is returned instead.
    """
    try:
        document = docx.Document(file_path)
        return '\n'.join(para.text for para in document.paragraphs)
    except Exception as e:
        print(f"Error reading DOCX: {e}")
        return ""

def read_txt(file_path):
    """Return the contents of a plain-text file decoded as UTF-8.

    Bytes that are not valid UTF-8 are dropped. If the file cannot be read,
    the error is printed and an empty string is returned.
    """
    try:
        with open(file_path, mode='r', encoding='utf-8', errors='ignore') as handle:
            content = handle.read()
    except Exception as e:
        print(f"Error reading TXT: {e}")
        return ""
    return content

def read_resume_file(file_path):
    """Dispatch to the reader that matches the file's extension.

    The extension is compared case-insensitively. Supported formats are
    PDF, DOCX and TXT; anything else raises ValueError.
    """
    _, suffix = os.path.splitext(file_path)
    ext = suffix.lower()

    # Reject unknown formats up front, then pick the matching reader.
    if ext not in ('.pdf', '.docx', '.txt'):
        raise ValueError(f"Unsupported file format: {ext}. Use PDF, DOCX, or TXT.")
    if ext == '.pdf':
        return read_pdf(file_path)
    if ext == '.docx':
        return read_docx(file_path)
    return read_txt(file_path)

print("Resume reading functions defined!")

In [None]:
def extract_email(text):
    """Return the first email address found in ``text``.

    Args:
        text: Resume text to search. ``None`` or an empty string is accepted.

    Returns:
        The first matching address as a string, or ``None`` if there is none.
    """
    if not text:
        return None
    # The TLD class is [A-Za-z]; writing it as [A-Z|a-z] would also accept a
    # literal '|' character as part of the address.
    pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    match = re.search(pattern, text)
    return match.group(0) if match else None

def extract_phone(text):
    """Return the first phone number found in ``text``.

    Patterns are tried from most to least specific (with country code,
    grouped national number, bare 10 digits); the first pattern that matches
    anywhere in the text wins.

    Args:
        text: Resume text to search. ``None`` or an empty string is accepted.

    Returns:
        The matched phone number exactly as written in the text, or ``None``.
    """
    if not text:
        return None
    # The (?<!\d) / (?!\d) guards stop a pattern from matching a slice of a
    # longer digit run (order IDs, card numbers), which would otherwise be
    # reported as a truncated "phone number".
    patterns = [
        r'(?<!\d)\+?\d{1,3}[-\.\s]?\(?\d{3}\)?[-\.\s]?\d{3}[-\.\s]?\d{4}(?!\d)',
        r'(?<!\d)\(?\d{3}\)?[-\.\s]?\d{3}[-\.\s]?\d{4}(?!\d)',
        r'(?<!\d)\d{10}(?!\d)'
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(0)
    return None

def extract_skills(text, skill_db):
    """Return the skills from ``skill_db`` that occur in ``text``.

    Matching is case-insensitive and requires the skill to appear as a whole
    token: it must not be directly preceded or followed by a word character.
    This prevents "java" from being reported for "javascript" or "r" for
    every word containing that letter.

    Args:
        text: Resume text to scan. ``None`` or an empty string yields ``[]``.
        skill_db: Iterable of skill names.

    Returns:
        Sorted list of the distinct matching entries, spelled as in
        ``skill_db``.
    """
    if not text:
        return []
    text_lower = text.lower()
    found_skills = set()

    for skill in skill_db:
        if not isinstance(skill, str):
            continue
        needle = skill.strip().lower()
        # Cheap substring test first so a regex is only built for candidates;
        # an empty entry would otherwise match every text.
        if not needle or needle not in text_lower:
            continue
        # Lookarounds instead of \b so skills such as "c++" or ".net", which
        # begin or end with punctuation, can still match.
        if re.search(r'(?<!\w)' + re.escape(needle) + r'(?!\w)', text_lower):
            found_skills.add(skill)

    return sorted(found_skills)

def extract_experience_years(text):
    """Estimate years of experience from phrases in the resume text.

    Recognises "<n> years of experience" style phrases first, then
    "experience: <n> years". The first occurrence of the first pattern that
    matches is used.

    Args:
        text: Resume text. ``None`` or an empty string yields ``0``.

    Returns:
        Whole number of years as an int (a decimal such as "3.5" is truncated
        to 3), or ``0`` when no phrase is found.
    """
    if not text:
        return 0
    # (?<![\d.]) plus the optional fractional part make "3.5 years" parse as
    # 3 instead of picking up only the digits after the decimal point.
    patterns = [
        r'(?<![\d.])(\d+)(?:\.\d+)?\+?\s*years?\s*(?:of)?\s*experience',
        r'experience[:\s]+(\d+)(?:\.\d+)?\+?\s*years?'
    ]

    # Lower-case once rather than once per pattern.
    text_lower = text.lower()
    for pattern in patterns:
        match = re.search(pattern, text_lower)
        if match:
            return int(match.group(1))

    return 0

print(" Information extraction functions defined!")

In [None]:
def calculate_skill_match(resume_skills, required_skills):
    """Score how many of the required skills the resume covers.

    Comparison is case-insensitive. Returns a tuple of
    (percentage of required skills present, matched skills, missing skills),
    with the skill lists lower-cased and in the order given in
    ``required_skills``. With no required skills the score is 100.0.
    """
    if not required_skills:
        return 100.0, [], []

    available = {name.lower() for name in resume_skills}
    wanted = [name.lower() for name in required_skills]

    matched, missing = [], []
    for name in wanted:
        bucket = matched if name in available else missing
        bucket.append(name)

    score = (len(matched) / len(wanted)) * 100

    return score, matched, missing

def calculate_text_similarity(resume_text, job_description):
    """Return the cosine similarity of resume and job description as a percentage.

    Both texts are lower-cased and transformed with the module-level
    ``vectorizer``. If anything fails, the error is printed and 0.0 is
    returned.
    """
    try:
        lowered = [doc.lower() for doc in (resume_text, job_description)]
        vectors = vectorizer.transform(lowered)
        resume_vec, job_vec = vectors[0:1], vectors[1:2]
        pairwise = cosine_similarity(resume_vec, job_vec)
        return pairwise[0][0] * 100
    except Exception as e:
        print(f"Similarity calculation error: {e}")
        return 0.0

def calculate_ats_score(resume_text, resume_skills, job_description, required_skills,
                        skill_weight=0.6, text_weight=0.4):
    """Combine skill coverage and text similarity into one ATS score.

    Args:
        resume_text: Full text of the resume.
        resume_skills: Skills already extracted from the resume.
        job_description: Text of the job posting.
        required_skills: Skills the job requires.
        skill_weight: Weight of the skill-match percentage in the overall
            score (default 0.6).
        text_weight: Weight of the text-similarity percentage in the overall
            score (default 0.4). The weights are used as given; they should
            sum to 1 for the overall score to stay on a 0-100 scale.

    Returns:
        dict with keys 'overall_score', 'skill_match_score',
        'text_similarity_score' (each rounded to 2 decimals),
        'matched_skills', 'missing_skills', 'experience_years' and
        'total_skills_found'.
    """
    # Skill match
    skill_score, matched, missing = calculate_skill_match(resume_skills, required_skills)

    # Text similarity
    text_sim = calculate_text_similarity(resume_text, job_description)

    # Experience is reported for information only; it does not affect the score.
    exp_years = extract_experience_years(resume_text)

    # Weighted final score (defaults: 60% skills, 40% text similarity)
    overall = (skill_score * skill_weight) + (text_sim * text_weight)

    return {
        'overall_score': round(overall, 2),
        'skill_match_score': round(skill_score, 2),
        'text_similarity_score': round(text_sim, 2),
        'matched_skills': matched,
        'missing_skills': missing,
        'experience_years': exp_years,
        'total_skills_found': len(resume_skills)
    }

print(" ATS scoring functions defined!")

In [None]:
def analyze_resume(file_path, job_description="", required_skills=None):
    """Run the complete analysis pipeline on one resume and print a report.

    The resume is read, contact details, skills and years of experience are
    extracted and printed, and - when both a job description and a list of
    required skills are supplied - an ATS score report with a recommendation
    is printed as well.

    Args:
        file_path: Path to the resume (PDF, DOCX or TXT).
        job_description: Text of the job posting. Optional.
        required_skills: List of skills required for the job. Optional.

    Returns:
        With a job description and required skills: the dict produced by
        ``calculate_ats_score``. Otherwise a dict with the keys 'skills',
        'email', 'phone' and 'experience'.

    Raises:
        ValueError: If the file extension is not a supported format.
    """

    print("=" * 70)
    print("AI RESUME SCANNER - ANALYSIS")
    print("=" * 70)

    # Read resume
    print(f"\n[1/4] Reading resume file: {os.path.basename(file_path)}")
    resume_text = read_resume_file(file_path)
    print(f" Resume loaded ({len(resume_text)} characters)")

    # Extract basic info
    print("\n[2/4] Extracting information...")
    email = extract_email(resume_text)
    phone = extract_phone(resume_text)
    skills = extract_skills(resume_text, skill_database)
    exp_years = extract_experience_years(resume_text)

    print(f" Email: {email or 'Not found'}")
    print(f" Phone: {phone or 'Not found'}")
    print(f" Skills found: {len(skills)}")
    print(f" Experience: {exp_years} years")

    # Display skills (at most the first 20, to keep the report short)
    print("\n[3/4] Skills identified:")
    if skills:
        for i, skill in enumerate(skills[:20], 1):
            print(f"  {i:2d}. {skill}")
        if len(skills) > 20:
            print(f"  ... and {len(skills) - 20} more")
    else:
        print("  No skills matched from database")

    # Without both a job description and required skills only the basic
    # extraction result can be returned.
    if not (job_description and required_skills):
        print("\nBasic analysis complete!")
        print("  (Provide job_description and required_skills for ATS scoring)")
        return {'skills': skills, 'email': email, 'phone': phone, 'experience': exp_years}

    print("\n[4/4] Calculating ATS score...")
    results = calculate_ats_score(resume_text, skills, job_description, required_skills)

    print("\n" + "=" * 70)
    print("ATS SCORE RESULTS")
    print("=" * 70)
    print(f"\nOverall Score:        {results['overall_score']}%")
    print(f"Skill Match:          {results['skill_match_score']}%")
    print(f"Text Similarity:      {results['text_similarity_score']}%")
    print(f"Experience:           {results['experience_years']} years")
    print(f"Total Skills Found:   {results['total_skills_found']}")

    print(f"\n MATCHED SKILLS ({len(results['matched_skills'])}):")
    if results['matched_skills']:
        for skill in results['matched_skills']:
            print(f"    {skill}")
    else:
        print("   None")

    print(f"\n MISSING SKILLS ({len(results['missing_skills'])}):")
    if results['missing_skills']:
        for skill in results['missing_skills']:
            # \u2717 is the ballot-x mark; written as an escape so the symbol
            # survives the file being re-saved in a different encoding.
            print(f"   \u2717 {skill}")
    else:
        print("   None - All required skills present!")

    # Recommendation (\U0001F4A1 is the light-bulb emoji)
    print("\n\U0001F4A1 RECOMMENDATION:")
    if results['overall_score'] >= 80:
        print("   Excellent match! Highly recommended for this position.")
    elif results['overall_score'] >= 60:
        print("   Good match! Consider for interview.")
    elif results['overall_score'] >= 40:
        print("   Fair match. May need additional training or skills.")
    else:
        print("   Low match. Not recommended for this position.")

    print("\n" + "=" * 70)

    return results

print("Resume analysis function ready!")

## Analyze Your Resume

Set `resume_file_path` in the cell below to the location of your own resume file (PDF, DOCX, or TXT).

In [None]:

resume_file_path = r"resume_file_path.pdf"  # TODO: replace with the path to your resume (PDF, DOCX or TXT)

# Job description: free text that is compared against the resume text for the
# text-similarity part of the score.
job_description = """
We are looking for a Senior Software Engineer with strong experience in:
- Python and Java programming
- Web development using React and Node.js
- Database management (SQL, MongoDB)
- Cloud platforms (AWS, Azure)
- DevOps tools (Docker, Kubernetes)
- Machine learning and data analysis

Minimum 3-5 years of experience required.
"""

# Required skills for the job. Each entry is compared by exact
# (case-insensitive) name with the skills found in the resume.
# NOTE(review): 'nodejs' is spelled differently from "Node.js" in the job
# description above - confirm which spelling the skill database uses.
required_skills = [
    'python', 'java', 'javascript', 'react', 'nodejs',
    'sql', 'mongodb', 'aws', 'azure', 'docker', 'kubernetes',
    'machine learning', 'data analysis'
]

# ============================================

# Only run the analysis when the resume file is present; otherwise print
# setup instructions instead of failing.
if os.path.exists(resume_file_path):
    # Run analysis
    results = analyze_resume(
        file_path=resume_file_path,
        job_description=job_description,
        required_skills=required_skills
    )
else:
    print(f"\nFile not found: {resume_file_path}")
    print("1. Place a resume file (PDF/DOCX/TXT) in the project folder")
    print("2. Update the 'resume_file_path' variable above")

## Multiple Resumes

**Analyze every resume in a folder and rank the candidates by overall ATS score.**

In [None]:
def batch_analyze_resumes(resume_folder, job_description, required_skills):
    """Score every resume in a folder and print a ranking.

    Files are selected by extension (.pdf, .docx, .txt, compared
    case-insensitively, matching ``read_resume_file``) and processed in
    filename order so repeated runs give the same output. A resume that
    fails to process is reported and left out of the ranking.

    Args:
        resume_folder: Directory containing the resume files.
        job_description: Text of the job posting.
        required_skills: List of skills required for the job.

    Returns:
        A list of dicts (keys 'filename', 'overall_score', 'skill_match',
        'text_similarity', 'matched_skills', 'total_skills') sorted by
        'overall_score' in descending order, or ``None`` when the folder
        does not exist or contains no resume files.
    """

    # isdir rather than exists: a path to a regular file would pass an
    # existence check and then make os.listdir raise.
    if not os.path.isdir(resume_folder):
        print(f"Folder not found: {resume_folder}")
        return None

    # Get all resume files (regular files only; a sub-directory whose name
    # ends in ".pdf" is not a resume).
    supported_exts = ('.pdf', '.docx', '.txt')
    resume_files = []
    for file in sorted(os.listdir(resume_folder)):
        candidate = os.path.join(resume_folder, file)
        if file.lower().endswith(supported_exts) and os.path.isfile(candidate):
            resume_files.append(candidate)

    if not resume_files:
        print("No resume files found in folder")
        return None

    print(f"Found {len(resume_files)} resumes to analyze\n")

    results_list = []

    for i, file_path in enumerate(resume_files, 1):
        print(f"\nAnalyzing {i}/{len(resume_files)}: {os.path.basename(file_path)}")
        print("-" * 50)

        try:
            resume_text = read_resume_file(file_path)
            skills = extract_skills(resume_text, skill_database)
            results = calculate_ats_score(resume_text, skills, job_description, required_skills)

            results_list.append({
                'filename': os.path.basename(file_path),
                'overall_score': results['overall_score'],
                'skill_match': results['skill_match_score'],
                'text_similarity': results['text_similarity_score'],
                'matched_skills': len(results['matched_skills']),
                'total_skills': results['total_skills_found']
            })

            print(f" Score: {results['overall_score']}%")

        except Exception as e:
            # Keep going: one unreadable resume must not abort the batch.
            print(f"Error: {e}")

    # Sort by overall score (stable, so ties stay in filename order)
    results_list.sort(key=lambda x: x['overall_score'], reverse=True)

    # Display ranking
    print("\n" + "=" * 70)
    print("CANDIDATE RANKING")
    print("=" * 70)
    print(f"\n{'Rank':<6} {'Score':<10} {'Filename':<30}")
    print("-" * 70)

    for i, result in enumerate(results_list, 1):
        print(f"{i:<6} {result['overall_score']:<10.2f} {result['filename']:<30}")

    return results_list

# Example usage (uncomment to use)
# resumes_folder = "resumes"  # Folder containing multiple resume files
# batch_results = batch_analyze_resumes(resumes_folder, job_description, required_skills)

print("Batch analysis function ready!")
print("  Create a 'resumes' folder and add multiple resume files to test")