In [2]:
import numpy as np
import pandas as pd
from datetime import datetime

class HeuristicScorer:
    """Score a candidate's work history with a multiplicative heuristic.

    Every experience entry is scored as ``Wt * D * R * I * C`` where

    * ``Wt`` - weight of the employment type,
    * ``D``  - duration, min-max normalised across the candidate's entries,
    * ``R``  - externally supplied relevance score (e.g. BERT similarity),
    * ``I``  - keyword-based impact score,
    * ``C``  - exponential recency decay.

    The candidate score is the sum of the entry scores.
    """

    # Layout expected for 'start_date' / 'end_date' strings.
    DATE_FORMAT = "%Y-%m"

    # End-date markers (matched case-insensitively) meaning "still ongoing".
    ONGOING_MARKERS = frozenset({'present', 'current', 'now'})

    # Weight applied to employment types that are not in ``self.weights``.
    DEFAULT_TYPE_WEIGHT = 0.5

    # Substrings whose presence in a description counts towards impact.
    IMPACT_KEYWORDS = ('lead', 'managed', 'created', 'increased', 'decreased',
                       'saved', 'revenue', 'budget', 'launched', 'mentored')

    def __init__(self, weights=None, half_life_years=5.0):
        """
        Args:
            weights (dict | None): Employment type -> weight. Defaults to
                Full-time 1.0, Freelance 0.8, Internship 0.7.
            half_life_years (float): Years after which the recency score of
                a finished job has dropped to 0.5.

        Raises:
            ValueError: If ``half_life_years`` is not positive.
        """
        if half_life_years <= 0:
            raise ValueError("half_life_years must be positive")

        # 1. Define Weights (Tunable Hyperparameters)
        if weights is None:
            weights = {
                'Full-time': 1.0,
                'Freelance': 0.8,
                'Internship': 0.7
            }
        # Copy so that later edits to the caller's dict do not leak in.
        self.weights = dict(weights)

        # 2. Decay Constant for Recency (Lambda)
        # lambda = ln(2) / half_life, so the score is exactly 0.5 after
        # one half-life.
        self.decay_lambda = float(np.log(2.0) / half_life_years)

    @classmethod
    def _is_ongoing(cls, value):
        """Return True if *value* is a marker such as 'Present'."""
        return isinstance(value, str) and value.strip().lower() in cls.ONGOING_MARKERS

    @classmethod
    def _parse_month(cls, value):
        """Parse a 'YYYY-MM' string; return None for anything unparseable."""
        if not isinstance(value, str):
            return None
        try:
            return datetime.strptime(value.strip(), cls.DATE_FORMAT)
        except ValueError:
            return None

    def calculate_duration_months(self, start_date, end_date, reference_date=None):
        """Calculates duration in months between two dates.

        Args:
            start_date (str): Start month as 'YYYY-MM'.
            end_date (str): End month as 'YYYY-MM', or one of the ongoing
                markers ('Present', 'Current', 'Now', any case).
            reference_date (datetime | None): Instant used as the end of an
                ongoing job. Defaults to ``datetime.now()``.

        Returns:
            int: Whole calendar months from start to end. 0 if either date
            is missing or unparseable, or if the end precedes the start.
        """
        start = self._parse_month(start_date)
        if start is None:
            return 0

        if self._is_ongoing(end_date):
            end = reference_date if reference_date is not None else datetime.now()
        else:
            end = self._parse_month(end_date)
            if end is None:
                return 0

        months = (end.year - start.year) * 12 + (end.month - start.month)
        # An end before the start is a data error; a negative duration would
        # distort the min-max normalisation, so clamp it.
        return max(months, 0)

    def calculate_recency_score(self, end_date, reference_date=None):
        """
        Applies Exponential Decay based on how long ago the job ended.
        Formula: C = e^(-lambda * t), with t in years (days / 365).

        Args:
            end_date (str): End month as 'YYYY-MM', an ongoing marker, or a
                falsy value (treated as ongoing).
            reference_date (datetime | None): Instant that elapsed time is
                measured against. Defaults to ``datetime.now()``.

        Returns:
            float: Score in [0, 1]. 1.0 for ongoing jobs and for end dates
            that are not in the past, 0.0 for unparseable end dates.
        """
        if not end_date or self._is_ongoing(end_date):
            return 1.0  # Current jobs are perfectly recent

        end = self._parse_month(end_date)
        if end is None:
            return 0.0

        now = reference_date if reference_date is not None else datetime.now()

        # Clamp at zero: a future end date must not push the score above 1.
        years_elapsed = max((now - end).days, 0) / 365.0

        return float(np.exp(-self.decay_lambda * years_elapsed))

    def calculate_impact_score(self, description):
        """
        Heuristic for Impact (I).
        Simple keyword-based scoring for now. In a full system,
        this could be a secondary NLP regression model.

        Args:
            description (str): Free-text description of the role.

        Returns:
            float: ``1 - e^(-0.5 * k)`` where k is the number of distinct
            impact keywords found as case-insensitive substrings; 0.0 for
            missing, empty or non-string descriptions.
        """
        if not isinstance(description, str) or not description:
            return 0.0

        # Each keyword counts once no matter how often it appears.
        desc_lower = description.lower()
        count = sum(1 for word in self.IMPACT_KEYWORDS if word in desc_lower)

        # Saturating normalization: 0 keywords = 0.0, 5 keywords ~= 0.92,
        # approaching (never reaching) 1.0 as the count grows.
        return float(1 - np.exp(-0.5 * count))

    def normalize_durations(self, experience_entries):
        """
        Min-Max normalization for duration across all entries in the resume.

        Adds 'normalized_duration' to every entry in place. The shortest
        entry maps to 0.0 and the longest to 1.0; when all durations are
        equal (including a single entry) every entry gets 1.0.

        Args:
            experience_entries (list): Dicts that each have 'duration_months'.

        Returns:
            list: The same list object that was passed in.
        """
        durations = [entry['duration_months'] for entry in experience_entries]

        if not durations:
            return experience_entries

        min_dur = min(durations)
        span = max(durations) - min_dur

        for entry, months in zip(experience_entries, durations):
            # span == 0 would divide by zero; treat equal durations as 1.0.
            entry['normalized_duration'] = (months - min_dur) / span if span else 1.0

        return experience_entries

    def score_resume(self, experience_entries, bert_relevance_scores, *,
                     verbose=True, reference_date=None):
        """
        Main function to score a candidate.

        Each entry is annotated in place with 'duration_months',
        'bert_relevance' and 'normalized_duration'.

        Args:
            experience_entries (list): List of dicts containing raw resume data
                ('job_title', 'start_date', 'end_date', 'employment_type',
                'description'); missing keys are treated as None.
            bert_relevance_scores (list): Corresponding BERT similarity scores (0-1).
            verbose (bool): Print the per-entry breakdown table (default True).
            reference_date (datetime | None): Single "now" used for every
                entry. Defaults to ``datetime.now()`` taken once per call.

        Returns:
            float: Sum of the per-entry scores (0 for an empty resume).

        Raises:
            ValueError: If the two inputs differ in length.
        """
        # Fail fast, before any entry is mutated, instead of crashing midway
        # or silently pairing entries with the wrong scores.
        if len(experience_entries) != len(bert_relevance_scores):
            raise ValueError(
                "experience_entries and bert_relevance_scores must have the "
                f"same length (got {len(experience_entries)} and "
                f"{len(bert_relevance_scores)})"
            )

        # One shared instant keeps durations and recency mutually consistent.
        if reference_date is None:
            reference_date = datetime.now()

        # Step 1: Pre-calculate raw durations and simple metrics
        for entry, relevance in zip(experience_entries, bert_relevance_scores):
            entry['duration_months'] = self.calculate_duration_months(
                entry.get('start_date'), entry.get('end_date'), reference_date)
            entry['bert_relevance'] = relevance

        # Step 2: Normalize durations relative to the candidate's own history
        # (Or you can normalize relative to the entire dataset if available)
        experience_entries = self.normalize_durations(experience_entries)

        total_score = 0.0

        if verbose:
            print(f"{'Role':<20} | {'Type':<10} | {'Dur(Norm)':<10} | {'Recency':<10} | {'Impact':<10} | {'BERT':<10} | {'FINAL'}")
            print("-" * 100)

        for entry in experience_entries:
            employment_type = entry.get('employment_type')

            # A. Get Type Weight (Wt); unknown types fall back to the default
            w_t = self.weights.get(employment_type, self.DEFAULT_TYPE_WEIGHT)

            # B. Get Normalized Duration (D)
            d_norm = entry['normalized_duration']

            # C. Calculate Recency (C)
            c_score = self.calculate_recency_score(entry.get('end_date'), reference_date)

            # D. Calculate Impact (I)
            i_score = self.calculate_impact_score(entry.get('description'))

            # E. BERT Relevance (R)
            r_score = entry['bert_relevance']

            # --- FINAL FORMULA ---
            # Score = Wt * D * R * I * C
            # Any zero factor (e.g. D of the shortest entry, or I with no
            # keywords) zeroes the whole entry; no epsilon is added.
            final_entry_score = w_t * d_norm * r_score * i_score * c_score

            total_score += final_entry_score

            if verbose:
                # str() keeps the table printable when a field is missing.
                title = str(entry.get('job_title') or '')
                type_label = str(employment_type or '')
                print(f"{title:<20} | {type_label:<10} | {d_norm:.2f}       | {c_score:.2f}       | {i_score:.2f}       | {r_score:.2f}       | {final_entry_score:.4f}")

        return total_score

# --- EXAMPLE USAGE ---

# Two mock experience entries standing in for the parsed-resume JSON
# (the output of Section 3.3): one ongoing full-time role and one
# finished internship.
sample_data = [
    dict(
        job_title="Senior Dev",
        start_date="2020-01",
        end_date="Present",
        employment_type="Full-time",
        description="Led a team of 5, increased revenue by 20%, managed cloud infrastructure.",
    ),
    dict(
        job_title="Web Intern",
        start_date="2019-01",
        end_date="2019-06",
        employment_type="Internship",
        description="Fixed bugs and learned React.",
    ),
]

# Stand-in relevance scores, one per entry and in the same order
# (in the real pipeline these come from the semantic module).
bert_scores = [0.85, 0.60]

# Score the mock candidate; score_resume prints the per-entry table itself.
scorer = HeuristicScorer()
final_candidate_score = scorer.score_resume(sample_data, bert_scores)

print(f"\nTotal Candidate Score: {final_candidate_score:.4f}")

Role                 | Type       | Dur(Norm)  | Recency    | Impact     | BERT       | FINAL
----------------------------------------------------------------------------------------------------
Senior Dev           | Full-time  | 1.00       | 1.00       | 0.78       | 0.85       | 0.6603
Web Intern           | Internship | 0.00       | 0.40       | 0.00       | 0.60       | 0.0000

Total Candidate Score: 0.6603
