<a href="https://colab.research.google.com/github/Awelite/AI-INTERVIEWER-COPILOT/blob/main/hybridATS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **⚓** HYBRID ATS MODEL

In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
PROJECT_ROOT = "/content/drive/MyDrive/ATS_Project_Files"
MODELS_DIR = f"{PROJECT_ROOT}/models"
SCRIPTS_DIR = f"{PROJECT_ROOT}/scripts"

print("Project Root:", PROJECT_ROOT)


Project Root: /content/drive/MyDrive/ATS_Project_Files


In [3]:
import os

for root, dirs, files in os.walk(MODELS_DIR):
    level = root.replace(MODELS_DIR, "").count(os.sep)
    indent = " " * 2 * level
    print(f"{indent}{os.path.basename(root)}/")
    for f in files:
        print(f"{indent}  - {f}")


models/
  - resume_features_summary.csv
  - ats_results_full.json
  - ats_summary.csv
  - baseline_tfidf_lr.joblib
  modernbert_finetune/
    finetuned/
    runs/
      Nov19_15-40-29_1a8659596dc9/
        - events.out.tfevents.1763566847.1a8659596dc9.2574.0
  sbert_lgbm/
    - lgbm_model.pkl
    sbert_encoder/
      - config_sentence_transformers.json
      - config.json
      - model.safetensors
      - tokenizer_config.json
      - special_tokens_map.json
      - vocab.txt
      - tokenizer.json
      - sentence_bert_config.json
      - modules.json
      - README.md
      1_Pooling/
        - config.json
      2_Normalize/


**Load ML-Based ATS (ONLY)**

In [4]:
import joblib
import numpy as np
from sentence_transformers import SentenceTransformer

PROJECT_ROOT = "/content/drive/MyDrive/ATS_Project_Files"

# Load SBERT encoder (trained)
sbert = SentenceTransformer(
    f"{PROJECT_ROOT}/models/sbert_lgbm/sbert_encoder"
)

# Load LightGBM model
lgbm_model = joblib.load(
    f"{PROJECT_ROOT}/models/sbert_lgbm/lgbm_model.pkl"
)

print("ML-based ATS loaded successfully")




ML-based ATS loaded successfully


**Sanity Test ML Model (NO RULE ENGINE)**

In [6]:
# Sanity test ML model (Booster-compatible)

emb_r = sbert.encode(dummy_resume)
emb_j = sbert.encode(dummy_jd)

features = np.abs(emb_r - emb_j).reshape(1, -1)

# For Booster, predict() returns probability
prob = lgbm_model.predict(features)[0]

print("ML probability:", prob)


ML probability: 0.9907775038316795


# Load RULE-BASED ATS (ISOLATED SAFE LOAD)

Define Skill Vocabulary (SIMPLE & EXPLICIT)

In [3]:
!pip install -q rapidfuzz


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/3.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/3.2 MB[0m [31m8.2 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━[0m [32m2.3/3.2 MB[0m [31m32.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m32.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
import sys

PROJECT_ROOT = "/content/drive/MyDrive/ATS_Project_Files"

if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

print("Project root added to sys.path")


Project root added to sys.path


In [5]:
import os

init_path = f"{PROJECT_ROOT}/scripts/__init__.py"
print("scripts/__init__.py exists:", os.path.exists(init_path))


scripts/__init__.py exists: True


In [5]:
from datetime import datetime
from scripts.skill_match import compute_skill_match
import re

SKILL_VOCAB = [
    "python", "java", "c++", "machine learning", "deep learning",
    "data science", "sql", "django", "flask", "nlp",
    "tensorflow", "pytorch", "react", "node", "git", "docker"
]

def estimate_experience_score(text):
    # crude heuristic: count years mentioned
    years = re.findall(r'(\d+)\s+years?', text.lower())
    if not years:
        return 50
    max_year = max(map(int, years))
    return min(100, max_year * 10)

def estimate_formatting_score(text):
    # heuristic: length & structure
    if len(text.split()) > 300:
        return 80
    return 60

def score_resume_against_jd(resume_text, jd_text):
    skill_result = compute_skill_match(
        resume_text,
        jd_text,
        SKILL_VOCAB
    )

    skill_score = skill_result["match_percent"]
    exp_score = estimate_experience_score(resume_text)
    fmt_score = estimate_formatting_score(resume_text)

    ats_score = (
        skill_score * 0.5 +
        exp_score * 0.3 +
        fmt_score * 0.2
    )

    return {
        "ats_score": round(ats_score, 2),
        "skill_score": skill_score,
        "experience_score": exp_score,
        "formatting_score": fmt_score,
        "matched_skills": skill_result["matched_skills"],
        "missing_skills": skill_result["missing_skills"],
        "timestamp": str(datetime.now())
    }


# Import Rule-Based ATS (THIS WILL WORK)

In [6]:
from scripts.score import score_resume_against_jd
print("Rule-based ATS imported successfully")


Rule-based ATS imported successfully


In [10]:
!sed -n '1,200p' /content/drive/MyDrive/ATS_Project_Files/scripts/skill_match.py


import re

def extract_skills_from_text(text, skill_vocab):
    """
    Extracts skills mentioned in text based on predefined vocabulary.
    """
    found = []
    text_lower = text.lower()
    for skill in skill_vocab:
        pattern = r"\b" + re.escape(skill.lower()) + r"\b"
        if re.search(pattern, text_lower):
            found.append(skill)
    return sorted(list(set(found)))

def compute_skill_match(resume_text, jd_text, skill_vocab):
    """
    Compares resume and JD skills, returns match % and missing skills.
    """
    resume_skills = extract_skills_from_text(resume_text, skill_vocab)
    jd_skills = extract_skills_from_text(jd_text, skill_vocab)

    if not jd_skills:
        return {
            "jd_skills": [],
            "resume_skills": resume_skills,
            "match_percent": 0.0,
            "matched_skills": [],
            "missing_skills": []
        }

    matched = [s for s in jd_skills if s in resume_skills]
    missing = [s for s in jd_skills if s no

# Sanity Test

In [7]:
out = score_resume_against_jd(
    "Python developer with ML experience",
    "Looking for Python engineer with ML knowledge"
)

print(out)


{'ats_score': 100.0, 'matched_skills': ['python'], 'missing_skills': [], 'resume_skills': ['python'], 'jd_skills': ['python'], 'timestamp': '2025-12-23 15:20:53.743783'}


# Load Everything (Clean Cell)

In [8]:
# ---- PATH SETUP ----
import sys
PROJECT_ROOT = "/content/drive/MyDrive/ATS_Project_Files"
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

# ---- IMPORTS ----
from scripts.score import score_resume_against_jd
from sentence_transformers import SentenceTransformer
import joblib
import numpy as np




In [9]:
# Load SBERT encoder
sbert = SentenceTransformer(
    f"{PROJECT_ROOT}/models/sbert_lgbm/sbert_encoder"
)

# Load LightGBM classifier
lgbm_model = joblib.load(
    f"{PROJECT_ROOT}/models/sbert_lgbm/lgbm_model.pkl"
)

print("ML models loaded")


ML models loaded


In [12]:
def run_hybrid_ats(resume_text, jd_text):
    """
    Hybrid ATS:
    30% Rule-based + 70% ML-based
    """

    # ----- RULE-BASED ATS -----
    rule_out = score_resume_against_jd(resume_text, jd_text)
    rule_score = rule_out["ats_score"]  # 0–100

    # ----- ML-BASED ATS -----
    emb_resume = sbert.encode(resume_text)
    emb_jd = sbert.encode(jd_text)

    features = np.abs(emb_resume - emb_jd).reshape(1, -1)

    # LightGBM Booster returns probability directly
    ml_prob = float(lgbm_model.predict(features)[0])
    ml_score = ml_prob * 100

    # ----- HYBRID MERGE -----
    final_score = round(
        0.3 * rule_score + 0.7 * ml_score,
        2
    )

    return {
        "final_ats_score": final_score,
        "rule_score": round(rule_score, 2),
        "ml_probability": round(ml_prob, 4),
        "rule_details": rule_out
    }


In [13]:
resume_text = "Python developer with 3 years experience in machine learning and data science"
jd_text = "Hiring Python engineer with ML and data science skills"

output = run_hybrid_ats(resume_text, jd_text)
output


{'final_ats_score': 99.56,
 'rule_score': 100.0,
 'ml_probability': 0.9938,
 'rule_details': {'ats_score': 100.0,
  'matched_skills': ['data science', 'python'],
  'missing_skills': [],
  'resume_skills': ['data science', 'machine learning', 'python'],
  'jd_skills': ['data science', 'python'],
  'timestamp': '2025-12-23 15:24:39.940147'}}

In [14]:
%%writefile /content/drive/MyDrive/ATS_Project_Files/scripts/hybrid_ats.py
import sys
import numpy as np
from sentence_transformers import SentenceTransformer
import joblib

# Ensure project root is visible
PROJECT_ROOT = "/content/drive/MyDrive/ATS_Project_Files"
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

from scripts.score import score_resume_against_jd


# Load ML models once (global)
SBERT_PATH = f"{PROJECT_ROOT}/models/sbert_lgbm/sbert_encoder"
LGBM_PATH = f"{PROJECT_ROOT}/models/sbert_lgbm/lgbm_model.pkl"

sbert = SentenceTransformer(SBERT_PATH)
lgbm_model = joblib.load(LGBM_PATH)


def run_hybrid_ats(resume_text, jd_text):
    """
    Hybrid ATS Engine
    30% Rule-based + 70% ML-based
    """

    # ----- RULE-BASED ATS -----
    rule_out = score_resume_against_jd(resume_text, jd_text)
    rule_score = rule_out["ats_score"]

    # ----- ML-BASED ATS -----
    emb_resume = sbert.encode(resume_text)
    emb_jd = sbert.encode(jd_text)

    features = np.abs(emb_resume - emb_jd).reshape(1, -1)

    # LightGBM Booster returns probability
    ml_prob = float(lgbm_model.predict(features)[0])
    ml_score = ml_prob * 100

    # ----- HYBRID MERGE -----
    final_score = round(
        0.3 * rule_score + 0.7 * ml_score,
        2
    )

    return {
        "final_ats_score": final_score,
        "rule_score": round(rule_score, 2),
        "ml_probability": round(ml_prob, 4),
        "rule_details": rule_out
    }


Writing /content/drive/MyDrive/ATS_Project_Files/scripts/hybrid_ats.py
