# Job-Matching-Candidate-Assessment-System
 An end-to-end AI-driven recruitment system that evaluates candidates using multi-modal analysis:

* CV & Job Description matching using **Large Language Models (LLMs)**
* Skill extraction and **quantitative scoring**
* Video-based **emotion and behavior** analysis
* Final **HR-style** verdict generation
* **The project's GitHub repository is available at [this link](https://github.com/Nagwam18/Job-Matching-Candidate-Assessment-System).**

# Installation

In [None]:
!pip cache purge
!rm -rf ~/.cache/pip
!rm -rf ~/.cache/*


In [None]:
!pip install -r /kaggle/input/new1-requirements/requirements.txt
!pip install pdfplumber

# Imports

In [None]:
import os
import io
import re
import uuid
import json
import tempfile

from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from starlette.responses import JSONResponse

import pdfplumber
import pytesseract
import docx2txt
import re
from PIL import Image

import cv2
from deepface import DeepFace

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import cv2
from collections import Counter

In [None]:
import os

from huggingface_hub import login

# SECURITY: access tokens must never be committed. The token that used to be
# hard-coded on this line is exposed in the file's history and should be
# revoked. Provide the token through the HF_TOKEN environment variable (for
# example a Kaggle secret exported into the environment); when it is unset,
# None is passed and login() is left to obtain the token on its own.
login(token=os.environ.get("HF_TOKEN"))

# Functions

## load_file

In [None]:
def load_file(path):
    """Extract plain text from a document on disk.

    The file type is chosen from the (case-insensitive) extension:

    * ``.pdf``  - text layer via pdfplumber; if that yields only whitespace,
      every page is rendered with ``resolution=300`` and OCR'd with
      pytesseract.
    * ``.docx`` - docx2txt.
    * ``.txt``  - read as UTF-8, undecodable bytes ignored.
    * ``.jpg`` / ``.jpeg`` / ``.png`` - OCR with pytesseract.

    Args:
        path: Filesystem path of the document.

    Returns:
        The extracted text, or ``""`` when the extension is not supported.
    """
    ext = os.path.splitext(path)[1].lower()

    if ext == ".pdf":
        # Open the PDF once and reuse it for the OCR fallback.
        with pdfplumber.open(path) as pdf:
            page_texts = (page.extract_text() for page in pdf.pages)
            text = "".join(t + "\n" for t in page_texts if t)
            if text.strip():
                return text

            # No usable text layer (e.g. a scanned CV): OCR each rendered page.
            return "".join(
                pytesseract.image_to_string(
                    page.to_image(resolution=300).original
                )
                for page in pdf.pages
            )

    if ext == ".docx":
        return docx2txt.process(path) or ""

    if ext == ".txt":
        # Context manager so the handle is closed deterministically.
        with open(path, "r", encoding="utf-8", errors="ignore") as fh:
            return fh.read()

    if ext in (".jpg", ".jpeg", ".png"):
        with Image.open(path) as img:
            return pytesseract.image_to_string(img)

    return ""


# Model

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hub id of the instruction-tuned checkpoint used by analyze_cv() and
# generate_verdict().
model_name = "meta-llama/Llama-3.1-8B-Instruct"

# Loader options shared by the tokenizer and the model.
_hub_kwargs = {
    "cache_dir": "/kaggle/temp_model",
    "trust_remote_code": True,
}

tokenizer = AutoTokenizer.from_pretrained(model_name, **_hub_kwargs)

# NOTE(review): repetition_penalty is normally a generation-time setting;
# confirm that passing it to from_pretrained() has the intended effect with
# the installed transformers version (otherwise pass it to generate()).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    repetition_penalty=1.15,
    **_hub_kwargs,
)


## analyze_cv

In [None]:
import re
import torch

def analyze_cv(cv_text, jd_text):
    """Ask the LLM to compare a CV against a job description.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        cv_text: Plain text of the candidate's CV.
        jd_text: Plain text of the job description.

    Returns:
        The model's report as a string. When the report contains the heading
        "Matched Skills", everything before that heading is dropped so the
        result starts with it. The text is intended to be parsed by
        ``extract_skills`` and ``extract_cv_score``.
    """
    prompt = f"""
You are an expert recruiter.

CV:
{cv_text}

Job Description:
{jd_text}

Instructions:
1. Identify all skills required in the Job Description.
2. Identify all skills mentioned in the CV.
3. Extract the matched skills (skills present in both CV and JD, considering synonyms and related concepts).
4. Extract the non-matched skills (skills required in JD but not present in CV).
5. Calculate CV Score = (number of matched skills / total skills in JD) * 100, rounded to 1 decimal.

Return ONLY in this exact format:

Matched Skills
- skill

Non-Matched Skills
- skill

CV Score
XX.X% match

Rules:
- Do not repeat any skills.
- Do not use placeholders.
- Do not explain anything.
- Output must end after CV Score.
"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Greedy decoding (do_sample=False) keeps the analysis reproducible for a
    # given CV/JD pair. No temperature is passed: it has no effect without
    # sampling and only triggers a configuration warning.
    outputs = model.generate(
        **inputs,
        max_new_tokens=1024,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens. Decode only the
    # continuation: the prompt itself contains the "Matched Skills / - skill"
    # format template, which would otherwise be picked up as the answer.
    prompt_len = inputs["input_ids"].shape[-1]
    output_text = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    )

    # Drop any preamble the model may have written before the first heading.
    if "Matched Skills" in output_text:
        output_text = "Matched Skills\n" + output_text.split("Matched Skills", 1)[1]

    return output_text.strip()

## extract_skills & extract_cv_score

In [None]:
def _section_items(text, heading_pattern, stop_pattern):
    """Return the non-empty list items under a heading, up to the next heading or end of text."""
    section = re.search(
        rf"{heading_pattern}\s*:?(.*?)(?={stop_pattern}|\Z)",
        text,
        re.S | re.I,
    )
    if not section:
        return []

    items = []
    for line in section.group(1).splitlines():
        # Strip whitespace plus common bullet / markdown-emphasis characters
        # ("-", "*", U+2022) from both ends of the line.
        item = line.strip().strip("-*\u2022 \t").strip()
        if item:
            items.append(item)
    return items


def extract_skills(cv_output: str):
    """Parse the matched / non-matched skill lists out of an analyze_cv report.

    Each section runs from its heading to the next known heading, or to the
    end of the text when that heading is missing (e.g. a truncated report),
    so a missing "CV Score" heading no longer discards the skills before it.

    Args:
        cv_output: Report text containing "Matched Skills" and
            "Non-Matched Skills" headings (case-insensitive, optional colon).

    Returns:
        ``(matched, missing)`` - two lists of skill strings; a list is empty
        when its heading is absent.
    """
    # The look-behind keeps the tail of "Non-Matched Skills" from being
    # mistaken for the "Matched Skills" heading.
    matched = _section_items(
        cv_output,
        r"(?<!Non-)Matched Skills",
        r"Non-Matched Skills|CV Score",
    )
    missing = _section_items(cv_output, r"Non-Matched Skills", r"CV Score")
    return matched, missing

def extract_cv_score(cv_output: str):
    """Parse the percentage that follows the "CV Score" heading.

    The heading match is case-insensitive and may be followed by whitespace,
    a colon or markdown asterisks before the number. Only well-formed decimal
    numbers are accepted, so the ``float()`` conversion cannot raise.

    Args:
        cv_output: Report text produced by ``analyze_cv``.

    Returns:
        The score as a float, or ``None`` when no "CV Score <number>%" is found.
    """
    score_match = re.search(
        r"CV Score[\s:*]*(\d+(?:\.\d*)?|\.\d+)\s*%",
        cv_output,
        re.I,
    )
    return float(score_match.group(1)) if score_match else None


In [None]:
path="/kaggle/input/cv-version3/Nagwa Mohamed - AI - Machine Learning Engineer-1.pdf"
jd="""

We are looking for a Generative AI Engineer to join our research and development team.
Responsibilities:
- Design and fine-tune large language models (LLMs) using frameworks such as HuggingFace Transformers.
- Build and deploy generative AI applications for text, image, and multimodal tasks.
- Implement training pipelines with PyTorch or TensorFlow, and optimize models for performance.
- Work with cloud platforms (AWS, Azure, GCP) to scale model training and deployment.
- Apply MLOps practices using Docker, Kubernetes, MLflow, and Airflow.
- Collaborate with data scientists to integrate generative models into production systems.
- Research and experiment with diffusion models, GANs, and reinforcement learning for generative tasks.

Requirements:
- Strong programming skills in Python.
- Experience with deep learning architectures (CNNs, RNNs, Transformers).
- Knowledge of NLP techniques and computer vision.
- Familiarity with CI/CD pipelines and GitHub Actions.
- Bonus skills: LangChain, RAG (Retrieval-Augmented Generation), Vector Databases (FAISS, Pinecone, Weaviate), Databricks.


"""

# Demo run: score the sample CV at `path` against the job description in `jd`.
cv_text = load_file(path)
cv_output = analyze_cv(cv_text, jd)

# Turn the model's free-text report into structured fields.
cv_score = extract_cv_score(cv_output)
matched_skills, non_matched_skills = extract_skills(cv_output)

result = dict(
    matched_skills=matched_skills,
    non_matched_skills=non_matched_skills,
    cv_score=cv_score,
)

print(result)


## analyze_video

In [None]:
import cv2
from deepface import DeepFace
from collections import Counter

def analyze_video(video_path):
    """Estimate confidence / anxiety from facial emotions in a video.

    Up to 20 frames, evenly spaced over the non-dark frames of the video, are
    passed to ``DeepFace.analyze`` and the dominant emotion of each is tallied.

    The video is decoded twice (once to find usable frames, once to fetch the
    sampled ones) so that only the sampled frames are ever held in memory,
    instead of every frame of the recording.

    Args:
        video_path: Path of the video file, as accepted by ``cv2.VideoCapture``.

    Returns:
        dict with keys:
            ``confidence`` / ``anxiety`` - percentages (one decimal) over the
                frames whose emotion falls in either group; both 0.0 when no
                frame does.
            ``emotions_distribution`` - dominant-emotion label -> frame count
                (includes "unknown" for frames that could not be analysed).
            ``frames_sampled`` - number of frames analysed.
    """
    frames_samples = 20
    dark_threshold = 10  # mean grey level below which a frame is skipped

    # Pass 1: record the indices of usable (non-dark) frames only.
    usable = []
    cap = cv2.VideoCapture(video_path)
    try:
        index = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            if not gray.mean() < dark_threshold:
                usable.append(index)
            index += 1
    finally:
        cap.release()

    if not usable:
        return {
            "confidence": 0.0,
            "anxiety": 0.0,
            "emotions_distribution": {},
            "frames_sampled": 0,
        }

    # Evenly spaced sample over the usable frames, capped at frames_samples.
    step = max(1, len(usable) // frames_samples)
    wanted = set(usable[::step][:frames_samples])
    last_wanted = max(wanted)

    # Pass 2: decode again and keep just the sampled frames.
    selected_frames = []
    cap = cv2.VideoCapture(video_path)
    try:
        index = 0
        while index <= last_wanted:
            ret, frame = cap.read()
            if not ret:
                break
            if index in wanted:
                selected_frames.append(frame)
            index += 1
    finally:
        cap.release()

    emotions = []
    for frame in selected_frames:
        try:
            result = DeepFace.analyze(frame, actions=["emotion"], enforce_detection=False)
            if isinstance(result, list):
                result = result[0]
            emotions.append(result.get("dominant_emotion", "unknown"))
        except Exception:
            # Deliberate best-effort: one frame failing analysis must not
            # abort the whole video; it is counted as "unknown".
            emotions.append("unknown")

    # NOTE(review): "calm" may not be a label DeepFace emits - confirm.
    confidence_emotions = {"happy", "neutral", "calm"}
    anxiety_emotions = {"fear", "sad", "angry", "disgust"}

    distribution = Counter(emotions)

    confidence_count = sum(distribution[e] for e in confidence_emotions)
    anxiety_count = sum(distribution[e] for e in anxiety_emotions)
    # Labels in neither group (e.g. "unknown") are left out of the denominator.
    total = confidence_count + anxiety_count

    confidence_score = round((confidence_count / total) * 100, 1) if total > 0 else 0.0
    anxiety_score = round((anxiety_count / total) * 100, 1) if total > 0 else 0.0

    return {
        "confidence": confidence_score,
        "anxiety": anxiety_score,
        "emotions_distribution": dict(distribution),
        "frames_sampled": len(selected_frames),
    }


In [None]:
# video_path="/kaggle/input/test2v/test2.mp4"
# result = analyze_video(video_path)
# print(result)
# print(result["confidence"])

In [None]:
# video_path="/kaggle/input/test2v/test2.mp4"
# result = analyze_video(video_path)
# print(result)

## generate_verdict

In [None]:
import re

def generate_verdict(cv_score, matched, missing, confidence, anxiety):
    """Combine CV and video results into a final score and a short HR verdict.

    ``final_score = 0.7 * cv_score + 0.3 * confidence`` (rounded to 2 places).
    When the CV score is 0 or there are no matched skills, a fixed rejection
    text is returned without calling the LLM; otherwise the module-level
    ``model`` / ``tokenizer`` generate a two-sentence verdict.

    Args:
        cv_score: CV match percentage, or None (treated as 0.0).
        matched: List of matched skills, or None (treated as empty).
        missing: List of missing skills, or None (treated as empty).
        confidence: Video confidence percentage, or None (treated as 0.0).
        anxiety: Video anxiety percentage, or None (treated as 0.0).

    Returns:
        ``(verdict_text, final_score, behavior, video_score)`` where
        ``behavior`` is "confident", "anxious" or "neutral" and
        ``video_score`` equals the confidence value.
    """
    cv_score = cv_score or 0.0
    confidence = confidence or 0.0
    anxiety = anxiety or 0.0
    matched = matched or []
    missing = missing or []

    behavior = (
        "confident" if confidence > anxiety else
        "anxious" if anxiety > confidence else
        "neutral"
    )

    video_score = confidence
    final_score = round((cv_score * 0.7 + video_score * 0.3), 2)

    if cv_score == 0.0 or not matched:
        verdict_text = (
            f"The candidate has no matching skills in the CV and appears {behavior}. "
            "The candidate is not suitable for the position."
        )
        return verdict_text, final_score, behavior, video_score

    matched_skills = ", ".join(matched)
    missing_skills = ", ".join(missing) if missing else "None"

    prompt = f"""
You are an expert recruiter.

Candidate Evaluation:

- CV Score: {cv_score}%
- Video Behavior: {behavior}
- Matched Skills: {matched_skills}
- Missing Skills: {missing_skills}
- Final Score: {final_score}

Task:

Write a concise HR verdict in EXACTLY TWO sentences. 
Sentence 1: Highlight the candidate's strengths and matched skills.
Sentence 2: Mention missing skills and video behavior, then give a clear recommendation.
Do not add anything else.
"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.3,
        top_p=0.9,
        do_sample=True,
        # Same padding choice as analyze_cv.
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence starts with the prompt tokens; slice them off by
    # length rather than relying on the decoded text beginning with the exact
    # prompt string, which would leak the prompt into the verdict on mismatch.
    prompt_len = inputs["input_ids"].shape[-1]
    verdict_text = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    # re.split always returns at least one element (possibly ""), so empty
    # pieces are filtered out to make the fallback below reachable.
    sentences = [
        s for s in re.split(r'(?<=[.!?])\s+', verdict_text) if s.strip()
    ]

    if sentences:
        verdict_text = ' '.join(sentences[:2])
    else:
        # The model produced no text: fall back to a deterministic summary.
        verdict_text = (
            f"The candidate shows {behavior} behavior with CV score {cv_score}%. "
            f"Matched skills: {matched_skills}. Missing skills: {missing_skills}."
        )

    return verdict_text, final_score, behavior, video_score


In [None]:
def format_output(cv_score, video_score, final_score, verdict, behavior):
    """Render the scores, behavior and verdict as a newline-terminated text report."""
    report_lines = [
        "Final Score:",
        f"• CV Score (70%): {cv_score}",
        f"• Video Score (30%): {video_score}",
        f"• Candidate behavior: {behavior}",
        f"• Final = {final_score}",
        f"{verdict}",
    ]
    return "\n".join(report_lines) + "\n"


## FASTAPI APP

In [None]:
# !pip install fastapi uvicorn nest_asyncio python-multipart


In [None]:
import uvicorn
from fastapi import FastAPI, UploadFile, File, Form
from threading import Thread
import uuid
import os
import requests
from threading import Thread
import json


In [None]:
# Applied before the API server is started in a later cell.
# NOTE(review): presumably needed so uvicorn can start its event loop inside
# the notebook's already-running loop - confirm.
import nest_asyncio
nest_asyncio.apply()


In [None]:
# FastAPI application exposing the /evaluate and /results endpoints.
app = FastAPI()
# In-memory store of finished evaluations, keyed by session id. Entries are
# added by /evaluate and read by /results; nothing in this cell removes them.
results = {}

@app.post("/evaluate")
def evaluate(
    job_description: str = Form(...),
    cv_path: str = Form(...),
    video_path: str = Form(...)
):
    """Run the full CV + video assessment and store the result.

    Declared with plain ``def`` rather than ``async def``: the body is long,
    blocking work (LLM inference, video decoding), and FastAPI runs ``def``
    path operations in a worker thread, so the event loop stays free to serve
    other requests such as ``/results``.

    SECURITY: ``cv_path`` and ``video_path`` are server-side paths supplied by
    the client, so this endpoint can read any file the process can read. Only
    expose it on a trusted network.

    Form fields:
        job_description: Job description text.
        cv_path: Path of the CV file on the server.
        video_path: Path of the interview video on the server.

    Returns:
        ``{"session_id": ..., "output": <formatted report>}``; the full result
        is stored in ``results`` under the session id.

    Raises:
        HTTPException: 400 when either path is not an existing file.
    """
    # Fail fast with a client error instead of a 500 (missing CV) or an
    # all-zero video score (missing video).
    for field, file_path in (("cv_path", cv_path), ("video_path", video_path)):
        if not os.path.isfile(file_path):
            raise HTTPException(
                status_code=400,
                detail=f"{field} does not point to an existing file: {file_path}",
            )

    session_id = str(uuid.uuid4())

    cv_text = load_file(cv_path)
    cv_analysis = analyze_cv(cv_text, job_description)
    cv_score = extract_cv_score(cv_analysis) or 0.0
    matched, missing = extract_skills(cv_analysis)

    video_result = analyze_video(video_path)
    confidence = video_result.get("confidence", 0.0)
    anxiety = video_result.get("anxiety", 0.0)

    verdict, final_score, behavior, video_score = generate_verdict(
        cv_score, matched, missing, confidence, anxiety
    )

    formatted_output = format_output(cv_score, video_score, final_score, verdict, behavior)

    results[session_id] = {
        "cv_text": cv_text,
        "cv_analysis": cv_analysis,
        "cv_score": cv_score,
        "matched_skills": matched,
        "missing_skills": missing,
        "video_result": video_result,
        "behavior": behavior,
        "final_score": final_score,
        "llm_verdict": verdict,
        "formatted_output": formatted_output
    }

    return {"session_id": session_id, "output": formatted_output}



@app.get("/results")
async def get_results(session_id: str):
    """Return the stored evaluation for ``session_id``, or an error object if unknown."""
    try:
        return results[session_id]
    except KeyError:
        return {"error": "invalid session id"}


def run_api():
    """Serve the FastAPI ``app`` with uvicorn on 0.0.0.0:8009 (blocks the calling thread)."""
    uvicorn.run(app, host="0.0.0.0", port=8009)


# Run the server on a daemon thread so this cell returns immediately and the
# server does not keep the process alive on shutdown.
thread = Thread(daemon=True, target=run_api)
thread.start()


In [None]:
jd="""To support all aspects of human resources functions within the company, including recruitment, training and development, performance management, and ensuring compliance with company policies and procedures to maintain an effective and organized work environment.

Key Responsibilities:

Manage recruitment processes: posting job openings, screening resumes, conducting interviews, and selecting candidates.

Coordinate employee training and development programs.

Monitor performance evaluations and contribute to employee development plans.

Implement company policies and procedures and ensure compliance.

Maintain and update employee records.

Handle employee-related matters such as leave, attendance, and benefits.

Provide support and guidance to employees regarding internal policies and company regulations.

Prepare HR reports and statistics as needed.

Qualifications:

Bachelor’s degree in Business Administration, Human Resources, or related field.

1–3 years of experience in Human Resources (depending on level).

Good knowledge of labor laws and local regulations.

Strong communication and organizational skills.

Ability to handle sensitive information with confidentiality.

Proficiency in computer applications and HR software.

Personal Skills:

Problem-solving and decision-making abilities.

Negotiation and persuasion skills.

Ability to work collaboratively in a team environment.


"""

# Smoke-test the local /evaluate endpoint with a sample CV and video.
resp = requests.post(
    "http://127.0.0.1:8009/evaluate",
    data={
        "job_description": jd,
        "cv_path": "/kaggle/input/cv-version4/Nagwa Mohamed-Data Scientist II.pdf",
        "video_path": "/kaggle/input/video1/test1.webm"
    }
)

# Surface HTTP errors (4xx/5xx) as such; without this a failed request shows
# up as a confusing KeyError on data["output"] or a JSON decode error.
resp.raise_for_status()

data = resp.json()
print(data["output"])

# Dash App

In [None]:
import os

from pyngrok import ngrok, conf

# SECURITY: the ngrok auth token that used to be hard-coded here is exposed in
# the file's history and should be revoked. Read the token from the
# NGROK_AUTHTOKEN environment variable instead; when it is unset the token
# stays None and opening a tunnel is expected to fail until one is provided.
conf.get_default().auth_token = os.environ.get("NGROK_AUTHTOKEN")


In [None]:
import dash
from dash import html, dcc, Input, Output, State
import dash_bootstrap_components as dbc
import base64

# Dash front-end application, styled with the Bootstrap theme.
# NOTE(review): this rebinds the module-level name `app`, which an earlier
# cell bound to the FastAPI instance. Anything that looks up `app` after this
# point gets the Dash app - confirm that is intended.
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
    suppress_callback_exceptions=True
)

# LAYOUT
# Page structure: a two-column row (CV/video upload boxes on the left, job
# description textarea on the right) followed by a results panel holding the
# "Evaluate Candidate" button and the output area. The component ids below
# are the ones referenced by the callbacks defined after the layout.
app.layout = html.Div(
    style={"padding": "30px", "backgroundColor": "#f5f7fa"},
    children=[

        html.Div(
            style={"display": "flex", "gap": "30px", "marginTop": "20px"},
            children=[

                # LEFT SIDE (UPLOADS)
                html.Div(
                    style={"width": "48%", "background": "white",
                           "padding": "20px", "borderRadius": "10px"},
                    children=[

                        html.H4("Upload CV"),
                        dcc.Upload(
                            id="upload-cv",
                            # Inner text is replaced by update_cv_status once a file is chosen.
                            children=html.Div(
                                id="cv-upload-text",
                                children=["Drag & Drop or Select CV File"]
                            ),
                            style={
                                "height": "120px",
                                "border": "2px dashed #007bff",
                                "borderRadius": "10px",
                                "textAlign": "center",
                                "paddingTop": "40px",
                                "color": "#007bff"
                            }
                        ),
                        # NOTE(review): no callback visible in this file writes to this id.
                        html.Div(id="cv_upload_status", style={"marginTop": "10px"}),

                        html.H4("Upload Video", style={"marginTop": "30px"}),
                        dcc.Upload(
                            id="upload-video",
                            # Inner text is replaced by update_video_status once a file is chosen.
                            children=html.Div(
                                id="video-upload-text",
                                children=["Drag & Drop or Select Video File"]
                            ),
                            style={
                                "height": "120px",
                                "border": "2px dashed #28a745",
                                "borderRadius": "10px",
                                "textAlign": "center",
                                "paddingTop": "40px",
                                "color": "#28a745"
                            }
                        ),
                        # NOTE(review): no callback visible in this file writes to this id.
                        html.Div(id="video_upload_status", style={"marginTop": "10px"}),
                    ]
                ),

                # RIGHT SIDE (JD)
                html.Div(
                    style={"width": "48%", "background": "white",
                           "padding": "20px", "borderRadius": "10px"},
                    children=[
                        html.H4("Job Description"),
                        dcc.Textarea(
                            id="jd-text",
                            placeholder="Paste Job Description here...",
                            style={"width": "100%", "height": "300px",
                                   "borderRadius": "10px",
                                   "padding": "10px",
                                   "border": "1px solid #ccc"}
                        ),
                    ]
                )
            ]
        ),

        # RESULTS
        html.Div(
            style={"background": "white", "padding": "20px",
                   "marginTop": "30px", "borderRadius": "10px"},
            children=[

                html.Div(style={"textAlign": "center"}, children=[
                    html.Button(
                        "Evaluate Candidate",
                        id="eval-btn",
                        n_clicks=0,
                        style={
                            "background": "#007bff",
                            "color": "white",
                            "padding": "14px 20px",
                            "borderRadius": "8px",
                            "fontSize": "25px",
                            "cursor": "pointer"
                        }
                    )
                ]),

                # The evaluation callback writes into "results-output", which
                # is wrapped in a dcc.Loading component.
                dcc.Loading(
                    id="loading_wrapper",
                    type="circle",
                    color="#003366",
                    children=html.Div(
                        id="results-output",
                        style={
                            "whiteSpace": "pre-wrap",
                            "minHeight": "150px",
                            "marginTop": "20px",
                            "fontSize": "17px"
                        }
                    )
                )
            ]
        )
    ]
)

#CV UPLOAD STATUS CALLBACK
@app.callback(
    Output("cv-upload-text", "children"),
    Input("upload-cv", "contents"),
    State("upload-cv", "filename"),
)
def update_cv_status(content, filename):
    """Show the uploaded CV's filename in the upload box, or the default hint when empty."""
    return f"Uploaded: {filename}" if content else "Drag & Drop or Select CV File"

# VIDEO UPLOAD STATUS CALLBACK
@app.callback(
    Output("video-upload-text", "children"),
    Input("upload-video", "contents"),
    State("upload-video", "filename"),
)
def update_video_status(content, filename):
    """Show the uploaded video's filename in the upload box, or the default hint when empty."""
    return f"Uploaded: {filename}" if content else "Drag & Drop or Select Video File"


#MAIN EVALUATION CALLBACK
def _save_upload(contents, filename):
    """Decode a dcc.Upload ``contents`` data URL into a new temp file and return its path."""
    encoded = contents.split(",", 1)[1]
    # Only the extension of the client-supplied name is kept (load_file picks
    # its parser from it). The name itself is never used as a path, so a
    # crafted filename such as "../../x" cannot escape the temp directory and
    # concurrent uploads with the same name cannot overwrite each other.
    suffix = os.path.splitext(os.path.basename(filename or ""))[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(base64.b64decode(encoded))
    return tmp.name


@app.callback(
    Output("results-output", "children"),
    Input("eval-btn", "n_clicks"),
    State("upload-cv", "contents"),
    State("upload-cv", "filename"),
    State("upload-video", "contents"),
    State("upload-video", "filename"),
    State("jd-text", "value"),
)
def evaluate(n_clicks, cv_content, cv_name, video_content, video_name, jd_text):
    """Run the full assessment for the uploaded CV and video and render the report.

    Args:
        n_clicks: Click count of the "Evaluate Candidate" button.
        cv_content: Upload payload of the CV (data URL: header, comma, base64).
        cv_name: Client-side filename of the CV.
        video_content: Upload payload of the video (same format).
        video_name: Client-side filename of the video.
        jd_text: Job description text.

    Returns:
        ``""`` before the first click, a hint string when an input is
        missing, otherwise an ``html.Pre`` with the formatted report.
    """
    if not n_clicks:
        return ""

    if not (cv_content and video_content and jd_text):
        return "Please upload CV, Video, and paste JD."

    cv_path = video_path = None
    try:
        # CV: persist, extract text, analyse against the JD.
        cv_path = _save_upload(cv_content, cv_name)
        cv_text = load_file(cv_path)
        result = analyze_cv(cv_text, jd_text)
        # Same None -> 0.0 normalisation as the API endpoint, so the report
        # never shows "None" as a score.
        cv_score = extract_cv_score(result) or 0.0
        matched, missing = extract_skills(result)

        # Video: persist and analyse.
        video_path = _save_upload(video_content, video_name)
        video_result = analyze_video(video_path)
    finally:
        # The uploads are only needed during analysis; remove them so
        # repeated evaluations do not fill the temp directory.
        for saved_path in (cv_path, video_path):
            if saved_path is not None:
                try:
                    os.remove(saved_path)
                except OSError:
                    pass  # best-effort cleanup

    confidence = video_result.get("confidence", 0.0)
    anxiety = video_result.get("anxiety", 0.0)

    verdict, final_score, behavior, video_score = generate_verdict(
        cv_score, matched, missing, confidence, anxiety
    )

    result_text = format_output(cv_score, video_score, final_score, verdict, behavior)

    return html.Pre(result_text, style={"whiteSpace": "pre-wrap", "fontSize": "25px"})


# Entry point: open an ngrok tunnel to port 8050, print its URL, then serve `app`.
if __name__ == "__main__":
    from pyngrok import ngrok
    # Kill first, then connect - presumably to clear tunnels left over from
    # earlier runs of this cell (confirm).
    ngrok.kill()
    public_url = ngrok.connect(8050)
    print("Dash app running at:", public_url)
    # Listens on all interfaces on the same port the tunnel forwards to.
    app.run(host="0.0.0.0", port=8050)
