# Installation

In [5]:
# !pip install fastapi starlette pdfplumber pytesseract docx2txt Pillow opencv-python deepface google-generativeai
# !sudo apt-get install -y tesseract-ocr

# !pip install --upgrade numpy==1.26.4 scipy==1.12.0
# !pip install --upgrade tensorflow==2.15.0 keras==2.15.0

# !pip install --force-reinstall deepface

# !sudo apt-get purge python3-scipy


# !pip install pyngrok

# !pip install dash dash-bootstrap-components
# !pip install plotly


# Imports

In [47]:
import os
import io
import re
import uuid
import json
import tempfile

from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from starlette.responses import JSONResponse

import pdfplumber
import pytesseract
import docx2txt
import re
from PIL import Image

import cv2
from deepface import DeepFace

import google.generativeai as genai

In [13]:
# api_key = os.environ["GOOGLE_API_KEY"]  # load the key from the environment; never paste it into the notebook
# genai.configure(api_key=api_key)

# Functions

## load_file

In [14]:
def load_file(path):
    """Extract plain text from a document on disk.

    The reader is chosen from the file extension (case-insensitive):
    ``.pdf`` (text layer first, OCR fallback), ``.docx``, ``.txt`` and
    ``.jpg``/``.jpeg``/``.png`` (OCR).

    Args:
        path: Filesystem path of the document to read.

    Returns:
        The extracted text, or ``""`` when the extension is not supported.
    """
    ext = os.path.splitext(path)[1].lower()

    if ext == ".pdf":
        # Open the PDF once and reuse the handle for the OCR fallback.
        with pdfplumber.open(path) as pdf:
            embedded = ""
            for page in pdf.pages:
                page_text = page.extract_text()
                if page_text:
                    embedded += page_text + "\n"
            if embedded.strip():
                return embedded

            # No usable text layer: render every page (resolution=300) and OCR it.
            return "".join(
                pytesseract.image_to_string(page.to_image(resolution=300).original)
                for page in pdf.pages
            )

    if ext == ".docx":
        return docx2txt.process(path) or ""

    if ext == ".txt":
        # Context manager so the handle is closed even if read() fails.
        with open(path, "r", encoding="utf-8", errors="ignore") as handle:
            return handle.read()

    if ext in (".jpg", ".jpeg", ".png"):
        # Close the image file once OCR has finished with it.
        with Image.open(path) as image:
            return pytesseract.image_to_string(image)

    return ""


## analyze_cv

In [15]:
def analyze_cv(cv_text, jd_text):
    """Ask Gemini to compare a CV against a job description.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable so
    that no credential is stored in the notebook.

    Args:
        cv_text: Plain text extracted from the candidate's CV.
        jd_text: Plain text of the job description.

    Returns:
        The raw model answer. The prompt asks for three sections
        ("Matched Skills", "Missing Skills", "CV Score") in a fixed layout.

    Raises:
        RuntimeError: If ``GOOGLE_API_KEY`` is not set.
    """
    # Fail fast on missing configuration, before any prompt is built or sent.
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "GOOGLE_API_KEY environment variable is not set; "
            "export it before calling analyze_cv()."
        )

    prompt = f"""
You are an expert recruiter.
You can analyze CVs and Job Descriptions from any domain (technical, business, healthcare, education, customer service, etc.).
CV:
{cv_text}

Job Description:
{jd_text}

Tasks:
1. Extract skills , education and experience from CV and Job Description.
2. Perform semantic matching (synonyms, related tools, transferable skills).
3. Apply weighted scoring:
   - Skills: 40%
   - Work Experience: 30%
   - Education: 20%
   - Certifications/Projects: if explicitly required in JD.

Return the result strictly in this format:

Matched Skills
- skill1
- skill2
...

Missing Skills
- skill1
- skill2
...

CV Score
XX.X% match

Rules:
- Do not acknowledge instructions.
- Do not explain.
- Only output the result in the exact format above.
"""
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel("gemini-2.0-flash")
    # temperature 0 / top_k 1 / a single candidate: request the least random
    # decoding the API offers so repeated runs score a CV consistently.
    response = model.generate_content(
        prompt,
        generation_config={
            "temperature": 0,
            "top_p": 1,
            "top_k": 1,
            "candidate_count": 1
        }
    )
    return response.candidates[0].content.parts[0].text


## extract_cv_score and extract_skills

In [16]:
def extract_cv_score(result_text):
    """Parse the "CV Score" percentage from the model answer.

    Besides the exact requested layout ("CV Score" followed by "XX.X%"),
    small formatting deviations are accepted: punctuation or markdown
    between the label and the number ("CV Score: 87%", "**CV Score**")
    and whitespace before the percent sign ("87 %").

    Args:
        result_text: Raw text returned by the CV analysis prompt.

    Returns:
        The score as a fraction (e.g. ``0.855`` for "85.5%"), or ``None``
        when no well-formed percentage follows a "CV Score" label.
    """
    # The number pattern admits a single decimal point only, so a malformed
    # token such as "1.2.3" yields None instead of crashing float().
    match = re.search(
        r"CV\s+Score\W*?(\d+\.?\d*|\.\d+)\s*%",
        result_text,
        re.IGNORECASE,
    )
    if match is None:
        return None
    return float(match.group(1)) / 100
        
def extract_skills(result_text):
    """Split the model answer into matched and missing skill lists.

    Lines are scanned in order. A line starting (case-insensitively) with
    "matched skills" or "missing skills" opens that section; a line starting
    with "cv score" closes the current one. Inside a section, every line
    beginning with "-" contributes one skill (dash and surrounding
    whitespace removed).

    Args:
        result_text: Raw text returned by the CV analysis prompt.

    Returns:
        A ``(matched, missing)`` tuple of string lists.
    """
    buckets = {"matched": [], "missing": []}
    # Header prefix -> section it switches to (None = outside any skill list).
    section_headers = (
        ("matched skills", "matched"),
        ("missing skills", "missing"),
        ("cv score", None),
    )

    current = None
    for raw_line in result_text.splitlines():
        entry = raw_line.strip()
        lowered = entry.lower()

        for prefix, section in section_headers:
            if lowered.startswith(prefix):
                current = section
                break
        else:
            # Not a header line: collect it only if it is a bullet in a section.
            if current is not None and entry.startswith("-"):
                buckets[current].append(entry[1:].strip())

    return buckets["matched"], buckets["missing"]


## analyze_video

In [17]:
def analyze_video(video_path):
    """Estimate a candidate's confidence and anxiety from an interview video.

    Up to 20 evenly spaced frames are run through DeepFace emotion analysis;
    the resulting list of dominant emotions is then sent to Gemini, which is
    asked to reply with a JSON object holding ``confidence`` and ``anxiety``.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable.

    Args:
        video_path: Path of a video file readable by ``cv2.VideoCapture``.

    Returns:
        A dict with the keys ``confidence``, ``anxiety``, ``emotions``
        (one label per sampled frame, ``"unknown"`` where analysis failed),
        ``frames_sampled`` and ``llm_raw`` (the JSON text taken from the
        model reply, ``""`` when the model was not queried). Both scores are
        ``0`` when no frame could be read or the reply could not be parsed.

    Raises:
        RuntimeError: If ``GOOGLE_API_KEY`` is not set.
    """
    # Fail fast on missing configuration, before any video work is done.
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "GOOGLE_API_KEY environment variable is not set; "
            "export it before calling analyze_video()."
        )
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel("gemini-2.5-flash")

    frames_samples = 20
    no_frames_result = {
        "confidence": 0,
        "anxiety": 0,
        "emotions": [],
        "frames_sampled": 0,
        "llm_raw": "",
    }

    # Pass 1: count the frames without keeping any of them, so memory use
    # does not grow with the length of the video.
    total_frames = 0
    cap = cv2.VideoCapture(video_path)
    try:
        while cap.grab():
            total_frames += 1
    finally:
        cap.release()

    if total_frames == 0:
        return no_frames_result

    # Evenly spaced frame indices, at most `frames_samples` of them.
    step = max(1, total_frames // frames_samples)
    wanted = range(0, total_frames, step)[:frames_samples]

    # Pass 2: walk the stream again and decode only the wanted frames.
    selected_frames = []
    cap = cv2.VideoCapture(video_path)
    try:
        for index in range(wanted[-1] + 1):
            if not cap.grab():
                break
            if index in wanted:
                ok, frame = cap.retrieve()
                if ok:
                    selected_frames.append(frame)
    finally:
        cap.release()

    if not selected_frames:
        return no_frames_result

    emotions = []
    for frame in selected_frames:
        try:
            result = DeepFace.analyze(frame, actions=["emotion"], enforce_detection=False)
            if isinstance(result, list):
                result = result[0]
            emotions.append(result.get("dominant_emotion", "unknown"))
        except Exception:
            # Best effort: one frame that cannot be analysed must not abort the run.
            emotions.append("unknown")

    # ========= Tuned prompt =========
    prompt = f"""
You are an expert in behavioral psychology and interview analysis.

Emotions detected from candidate video: {emotions}

Tasks:
1. Estimate overall confidence (0 to 1).
2. Estimate overall anxiety (0 to 1).

Rules:
- Confidence increases with: happy, neutral, calm
- Anxiety increases with: fear, sad, angry, disgust
- Return ONLY a valid JSON object, no text, no explanation.

Example:
{{"confidence": 0.72, "anxiety": 0.28}}
"""

    response = model.generate_content(prompt)
    try:
        # `.text` can raise ValueError when the reply carries no text part.
        raw_text = (response.text or "").strip()
    except (AttributeError, ValueError):
        raw_text = ""

    # Keep only the outermost {...} span in case the model wrapped the JSON.
    match = re.search(r"\{.*\}", raw_text, re.DOTALL)
    if match:
        raw_text = match.group(0)

    try:
        data = json.loads(raw_text)
        confidence = float(data.get("confidence", 0.0))
        anxiety = float(data.get("anxiety", 0.0))
    except (AttributeError, TypeError, ValueError):
        # Invalid JSON, a non-object payload, or non-numeric values.
        confidence, anxiety = 0.0, 0.0

    return {
        "confidence": confidence,
        "anxiety": anxiety,
        "emotions": emotions,
        "frames_sampled": len(selected_frames),
        "llm_raw": raw_text
    }


In [18]:
# video_path="/kaggle/input/video1/test1.webm"
# result = analyze_video(video_path)
# print(result)

## generate_verdict

In [40]:
def generate_verdict(cv_score, matched, missing, confidence, anxiety):
    """Combine CV and video results into a final score and an HR verdict.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable.

    Args:
        cv_score: CV match as a number (the prompt and formula use it as-is).
        matched: List of skills found in both the CV and the job description.
        missing: List of required skills absent from the CV.
        confidence: Confidence estimate from the video analysis.
        anxiety: Anxiety estimate from the video analysis.

    Returns:
        A ``(verdict, final_score, behavior)`` tuple: the model's verdict
        text, the weighted score rounded to two decimals, and one of
        ``"confident"``, ``"anxious"`` or ``"neutral"`` (scores equal).

    Raises:
        RuntimeError: If ``GOOGLE_API_KEY`` is not set.
    """
    # Fail fast on missing configuration, before any prompt is built or sent.
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "GOOGLE_API_KEY environment variable is not set; "
            "export it before calling generate_verdict()."
        )

    behavior = (
        "confident" if confidence > anxiety else
        "anxious" if anxiety > confidence else
        "neutral"
    )

    # 70% CV match + 30% video score.
    # NOTE(review): the video score is max(confidence, anxiety), so a highly
    # anxious candidate is rewarded exactly like a highly confident one.
    # Confirm this is intended before relying on the final score.
    final_score = round((cv_score * 0.7 + max(confidence, anxiety) * 0.3), 2)

    matched_skills = ", ".join(matched)
    missing_skills = ", ".join(missing)

    prompt = f"""
You are an expert recruiter.

Candidate Evaluation:

CV Score: {cv_score}
Video Behavior: {behavior}
Matched Skills: {matched_skills}
Missing Skills: {missing_skills}
Final Score: {final_score}

Write a concise, professional verdict for HR.
Guidelines:
- Maximum 2 sentences.
- Highlight strengths clearly.
- Mention missing skills briefly.
- Explicitly state video behavior (e.g., "Candidate is confident", "Candidate appeared anxious").
- End with a clear recommendation (e.g., "worth considering", "needs development").
- Return exactly this format:
llm_verdict:
<sentence1>
<sentence2>
"""
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel("gemini-2.0-flash")
    response = model.generate_content(prompt)
    verdict = response.text.strip()

    return verdict, final_score ,behavior


In [44]:
def format_output(cv_score, video_score, final_score, verdict,behavior):
    """Render the evaluation as a bulleted, newline-terminated text block.

    Args:
        cv_score: CV match value shown on the "CV Match (70%)" line.
        video_score: Value shown on the "Video Score (30%)" line.
        final_score: Combined score shown on the "Final" line.
        verdict: Verdict text, shown as the last bullet.
        behavior: Behaviour label shown on the "Candidate behavior" line.

    Returns:
        The report: a "Final Score:" header followed by one bullet per value.
    """
    rows = (
        "Final Score:",
        f"• CV Match (70%): {cv_score}",
        f"• Video Score (30%): {video_score}",
        f"• Candidate behavior: {behavior}",
        f"• Final = {final_score}",
        f"• {verdict}",
    )
    # Every row, including the last, ends with a newline.
    return "\n".join(rows) + "\n"

# Dash App

In [21]:
import os

from pyngrok import ngrok, conf

# The ngrok auth token is a credential: read it from the NGROK_AUTHTOKEN
# environment variable instead of storing it in the notebook. When the
# variable is unset, pyngrok's default configuration is left untouched.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTHTOKEN")
if NGROK_AUTH_TOKEN:
    conf.get_default().auth_token = NGROK_AUTH_TOKEN


In [49]:
import base64

import dash
import dash_bootstrap_components as dbc
from dash import html, dcc, Input, Output, State

app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
    suppress_callback_exceptions=True,
)


def _drop_zone(upload_id, label_id, prompt, accent):
    """Build a dashed upload box whose inner label Div has its own id."""
    return dcc.Upload(
        id=upload_id,
        children=html.Div(id=label_id, children=[prompt]),
        style={
            "height": "120px",
            "border": f"2px dashed {accent}",
            "borderRadius": "10px",
            "textAlign": "center",
            "paddingTop": "40px",
            "color": accent,
        },
    )


# Look shared by the two side-by-side panels of the top row.
_PANEL_STYLE = {
    "width": "48%",
    "background": "white",
    "padding": "20px",
    "borderRadius": "10px",
}

# LEFT SIDE (UPLOADS): CV and video drop zones, each with a status Div below.
_uploads_panel = html.Div(
    style=dict(_PANEL_STYLE),
    children=[
        html.H4("Upload CV"),
        _drop_zone(
            "upload-cv",
            "cv-upload-text",
            "Drag & Drop or Select CV File",
            "#007bff",
        ),
        html.Div(id="cv_upload_status", style={"marginTop": "10px"}),
        html.H4("Upload Video", style={"marginTop": "30px"}),
        _drop_zone(
            "upload-video",
            "video-upload-text",
            "Drag & Drop or Select Video File",
            "#28a745",
        ),
        html.Div(id="video_upload_status", style={"marginTop": "10px"}),
    ],
)

# RIGHT SIDE (JD): free-text job description.
_jd_panel = html.Div(
    style=dict(_PANEL_STYLE),
    children=[
        html.H4("Job Description"),
        dcc.Textarea(
            id="jd-text",
            placeholder="Paste Job Description here...",
            style={
                "width": "100%",
                "height": "300px",
                "borderRadius": "10px",
                "padding": "10px",
                "border": "1px solid #ccc",
            },
        ),
    ],
)

_evaluate_button = html.Button(
    "Evaluate Candidate",
    id="eval-btn",
    n_clicks=0,
    style={
        "background": "#007bff",
        "color": "white",
        "padding": "14px 20px",
        "borderRadius": "8px",
        "fontSize": "25px",
        "cursor": "pointer",
    },
)

# RESULTS: the trigger button plus an output area wrapped in a loading spinner.
_results_panel = html.Div(
    style={
        "background": "white",
        "padding": "20px",
        "marginTop": "30px",
        "borderRadius": "10px",
    },
    children=[
        html.Div(style={"textAlign": "center"}, children=[_evaluate_button]),
        dcc.Loading(
            id="loading_wrapper",
            type="circle",
            color="#003366",
            children=html.Div(
                id="results-output",
                style={
                    "whiteSpace": "pre-wrap",
                    "minHeight": "150px",
                    "marginTop": "20px",
                    "fontSize": "17px",
                },
            ),
        ),
    ],
)

# LAYOUT: top row (uploads | job description) above the results panel.
app.layout = html.Div(
    style={"padding": "30px", "backgroundColor": "#f5f7fa"},
    children=[
        html.Div(
            style={"display": "flex", "gap": "30px", "marginTop": "20px"},
            children=[_uploads_panel, _jd_panel],
        ),
        _results_panel,
    ],
)

#CV UPLOAD STATUS CALLBACK
@app.callback(
    Output("cv-upload-text", "children"),
    Input("upload-cv", "contents"),
    State("upload-cv", "filename"),
)
def update_cv_status(content, filename):
    """Return the label for the CV drop zone.

    Args:
        content: Current ``contents`` of the CV upload component.
        filename: Current ``filename`` of the CV upload component.

    Returns:
        "Uploaded: <filename>" once content is present, else the default prompt.
    """
    if not content:
        return "Drag & Drop or Select CV File"
    return f"Uploaded: {filename}"

# VIDEO UPLOAD STATUS CALLBACK
@app.callback(
    Output("video-upload-text", "children"),
    Input("upload-video", "contents"),
    State("upload-video", "filename"),
)
def update_video_status(content, filename):
    """Return the label for the video drop zone.

    Args:
        content: Current ``contents`` of the video upload component.
        filename: Current ``filename`` of the video upload component.

    Returns:
        "Uploaded: <filename>" once content is present, else the default prompt.
    """
    if not content:
        return "Drag & Drop or Select Video File"
    return f"Uploaded: {filename}"


#MAIN EVALUATION CALLBACK
def _save_upload(contents, filename):
    """Decode a ``dcc.Upload`` payload into a private temp file; return its path.

    Only a sanitised extension of the client-supplied ``filename`` is kept, so
    the name cannot steer where the file is written.
    """
    payload = contents.split(",", 1)[1]
    extension = os.path.splitext(os.path.basename(filename or ""))[1]
    extension = re.sub(r"[^A-Za-z0-9.]", "", extension)
    handle_fd, path = tempfile.mkstemp(suffix=extension)
    with os.fdopen(handle_fd, "wb") as handle:
        handle.write(base64.b64decode(payload))
    return path


@app.callback(
    Output("results-output", "children"),
    Input("eval-btn", "n_clicks"),
    State("upload-cv", "contents"),
    State("upload-cv", "filename"),
    State("upload-video", "contents"),
    State("upload-video", "filename"),
    State("jd-text", "value"),
)
def evaluate(n_clicks, cv_content, cv_name, video_content, video_name, jd_text):
    """Run the full candidate evaluation when the button is clicked.

    Saves both uploads to temporary files, scores the CV against the job
    description, analyses the video, asks for a verdict and renders the
    report. The temporary files are removed before returning.

    Args:
        n_clicks: Click count of the evaluate button.
        cv_content: Upload payload of the CV (``"<header>,<base64>"``).
        cv_name: Client-side file name of the CV; only its extension is used.
        video_content: Upload payload of the video.
        video_name: Client-side file name of the video; only its extension is used.
        jd_text: Job description typed into the text area.

    Returns:
        ``""`` before the first click, a warning string when input is
        missing or the CV could not be processed, otherwise an ``html.Pre``
        holding the formatted report.
    """
    if not n_clicks:
        return ""

    if not (cv_content and video_content and jd_text):
        return "⚠ Please upload CV, Video, and paste JD."

    cv_path = None
    video_path = None
    try:
        # ---------------- SAVE CV ----------------
        cv_path = _save_upload(cv_content, cv_name)

        cv_text = load_file(cv_path)
        if not cv_text.strip():
            # Unsupported extension or nothing readable: do not score an empty CV.
            return "⚠ Could not extract any text from the CV. Please upload a PDF, DOCX, TXT or image file."

        result = analyze_cv(cv_text, jd_text)
        cv_score = extract_cv_score(result)
        if cv_score is None:
            # Without a numeric score the weighted formula cannot be applied.
            return "⚠ Could not read a CV score from the model response. Please try again."
        matched, missing = extract_skills(result)

        # ---------------- SAVE VIDEO ----------------
        video_path = _save_upload(video_content, video_name)

        video_result = analyze_video(video_path)
        confidence = video_result.get("confidence", 0.0)
        anxiety = video_result.get("anxiety", 0.0)

        # ---------------- VERDICT ----------------
        verdict, final_score, behavior = generate_verdict(
            cv_score, matched, missing, confidence, anxiety
        )

        # ---------------- OUTPUT ----------------
        result_text = format_output(
            cv_score, max(confidence, anxiety), final_score, verdict, behavior
        )
        return html.Pre(result_text, style={"whiteSpace": "pre-wrap", "fontSize": "25px"})
    finally:
        # Uploaded CVs and videos are personal data: never leave them on disk.
        for leftover in (cv_path, video_path):
            if leftover and os.path.exists(leftover):
                os.remove(leftover)


if __name__ == "__main__":
    from pyngrok import ngrok

    DASH_PORT = 8050

    # Stop any ngrok process still running, then open a tunnel to the port
    # the Dash server is about to listen on.
    ngrok.kill()
    public_url = ngrok.connect(DASH_PORT)
    print("Dash app running at:", public_url)
    app.run(host="0.0.0.0", port=DASH_PORT)


Dash app running at: NgrokTunnel: "https://8b4ae6f6f691.ngrok-free.app" -> "http://localhost:8050"
