In [1]:
# ============================================
# 2) Imports & Configuration
# ============================================

import os
import sys
import math
import json
import wave
import contextlib
import torch
import numpy as np
import librosa
import soundfile as sf
import re

from dataclasses import dataclass, asdict,field
from typing import List, Tuple, Dict, Optional
from transformers import AutoTokenizer, AutoModelForCausalLM


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --------------------------------------------
# Determinism (important for calibration)
# --------------------------------------------
# Seed NumPy's global RNG so any NumPy-based randomness is repeatable.
np.random.seed(42)

# --------------------------------------------
# Optional ML Dependencies
# --------------------------------------------

# Each optional symbol starts as None; later cells test these names
# (e.g. `WhisperModel is None`, `pipeline is not None`, `if nlp:`) to decide
# whether a feature is available.
WhisperModel = None
pipeline = None
nlp = None

# Faster-Whisper (speech -> text)
try:
    from faster_whisper import WhisperModel
except ImportError:
    print("[WARN] faster-whisper not installed. Transcription will be unavailable.")

# Transformers (Sentiment / LLMs)
# NOTE(review): the imports cell already does an unguarded
# `from transformers import AutoTokenizer, AutoModelForCausalLM`, so this
# guard only matters if that hard import is removed or made optional.
try:
    from transformers import pipeline
except ImportError:
    print("[WARN] transformers not installed. Sentiment/LLM features disabled.")

# SpaCy (Linguistic analysis)
# Any failure (package missing, model not downloaded, load error) switches the
# notebook to the regex fallback; NLP_MODE records which path is active.
try:
    import spacy
    nlp = spacy.load("en_core_web_sm")
    NLP_MODE = "spacy"
except Exception:
    nlp = None
    NLP_MODE = "regex"
    print("[WARN] SpaCy model not found. Falling back to regex-based analysis (lower accuracy).")

# --------------------------------------------
# Device Configuration (auto-detect)
# --------------------------------------------

def _detect_device():
    try:
        import torch
        if torch.cuda.is_available():
            return "cuda"
        if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            return "mps"
    except Exception:
        pass
    return "cpu"

# Device string ("cuda", "mps" or "cpu") shared by the later model-loading cells.
DEVICE = _detect_device()

print(f"[INFO] Running on device: {DEVICE}")
print(f"[INFO] NLP mode: {NLP_MODE}")

# --------------------------------------------
# Interview Analysis Global Config
# (Authoritative tuning source)
# --------------------------------------------
# NOTE(review): the scoring code visible in this notebook (calculate_score)
# uses literal thresholds and does not read this dict — confirm which of the
# two is meant to be authoritative before tuning values here.

INTERVIEW_CONFIG = {
    # Speaking pace
    # presumably words-per-minute bounds — TODO confirm against the consumer
    "ideal_wpm_range": (125, 145),
    "acceptable_wpm_range": (145, 160),
    "hard_penalty_wpm": 160,

    # Fillers & hesitation
    "max_safe_fillers_per_min": 3.0,
    "filler_penalty_weight": 1.0,
    "max_filler_bonus": 5,

    # Hedging & confidence
    "hedge_penalty_weight": 2.5,
    "min_confidence_score": 40,

    # Delivery (pace + rhythm + pauses)
    "delivery_penalty_weight": 0.20,

    # Structure-agnostic speech control
    "long_block_penalty": 12,
}



[INFO] Running on device: mps
[INFO] NLP mode: spacy


In [3]:
# ============================================
# 3) Audio Utilities & Pitch Analysis
# ============================================

def load_audio_mono(path: str, sr: int = 16000):
    """
    Read an audio file as a mono float32 signal at the requested sample rate.

    Args:
        path: Audio file to read.
        sr: Target sampling rate in Hz.

    Returns:
        (samples, sr): float32 samples and the target rate that was requested.
    """
    # Load at the file's native rate first, then resample only when needed.
    samples, native_sr = librosa.load(path, sr=None, mono=True)
    needs_resample = native_sr != sr
    if needs_resample:
        samples = librosa.resample(samples, orig_sr=native_sr, target_sr=sr)
    return samples.astype(np.float32), sr


def analyze_pitch_dynamics(audio: np.ndarray, sr: int) -> Dict:
    """
    Analyze pitch variation to detect monotone delivery.

    Args:
        audio: Mono audio samples.
        sr: Sampling rate of `audio` in Hz.

    Returns:
        A dict of plain Python values (safe to print or JSON-serialize):
        {
            'std_semitones': float,    # spread of voiced pitch, in semitones
            'voiced_ratio': float,     # fraction of frames flagged as voiced
            'monotone_score': float,   # 0 (expressive) -> 1 (very monotone)
            'is_monotone': bool
        }
        When there is too little voiced speech, or pitch tracking fails, the
        neutral result (std 0.0, score 0.0, not monotone) is returned so the
        caller applies no monotone penalty.
    """
    try:
        f0, voiced_flag, _ = librosa.pyin(
            audio,
            fmin=librosa.note_to_hz("C2"),
            fmax=librosa.note_to_hz("C7"),
            sr=sr,
            frame_length=2048
        )

        # Use voiced_flag for accurate voiced coverage
        voiced_flag = np.asarray(voiced_flag, dtype=bool)
        voiced_ratio = float(np.mean(voiced_flag)) if len(voiced_flag) else 0.0

        # Keep only voiced frames with a usable (finite, positive) frequency so
        # the log2 below can never see NaN or a non-positive value.
        voiced_f0 = np.asarray(f0, dtype=float)[voiced_flag]
        voiced_f0 = voiced_f0[np.isfinite(voiced_f0) & (voiced_f0 > 0)]

        # Not enough voiced speech -> unreliable prosody signal
        if len(voiced_f0) < 10 or voiced_ratio < 0.25:
            return {
                "std_semitones": 0.0,
                "voiced_ratio": voiced_ratio,
                "monotone_score": 0.0,
                "is_monotone": False
            }

        # Convert Hz -> semitones relative to the speaker's own mean pitch.
        ref_hz = max(float(np.mean(voiced_f0)), 1e-3)  # numerical safety
        semitones = 12.0 * np.log2(voiced_f0 / ref_hz)
        std_semitones = float(np.std(semitones))

        # Interview-calibrated thresholds
        MONOTONE_CENTER = 2.5   # semitones
        MONOTONE_LIMIT = 1.8

        # Cast NumPy scalars to built-ins: np.bool_ is not JSON-serializable
        # and the documented contract is float/bool.
        monotone_score = float(np.clip(
            (MONOTONE_CENTER - std_semitones) / MONOTONE_CENTER,
            0.0,
            1.0
        ))
        is_monotone = bool(std_semitones < MONOTONE_LIMIT)

        return {
            "std_semitones": std_semitones,
            "voiced_ratio": voiced_ratio,
            "monotone_score": monotone_score,
            "is_monotone": is_monotone
        }

    except Exception as e:
        # Deliberate best effort: pitch is one signal among several, so a
        # failure here degrades to "no penalty" instead of aborting analysis.
        print(f"[WARN] Pitch analysis failed: {e}")
        return {
            "std_semitones": 0.0,
            "voiced_ratio": 0.0,
            "monotone_score": 0.0,
            "is_monotone": False
        }


In [4]:
# ============================================
# 4) Transcription (Faster-Whisper) - MPS SAFE
# ============================================

@dataclass
class TranscriptionResult:
    """Outcome of one transcription run, as produced by `transcribe_audio`."""
    text: str             # segment texts concatenated and stripped
    segments: List[Dict]  # per segment: start, end, text, words[{start, end, word}]
    language: str         # language reported by the model
    duration: float       # seconds from first word start to last word end (0.0 if no words)
    num_segments: int


_WHISPER_MODEL = None  # cached model
_WHISPER_MODEL_KEY = None  # (model_size, device, compute_type) the cached model was built with


def transcribe_audio(
    audio_path: str,
    model_size: str = "medium"
) -> TranscriptionResult:
    """
    Transcribe an audio file with faster-whisper and collect word timings.

    The model is cached at module level and rebuilt only when the requested
    configuration changes, so asking for a different `model_size` no longer
    silently reuses whichever model was loaded first.

    Args:
        audio_path: Path of the audio file to transcribe.
        model_size: faster-whisper model name/size to load.

    Returns:
        TranscriptionResult with text, per-segment word timings, language,
        spoken duration and segment count.

    Raises:
        ImportError: if faster-whisper could not be imported.
    """
    global _WHISPER_MODEL, _WHISPER_MODEL_KEY

    if WhisperModel is None:
        raise ImportError("faster-whisper is not installed.")

    # IMPORTANT: faster-whisper does NOT support MPS
    whisper_device = "cuda" if DEVICE == "cuda" else "cpu"
    compute_type = "float16" if whisper_device == "cuda" else "int8"

    cache_key = (model_size, whisper_device, compute_type)
    if _WHISPER_MODEL is None or _WHISPER_MODEL_KEY != cache_key:
        _WHISPER_MODEL = WhisperModel(
            model_size,
            device=whisper_device,
            compute_type=compute_type
        )
        _WHISPER_MODEL_KEY = cache_key

    segments_gen, info = _WHISPER_MODEL.transcribe(
        audio_path,
        beam_size=5,
        word_timestamps=True,
        vad_filter=True
    )

    # The segments are a lazy generator; materialize once so they can be reused.
    segments = list(segments_gen)

    text = "".join(s.text for s in segments).strip()

    seg_list = []
    first_word_start = None
    last_word_end = None

    for s in segments:
        words = []
        for w in (s.words or []):
            words.append({
                "start": float(w.start),
                "end": float(w.end),
                "word": w.word
            })
            if first_word_start is None:
                first_word_start = w.start
            last_word_end = w.end

        seg_list.append({
            "start": float(s.start),
            "end": float(s.end),
            "text": s.text,
            "words": words
        })

    # Spoken duration spans first word start to last word end; 0.0 when no
    # word timestamps came back (callers may then fall back to file length).
    duration = (
        float(last_word_end - first_word_start)
        if first_word_start is not None else 0.0
    )

    return TranscriptionResult(
        text=text,
        segments=seg_list,
        language=info.language,
        duration=duration,
        num_segments=len(segments)
    )


In [5]:
# ============================================
# 5) NLP: Context-Aware Signals
# ============================================

# Vocabulary used by detect_signals. Every entry is lowercase because it is
# compared against lowercased transcript text (or lowercased tokens).

# Single-word hesitation sounds.
FILLERS_SIMPLE = {"um", "uh", "umm", "uhh"}
# Multi-word filler phrases.
MULTI_FILLERS = ["you know", "i mean"]

# Single-word hedges.
HEDGE_KEYWORDS = {"maybe", "probably", "possibly", "might"}
# Multi-word hedging phrases. Note "might be" overlaps the keyword "might".
HEDGE_PHRASES = [
    "i think", "not sure", "kind of", "sort of",
    "i guess", "might be", "i don't remember"
]

# Verb lemmas treated as ownership language when "I" appears in the verb's subtree.
OWNERSHIP_VERBS = {"build", "design", "lead", "implement", "create", "manage", "solve", "drive"}
# Apology / self-undermining phrases.
APOLOGIES = ["sorry", "apologize", "i forgot", "i didn't prepare", "excuse me"]

def _count_whole_terms(text_lower: str, terms) -> int:
    """Count non-overlapping whole-word hits of any term; the longest term wins on overlap."""
    ordered = sorted({t for t in terms if t}, key=lambda t: (-len(t), t))
    if not ordered:
        return 0
    # Word characters and apostrophes on either side disqualify a hit, so "um"
    # is not found inside "number" and "i think" is not found inside "hi think".
    pattern = (
        r"(?<![\w'])(?:"
        + "|".join(re.escape(t) for t in ordered)
        + r")(?![\w'])"
    )
    return len(re.findall(pattern, text_lower))


def detect_signals(transcript: str, segments: List[Dict]) -> Dict:
    """
    Count linguistic and timing signals in a transcript.

    Fillers and hedges are matched as whole words/phrases, and each stretch of
    text is counted once: where phrases overlap (e.g. "might" / "might be",
    "i think" / "i think it was", "maybe" / "or maybe") only the longest
    counts. The same matching is used whether or not spaCy is loaded, so both
    modes agree on these two counts apart from the spaCy-only "like" filler.

    Args:
        transcript: Full transcript text.
        segments: Segment dicts with "start"/"end" and an optional "words"
            list whose items carry "start"/"end" times.

    Returns:
        Dict with integer counts: filler_count, hedge_count, own_count,
        passive_count, apology_count, long_pauses, long_speech_blocks.
        own_count and passive_count stay 0 when spaCy is unavailable.
    """
    # Normalize the typographic apostrophe so "don’t" matches "don't".
    text_lower = transcript.lower().replace("\u2019", "'")

    own_count = 0
    passive_count = 0

    # -------------------------
    # Fillers and hedges (single pass each, no double counting)
    # -------------------------
    filler_count = _count_whole_terms(
        text_lower, list(FILLERS_SIMPLE) + list(MULTI_FILLERS)
    )

    uncertainty_patterns = ["or maybe", "not sure if", "i think it was", "can't remember"]
    hedge_count = _count_whole_terms(
        text_lower,
        list(HEDGE_KEYWORDS) + list(HEDGE_PHRASES) + uncertainty_patterns
    )

    # -------------------------
    # NLP-aware analysis (needs the parser)
    # -------------------------
    if nlp:
        doc = nlp(transcript)

        for token in doc:
            t = token.text.lower()

            # "like" is a filler only when tagged as an interjection.
            if t == "like" and token.pos_ == "INTJ":
                filler_count += 1

            # Ownership: an ownership verb with "I" somewhere in its subtree.
            if token.lemma_ in OWNERSHIP_VERBS:
                if any(tok.text.lower() == "i" for tok in token.subtree):
                    own_count += 1

            # Passive voice counts only when "I" is absent from the clause.
            if token.dep_ == "auxpass":
                if not any(tok.text.lower() == "i" for tok in token.head.subtree):
                    passive_count += 1

    # -------------------------
    # Apologies
    # -------------------------
    # Kept as substring counts so inflections such as "apologized" still match.
    apology_count = sum(text_lower.count(a) for a in APOLOGIES)

    # -------------------------
    # Pause & Energy Analysis
    # -------------------------
    long_pauses = 0
    long_speech_blocks = 0

    all_words = []
    for seg in segments:
        all_words.extend(seg.get("words", []))

    # A gap of more than 1.2 s between consecutive words is a long pause.
    for prev_word, next_word in zip(all_words, all_words[1:]):
        if next_word["start"] - prev_word["end"] > 1.2:
            long_pauses += 1

    # A segment longer than 10 s is an unbroken speech block.
    for seg in segments:
        if (seg["end"] - seg["start"]) > 10.0:
            long_speech_blocks += 1

    return {
        "filler_count": filler_count,
        "hedge_count": hedge_count,
        "own_count": own_count,
        "passive_count": passive_count,
        "apology_count": apology_count,
        "long_pauses": long_pauses,
        "long_speech_blocks": long_speech_blocks
    }


In [6]:
# ============================================
# 6) Advanced Scoring Engine (Calibrated)
# ============================================

@dataclass
class InterviewScore:
    """Result of the communication scoring step."""
    total_score: float   # final score after penalties, bonuses and caps
    metrics: Dict        # raw signal counts plus derived rates (wpm, fillers_per_min, ...)
    feedback: List[str]  # coaching messages, in the order the checks fired


def calculate_score(
    transcript: str,
    duration: float,
    signals: Dict,
    pitch_data: Dict,
    sentiment_res
) -> InterviewScore:
    """
    Turn delivery signals into a 0-95 communication score with feedback.

    Starts at 100 and applies, in order: hedging and apology penalties, an
    ownership bonus or passive-voice penalty, delivery penalties (fillers,
    long pauses, pace, long speech blocks), a monotone penalty, a small
    sentiment adjustment, a cap of 78 for heavy hedging, and a final clamp.

    Args:
        transcript: Full transcript text (word count drives WPM).
        duration: Spoken duration in seconds.
        signals: Counts with keys hedge_count, apology_count, own_count,
            passive_count, filler_count, long_pauses, long_speech_blocks.
        pitch_data: Pitch analysis dict; only "monotone_score" is read.
        sentiment_res: Optional list whose first item has "label" and "score".

    Returns:
        InterviewScore with the final score, metrics and feedback messages.
    """
    notes = []
    running = 100.0

    # Per-minute rates treat clips shorter than a minute as one full minute.
    minutes = max(duration / 60.0, 1.0)

    # ---- 1. Confidence: hedging and apologies ----
    hedges_per_min = signals["hedge_count"] / minutes
    if hedges_per_min > 1.0:
        running -= min((hedges_per_min - 1.0) * 4.0, 22.0)
        notes.append(
            f"Hedging detected ({hedges_per_min:.1f}/min). Be more decisive."
        )

    apologies = signals["apology_count"]
    if apologies > 0:
        running -= apologies * 4.0
        notes.append("Avoid apologizing or underselling yourself.")

    # ---- 2. Ownership vs passive voice ----
    own_rate = signals["own_count"] / minutes
    passive_rate = signals["passive_count"] / minutes

    if own_rate > passive_rate + 0.5:
        running += min((own_rate - passive_rate) * 2.0, 8.0)
        notes.append("Good ownership language detected.")
    elif passive_rate > own_rate + 1.0:
        running -= 5.0
        notes.append("Excessive passive voice. Use active language.")

    # ---- 3. Delivery ----
    # A) Fillers above 3 per minute
    fillers_per_min = signals["filler_count"] / minutes
    if fillers_per_min > 3.0:
        running -= min((fillers_per_min - 3.0) * 2.0, 15.0)
        notes.append(
            f"High filler usage ({fillers_per_min:.1f}/min)."
        )

    # B) Long pauses above 2 per minute
    pauses_per_min = signals["long_pauses"] / minutes
    if pauses_per_min > 2.0:
        running -= min((pauses_per_min - 2.0) * 1.5, 8.0)
        notes.append("Frequent long pauses detected.")

    # C) Pace: uses the real duration, not the clamped minutes
    if duration > 0:
        wpm = (len(transcript.split()) / duration) * 60
    else:
        wpm = 0

    if wpm < 115:
        running -= min((115 - wpm) * 0.2, 10.0)
        notes.append(f"Pace is slow ({wpm:.0f} WPM).")
    elif wpm > 155:
        running -= min((wpm - 155) * 0.4, 15.0)
        notes.append(f"Pace is fast ({wpm:.0f} WPM). Slow down.")

    # D) Unbroken long speech blocks
    long_blocks = signals["long_speech_blocks"]
    if long_blocks > 0:
        running -= min(long_blocks * 4.0, 10.0)
        notes.append("Break long explanations with pauses.")

    # ---- 4. Voice modulation ----
    monotone_score = pitch_data.get("monotone_score", 0.0)
    if monotone_score > 0.6:
        running -= monotone_score * 8.0
        notes.append("Voice sounds monotone. Add variation.")

    # ---- 5. Sentiment (small adjustment only) ----
    if sentiment_res:
        top = sentiment_res[0]
        label = top["label"]
        conf = top["score"]

        if label == "POSITIVE" and conf > 0.9:
            running += 1.5
            notes.append("Positive tone.")
        elif label == "NEGATIVE" and conf > 0.9:
            running -= 5.0
            notes.append("Tone sounds uncertain.")

    # ---- 6. Heavy hedging caps the result ----
    if hedges_per_min > 2.0:
        running = min(running, 78.0)

    # ---- Final clamp to [0, 100], then the absolute cap of 95 ----
    clamped = max(0.0, min(100.0, running))
    final_score = 95.0 if clamped > 95 else clamped

    metrics = {
        **signals,
        "wpm": wpm,
        "fillers_per_min": fillers_per_min,
        "monotone_score": monotone_score,
    }
    return InterviewScore(
        total_score=final_score,
        metrics=metrics,
        feedback=notes,
    )


In [7]:
# ============================================
# 7) Main Runner (Final)
# ============================================

# Sentiment pipeline instance; stays None until analyze_interview first needs it.
_SENT_PIPE = None  # lazy-loaded sentiment pipeline
# True when `transformers.pipeline` was importable in the dependency cell.
_PIPELINE_AVAILABLE = pipeline is not None


def _sample_text_for_sentiment(text: str, max_len: int = 512) -> str:
    """Sample beginning + middle + end for fair sentiment."""
    if len(text) <= max_len:
        return text

    part = max_len // 3
    return (
        text[:part] +
        text[len(text)//2 - part//2 : len(text)//2 + part//2] +
        text[-part:]
    )


def analyze_interview(audio_path: str):
    """
    Run the communication-score (CS) pipeline on one recording and print a report.

    Steps: load audio and analyze pitch, transcribe, detect linguistic
    signals, optionally run sentiment analysis, score, then print the report.

    Args:
        audio_path: Path of the audio file to analyze.

    Returns:
        A dict with "transcript", "cs_score" and "cs_result", or None when
        transcription produced nothing usable.
    """
    global _SENT_PIPE

    print(f"--- Analyzing {os.path.basename(audio_path)} ---")

    # ---- 1. Audio and pitch ----
    print("Loading audio...")
    audio, sr = load_audio_mono(audio_path)
    pitch_data = analyze_pitch_dynamics(audio, sr)

    # ---- 2. Transcription ----
    print("Transcribing...")
    tr = transcribe_audio(audio_path, model_size="medium")

    if tr is None:
        print("[ERROR] Transcription failed. Whisper returned None.")
        return None

    if not (tr.text and tr.text.strip()):
        print("[ERROR] Empty transcription. Audio may be silent or corrupted.")
        return None

    # Prefer the spoken duration; fall back to the file length when it is missing.
    if tr.duration > 0:
        duration = tr.duration
    else:
        duration = len(audio) / sr

    # ---- 3. Linguistic signals ----
    print("Analyzing linguistic signals...")
    signals = detect_signals(tr.text, tr.segments)

    # ---- 4. Sentiment (only when transformers' pipeline is importable) ----
    sent_res = None
    if _PIPELINE_AVAILABLE:
        if _SENT_PIPE is None:
            # Built once and reused across calls; device=-1 keeps it on CPU.
            _SENT_PIPE = pipeline(
                "sentiment-analysis",
                model="distilbert-base-uncased-finetuned-sst-2-english",
                device=-1
            )
        sent_res = _SENT_PIPE(_sample_text_for_sentiment(tr.text))

    # ---- 5. Communication score ----
    result = calculate_score(
        transcript=tr.text,
        duration=duration,
        signals=signals,
        pitch_data=pitch_data,
        sentiment_res=sent_res
    )

    # ---- 6. Report ----
    print("\n=== INTERVIEW REPORT ===")
    print(f"OVERALL SCORE: {result.total_score:.1f} / 100")

    print("\n--- Feedback ---")
    for message in result.feedback:
        print(f"[ ] {message}")

    print("\n--- Detailed Metrics ---")
    for metric_name, metric_value in result.metrics.items():
        # Floats get two decimals; everything else prints as-is.
        if isinstance(metric_value, float):
            print(f"{metric_name}: {metric_value:.2f}")
        else:
            print(f"{metric_name}: {metric_value}")

    print("\n--- Debug Info ---")
    print(f"NLP mode: {NLP_MODE}")
    print(f"Speech duration (s): {duration:.2f}")
    print(f"Voiced ratio: {pitch_data.get('voiced_ratio', 'NA')}")

    # Values handed on to the technical-correctness (TCS) stage.
    return {
        "transcript": tr.text,
        "cs_score": result.total_score,
        "cs_result": result
    }


In [None]:
# -----------------------------
# TCS Model Configuration
# -----------------------------
# Checkpoint used for technical-correctness scoring and question generation.
TCS_MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"

# The Hugging Face token must come from the environment; never hardcode it here.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN is None:
    raise RuntimeError("HF_TOKEN environment variable not set")

# ---- Tokenizer ----
_tcs_tokenizer = AutoTokenizer.from_pretrained(
    TCS_MODEL_NAME, use_fast=True, token=HF_TOKEN
)

# generate() needs a pad token; reuse EOS when the tokenizer defines none.
if _tcs_tokenizer.pad_token is None:
    _tcs_tokenizer.pad_token = _tcs_tokenizer.eos_token

# ---- Device & dtype ----
# Only CUDA gets half precision and automatic placement; MPS and CPU both load
# in float32 with no device map.
if DEVICE == "cuda":
    _tcs_dtype, _device_map = torch.float16, "auto"
else:
    _tcs_dtype, _device_map = torch.float32, None

# ---- Model ----
_tcs_model = AutoModelForCausalLM.from_pretrained(
    TCS_MODEL_NAME,
    torch_dtype=_tcs_dtype,
    device_map=_device_map,
    token=HF_TOKEN,
)

# Inference only: switch to eval mode and disable autograd globally.
_tcs_model.eval()
torch.set_grad_enabled(False)


`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:06<00:00,  3.40s/it]


torch.autograd.grad_mode.set_grad_enabled(mode=False)

In [9]:
@dataclass
class TechnicalEvaluationResult:
    """Structured outcome of the technical-correctness (TCS) evaluation of one answer."""
    score: int    # technical score; compute_tcs clamps it to 0..100
    band: str     # label derived from the score (see bucket_tcs)
    verdict: str  # short summary text taken from the LLM response

    # Lists default to fresh empty lists so instances never share state.
    issues: List[str] = field(default_factory=list)
    improvement_points: List[str] = field(default_factory=list)

    # Optional sub-scores; nothing in the visible notebook code sets them.
    conceptual_score: int | None = None
    specificity_score: int | None = None
    confidence_score: int | None = None


In [10]:
from typing import List

def build_tcs_prompt(question: str | List[str], transcript: str) -> str:
    # Normalize question in case a list/array is passed
    if isinstance(question, list):
        question = next(
            (str(q).strip() for q in question if str(q).strip()),
            "Explain your approach to this problem."
        )
    else:
        question = str(question).strip() or "Explain your approach to this problem."

    return f"""
You are a senior technical interviewer conducting a mock interview.

You must evaluate the candidate STRICTLY based on:
1. The interview question provided
2. The candidate‚Äôs answer provided below

You have NO access to the candidate‚Äôs resume, background, or intent beyond
what is explicitly stated.

Your responsibilities:
1. Determine whether the candidate actually answered the question asked.
2. Evaluate technical correctness ONLY within the scope of the question.
3. Identify inaccuracies, misconceptions, missing fundamentals, or weak
   explanations relative to the question.
4. Provide transcript-grounded coaching feedback to improve correctness,
   relevance, and clarity.

STRICT EVALUATION RULES:
- Judge relevance: Penalize if the answer partially or fully misses the question.
- Judge correctness: Evaluate only what the candidate actually said.
- Do NOT infer unstated knowledge or intentions.
- Do NOT introduce new tools, technologies, metrics, or concepts.
- Do NOT penalize for advanced topics unless the question explicitly requires them.
- Avoid generic interview advice (e.g., ‚Äúpractice more‚Äù, ‚Äúbe confident‚Äù).

Interview Question:
{question}

Candidate Answer:
{transcript}

SCORING GUIDELINES:
- Score from 0 to 100 based on relevance + technical correctness.
- Use these bands:
  - Excellent: Fully answers the question with correct and clear explanation
  - Good: Answers the question correctly with minor gaps or imprecision
  - Partial: Addresses the question but with notable gaps or confusion
  - Weak: Poor alignment with the question or flawed understanding
  - Poor: Does not answer the question or is mostly incorrect

COACHING REQUIREMENTS:
- Provide at least 4 coaching points.
- EACH coaching point must reference:
  - something said in the answer, OR
  - something clearly missing relative to the question.
- If the answer is strong, focus on improving precision, structure, or depth
  without adding new content.

OUTPUT RULES:
- Start the response with '{{' and end with '}}'.
- Respond in STRICT JSON ONLY.
- Do NOT include markdown, explanations, or extra text.

JSON format:
{{
  "score": <int>,
  "band": "<Excellent|Good|Partial|Weak|Poor>",
  "verdict": "<1‚Äì2 sentence technical summary judging alignment with the question>",
  "issues": ["<question-relative technical issues or 'No major technical issues identified'>"],
  "improvement_points": ["<specific, question-grounded coaching points>"]
}}

Return only valid JSON.
"""


In [11]:
import json
import re

def extract_last_json(text: str) -> dict:
    """
    Return the LAST valid JSON object embedded in LLM output.

    Each "{" is tried as the start of an object with the standard JSON
    decoder, so nested objects and braces inside string values parse
    correctly. After a successful parse, scanning resumes after that object.

    Args:
        text: Raw model output, possibly with prose around the JSON.

    Returns:
        The last object that parsed successfully.

    Raises:
        RuntimeError: if the text has no "{" at all, or no object parses.
    """
    decoder = json.JSONDecoder()

    pos = text.find("{")
    if pos == -1:
        raise RuntimeError("No JSON object found in LLM output")

    last_obj = None
    while pos != -1:
        try:
            obj, end = decoder.raw_decode(text, pos)
        except json.JSONDecodeError:
            # Not a valid object here; try the next opening brace.
            pos = text.find("{", pos + 1)
            continue
        last_obj = obj
        pos = text.find("{", end)

    if last_obj is None:
        raise RuntimeError("No valid JSON object could be parsed")
    return last_obj


In [12]:
import json

def extract_valid_json_objects(text: str) -> list[dict]:
    """
    Extract every parseable JSON object from text, in order of appearance.

    Objects are parsed with the standard JSON decoder starting at each "{",
    so a brace inside a string value (e.g. {"v": "a } b"}) no longer truncates
    the object. After a successful parse, scanning resumes after that object,
    so nested objects are returned only as part of their parent. When a
    candidate fails to parse, scanning moves to the next "{", which recovers
    an object wrapped in stray braces such as {{"score": 5}}.

    Args:
        text: Raw text, typically LLM output.

    Returns:
        List of parsed objects; empty when nothing parses.
    """
    decoder = json.JSONDecoder()
    results = []

    pos = text.find("{")
    while pos != -1:
        try:
            obj, end = decoder.raw_decode(text, pos)
        except json.JSONDecodeError:
            # Invalid at this brace; move on to the next candidate start.
            pos = text.find("{", pos + 1)
            continue
        results.append(obj)
        pos = text.find("{", end)

    return results


In [13]:
def _run_llm(prompt: str, max_new_tokens: int = 1600) -> dict:
    """
    Generate a completion for `prompt` and return the last JSON object in it.

    The prompt is truncated to 2536 tokens, decoding is greedy, and only the
    newly generated tokens are decoded and parsed.

    Args:
        prompt: Full prompt text.
        max_new_tokens: Generation budget.

    Returns:
        The last JSON object found in the completion.

    Raises:
        RuntimeError: when the completion contains no parseable JSON object;
            the message carries the last 1000 characters of the raw output.
    """
    encoded = _tcs_tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=2536
    )
    model_inputs = {
        name: tensor.to(_tcs_model.device) for name, tensor in encoded.items()
    }
    # The prompt token count is needed to slice the prompt off the output.
    prompt_len = model_inputs["input_ids"].shape[1]

    with torch.no_grad():
        generated = _tcs_model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            eos_token_id=_tcs_tokenizer.eos_token_id,
            pad_token_id=_tcs_tokenizer.pad_token_id
        )

    print("[TCS] Model response received. Parsing output...")

    completion = _tcs_tokenizer.decode(
        generated[0][prompt_len:],
        skip_special_tokens=True
    ).strip()

    # The model may emit several objects or surrounding text; keep the last one.
    candidates = extract_valid_json_objects(completion)
    if not candidates:
        raise RuntimeError(
            "TCS LLM output could not be parsed into valid JSON.\n"
            "Raw output:\n" + completion[-1000:]
        )
    return candidates[-1]


In [14]:
def bucket_tcs(score: int) -> str:
    """
    Map a technical score to its band label.

    Bands by lower bound: 85 Excellent, 75 Good, 60 Partial, 35 Weak;
    anything lower is Poor.
    """
    thresholds = (
        (85, "Excellent"),
        (75, "Good"),
        (60, "Partial"),
        (35, "Weak"),
    )
    # Thresholds are in descending order, so the first one reached wins.
    for lower_bound, label in thresholds:
        if score >= lower_bound:
            return label
    return "Poor"


In [15]:
@dataclass
class QuestionGenerationRequest:
    """Interview context that is interpolated into the question-generation prompt."""
    role: str             # e.g. "Software Development Engineer"
    experience: str       # e.g. "Fresher"
    company_type: str     # e.g. "Service-Based"
    interview_round: str  # e.g. "HR"; also used to look up the expected question count


In [16]:
def build_question_generation_prompt(req: QuestionGenerationRequest) -> str:
    """
    Build the prompt that asks the LLM for interview questions.

    The request's role, experience, company type and interview round are
    interpolated into a fixed template. The template states per-round question
    counts and requires a single JSON object with a "questions" array.
    Doubled braces are literal braces inside the f-string.

    Args:
        req: Interview context for the prompt.

    Returns:
        The prompt text with surrounding whitespace stripped.
    """
    return f"""
You are a professional interviewer.

Interview Context:
- Role: {req.role}
- Experience Level: {req.experience}
- Company Type: {req.company_type}
- Interview Round: {req.interview_round}

QUESTION COUNT RULES:
- Ask questions based on the Experience Level ,Role,Company Type , Interview Round.
- HR Round: exactly 6 questions
- Technical Round: exactly 8 questions
- DSA Round: exactly 7 questions
- Coding Round: exactly 5 questions
- Communication Round: exactly 5 questions


CRITICAL OUTPUT CONTRACT:
- Output ONLY one valid JSON object.
- Do NOT add any text before or after the JSON.
- Do NOT add explanations, notes, labels, or headings.
- Stop generating immediately after the final closing brace.
- If any extra text is added, the output is INVALID.

MANDATORY JSON FORMAT:
{{
  "questions": [
    "question_1",
    "question_2",
    "question_3",
    "question_4",
    "question_5"
  ]
}}

IMPORTANT:
- The number of questions in the array MUST exactly match the rule for the selected Interview Round.

Return the JSON now and stop.
""".strip()

In [17]:
def run_llm_question(prompt: str, max_new_tokens: int = 512) -> Dict:
    """
    Generate a completion for a question prompt and parse it as JSON.

    Parsing is defensive. Brace-delimited blocks are tried from last to first,
    each as-is and then after a light repair (trailing commas removed, missing
    closers appended). If none parses, everything from the first "{" onward
    is repaired and tried once.

    Args:
        prompt: Prompt text (truncated to 1024 tokens).
        max_new_tokens: Generation budget.

    Returns:
        The parsed JSON object.

    Raises:
        RuntimeError: when no JSON could be recovered; the message includes
            the raw completion.
    """
    def _fix_and_load(block: str) -> Dict:
        # Drop trailing commas before a closer, then append missing closers.
        repaired = re.sub(r",\s*\}", "}", block)
        repaired = re.sub(r",\s*\]", "]", repaired)
        missing_brackets = repaired.count("[") - repaired.count("]")
        missing_braces = repaired.count("{") - repaired.count("}")
        repaired += "]" * max(missing_brackets, 0)
        repaired += "}" * max(missing_braces, 0)
        return json.loads(repaired)

    encoded = _tcs_tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=1024
    )
    model_inputs = {
        name: tensor.to(_tcs_model.device) for name, tensor in encoded.items()
    }
    prompt_len = model_inputs["input_ids"].shape[1]

    with torch.no_grad():
        generated = _tcs_model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            eos_token_id=_tcs_tokenizer.eos_token_id,
            pad_token_id=_tcs_tokenizer.pad_token_id
        )

    # Decode only the newly generated tokens, not the echoed prompt.
    completion = _tcs_tokenizer.decode(
        generated[0][prompt_len:],
        skip_special_tokens=True
    ).strip()

    # Pass 1: brace-delimited blocks, last first; raw parse, then repaired parse.
    candidate_blocks = re.findall(r"\{[\s\S]*?\}", completion)
    for block in reversed(candidate_blocks):
        try:
            return json.loads(block)
        except json.JSONDecodeError:
            try:
                return _fix_and_load(block)
            except Exception:
                continue

    # Pass 2: repair everything from the first opening brace to the end.
    first_brace = completion.find("{")
    if first_brace != -1:
        try:
            return _fix_and_load(completion[first_brace:])
        except Exception:
            pass

    raise RuntimeError(
        "Question LLM returned invalid JSON.\nRaw output:\n" + completion
    )

In [18]:
from typing import List, Dict

def generate_interview_questions(req: QuestionGenerationRequest) -> List[str]:
    """
    Ask the LLM for interview questions and validate the response.

    Args:
        req: Interview context. `interview_round` also selects the expected
            question count (HR 6, Technical 8, DSA 7, Coding 5,
            Communication 5); other round names skip the count check.

    Returns:
        Non-blank question strings, trimmed to the expected count when the
        model returned more.

    Raises:
        RuntimeError: if "questions" is missing, is not a list, or holds fewer
            questions than the round requires.
    """
    prompt = build_question_generation_prompt(req)
    response: Dict = run_llm_question(prompt, max_new_tokens=512)

    if "questions" not in response:
        raise RuntimeError("LLM response missing 'questions' field")

    raw_questions = response["questions"]
    if not isinstance(raw_questions, list):
        raise RuntimeError("'questions' must be a list")

    # Keep only entries that are non-blank after stringifying and stripping.
    questions = []
    for item in raw_questions:
        cleaned = str(item).strip()
        if cleaned:
            questions.append(cleaned)

    required_by_round = {
        "HR": 6,
        "Technical": 8,
        "DSA": 7,
        "Coding": 5,
        "Communication": 5,
    }
    required = required_by_round.get(req.interview_round)

    # Unknown round: nothing to enforce.
    if not required:
        return questions

    if len(questions) < required:
        raise RuntimeError(
            f"Expected {required} questions for {req.interview_round} round, got {len(questions)}. Raw: {response}"
        )

    # Extra questions are dropped; an exact count passes through unchanged.
    return questions[:required]


def generate_interview_question(req: QuestionGenerationRequest) -> str:
    """
    Return the first generated question for the given interview context.

    Raises:
        RuntimeError: if generation produced no questions.
    """
    generated = generate_interview_questions(req)
    if generated:
        return generated[0]
    raise RuntimeError("No questions returned by LLM")

In [19]:
# Demo: generate HR-round questions for a fresher SDE at a service-based company.
req = QuestionGenerationRequest(
    role="Software Development Engineer",
    experience="Fresher",
    company_type="Service-Based",
    interview_round="HR",
)

questions = generate_interview_questions(req)

# Print the questions as a 1-based numbered list.
print("Generated Questions:")
for number, question_text in enumerate(questions, start=1):
    print(f"{number}. {question_text}")


The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Generated Questions:
1. Can you tell us about your academic background?
2. What inspired you to pursue a career in software development?
3. How do you stay updated with the latest technologies and trends in the field?
4. Can you walk us through your resume and highlight your relevant skills?
5. What are your greatest strengths and weaknesses as a software developer?
6. Why do you want to work with our company?


In [20]:
# req = QuestionGenerationRequest(
#     role="Marketing Manager",
#     experience="1-3 years",
#     company_type="Product-Based",
#     interview_round="Technical Interview"
# )

# questions = generate_interview_questions(req)

# print("Generated Questions:")
# for i, q in enumerate(questions, 1):
#     print(f"{i}. {q}")


In [21]:
# Show the generated questions as a raw Python list (e.g. to copy into later calls).
print(questions)

['Can you tell us about your academic background?', 'What inspired you to pursue a career in software development?', 'How do you stay updated with the latest technologies and trends in the field?', 'Can you walk us through your resume and highlight your relevant skills?', 'What are your greatest strengths and weaknesses as a software developer?', 'Why do you want to work with our company?']


In [22]:
from typing import List

def run_tcs_llm(
    transcript: str,
    question: str | List[str] | None = None
) -> dict:
    print("[TCS] Running technical correctness evaluation...")

    # Safe default question
    if question is None:
        question = "Explain your approach to this problem."

    return _run_llm(
        build_tcs_prompt(question, transcript),
        max_new_tokens=1600
    )


In [23]:
def compute_tcs(transcript: str, question: str | List[str] | None = None) -> TechnicalEvaluationResult:
    raw = run_tcs_llm(transcript, question)

    # ---- Score ----
    if "score" not in raw:
        raise RuntimeError(f"TCS output missing 'score'. Raw response: {raw}")

    score = int(raw["score"])
    score = max(0, min(score, 100))

    band = bucket_tcs(score)
    verdict = raw.get("verdict", "").strip()

    # ---- Issues ----
    issues = raw.get("issues", [])
    if not isinstance(issues, list) or not issues:
        issues = ["No major technical issues identified."]

    # ---- Coaching points ----
    improvement_points = raw.get("improvement_points", [])
    if not isinstance(improvement_points, list) or not improvement_points:
        improvement_points = [
            "Improve clarity and specificity while explaining technical decisions."
        ]
    
    print(question)

    return TechnicalEvaluationResult(
        score=score,
        band=band,
        verdict=verdict,
        issues=issues,
        improvement_points=improvement_points
    )

In [24]:
def combine_cs_tcs(cs_score: float, tcs: TechnicalEvaluationResult) -> float:
    """Blend the communication score (CS) and technical score (TCS) into one number.

    Steps, in order:
      1. Weighted sum ``0.6 * cs_score + 0.4 * tcs.score``.
      2. Band ceiling: "Poor" -> 45, "Weak" -> 60, "Partial" -> 82; any
         other band is left uncapped.
      3. Never above the larger of the two input scores, never above 95,
         never below 0.
      4. Rounded to one decimal place.

    NOTE(review): these weights favour the communication score; if the
    technical score is meant to dominate, the 0.6 / 0.4 split should be
    swapped -- confirm intent.

    Args:
        cs_score: Communication score.
        tcs: Technical evaluation; only ``.score`` and ``.band`` are read.

    Returns:
        The fused score as a float rounded to one decimal.
    """
    blended = 0.6 * cs_score + 0.4 * tcs.score

    # A weak technical band puts a hard ceiling on the blended score.
    for band_name, ceiling in (("Poor", 45.0), ("Weak", 60.0), ("Partial", 82.0)):
        if tcs.band == band_name:
            blended = min(blended, ceiling)
            break

    # Upper bounds first (best input score, then the global 95 cap), floor last.
    bounded = max(min(blended, max(cs_score, tcs.score), 95.0), 0.0)
    return round(bounded, 1)


In [25]:
import re

def pretty_print_transcript(text: str, line_width: int = 100):
    """Print ``text`` word-wrapped so that lines do not exceed ``line_width``.

    Every run of whitespace (spaces, tabs, newlines) is collapsed to a single
    space before wrapping. Words are never split or hyphen-broken; a single
    word longer than ``line_width`` is printed on a line of its own. Nothing
    is printed for empty or whitespace-only input.

    Args:
        text: Transcript text to print.
        line_width: Maximum number of characters per printed line. Values
            below 1 are treated as 1 (one word per line).
    """
    import textwrap  # local import so the helper does not rely on another cell

    # Collapse whitespace first; textwrap would otherwise keep inner runs of spaces.
    normalized = " ".join(text.split())

    wrapped_lines = textwrap.wrap(
        normalized,
        width=max(1, line_width),
        break_long_words=False,   # keep over-long words intact
        break_on_hyphens=False,   # keep compounds such as "real-world" together
    )
    for row in wrapped_lines:
        print(row)


In [26]:
from typing import List

def build_placement_coaching_prompt(question: str | List[str], transcript: str) -> str:
    if isinstance(question, list):
        question = next(
            (str(q).strip() for q in question if str(q).strip()),
            "Explain your approach to this problem."
        )
    else:
        question = str(question).strip() or "Explain your approach to this problem."

    return f"""
You are a senior placement officer reviewing a mock interview response.

You must evaluate the candidate STRICTLY based on:
- The interview transcript provided below
- Evidence explicitly present in the transcript

You have NO access to the candidate‚Äôs resume, background, or intent beyond
what is stated in the transcript.

YOUR RESPONSIBILITIES:
1. Identify concrete strengths demonstrated in the response.
2. Identify placement-relevant weaknesses or gaps visible in the response.
3. Provide focused coaching advice to improve placement readiness.

EVALUATION RULES:
- Base every point directly on the transcript.
- Do NOT invent skills, experience, or achievements.
- Do NOT add tools, technologies, or concepts not mentioned.
- Avoid generic advice (e.g., ‚Äúpractice more‚Äù, ‚Äúbe confident‚Äù).
- If evidence is limited, infer conservatively from what is missing.

MANDATORY OUTPUT REQUIREMENTS:
- "standout_strengths" MUST contain **3 to 4 distinct items**
- "top_improvements" MUST contain **3 to 4 distinct items**
- "current_gaps" MUST contain **at least 2 items**
- "actionable_improvements" MUST contain **at least 2 items**
- "placement_focus" MUST contain **at least 2 items**
- Each item must be concise and transcript-grounded

OUTPUT CONSTRAINTS:
- Return EXACTLY ONE JSON object.
- Start the response with '{{' and end with '}}'.
- Output STRICT JSON only (no text, no markdown).
- All values MUST be arrays of strings.
- Keep each point short and specific (no long paragraphs).

Interview Transcript:
{transcript}

JSON format (FOLLOW EXACTLY):

{{
  "standout_strengths": [
    "<strength 1>",
    "<strength 2>",
    "<strength 3>",
    "<optional strength 4>"
  ],
  "top_improvements": [
    "<improvement 1>",
    "<improvement 2>",
    "<improvement 3>",
    "<optional improvement 4>"
  ],
  "placement_coaching": {{
    "current_gaps": [
      "<gap 1>",
      "<gap 2>"
    ],
    "actionable_improvements": [
      "<actionable advice 1>",
      "<actionable advice 2>"
    ],
    "placement_focus": [
      "<focus area 1>",
      "<focus area 2>"
    ]
  }}
}}

Return only valid JSON.
"""


In [27]:
from typing import List

def run_placement_coaching_llm(
    transcript: str,
    question: str | List[str] | None = None
) -> dict:
    print("[COACH] Running placement coaching...")

    prompt = build_placement_coaching_prompt(question, transcript)

    try:
        # _run_llm MUST return a valid dict
        output = _run_llm(prompt, max_new_tokens=800)

        if not isinstance(output, dict):
            raise RuntimeError("LLM output is not a JSON object")

        return output

    except Exception as e:
        print("[COACH][WARN] Placement coaching failed. Using hard fallback.")
        print(str(e))

        return {
            "standout_strengths": [
                "Participated actively in the interview"
            ],
            "top_improvements": [
                "Improve clarity and confidence in explanations"
            ],
            "placement_coaching": {
                "current_gaps": [
                    "Responses lack structured depth"
                ],
                "actionable_improvements": [
                    "Practice explaining answers step-by-step"
                ],
                "placement_focus": [
                    "Communication clarity and interview readiness"
                ]
            }
        }


In [28]:
from typing import List

def generate_placement_feedback(
    transcript: str,
    question: str | List[str] | None = None
) -> dict:

    raw = run_placement_coaching_llm(transcript, question)

    # ---- Safe list extraction (NON-DESTRUCTIVE) ----
    def ensure_list(value, fallback):
        if isinstance(value, list) and len(value) > 0:
            return value
        return [fallback]

    standout_strengths = ensure_list(
        raw.get("standout_strengths"),
        "Shows basic engagement during the interview"
    )

    top_improvements = ensure_list(
        raw.get("top_improvements"),
        "Needs more structured and confident explanations"
    )

    # ---- Normalize placement coaching ----
    placement_raw = raw.get("placement_coaching", {})

    placement = {
        "current_gaps": ensure_list(
            placement_raw.get("current_gaps"),
            "Lacks depth or clarity in some responses"
        ),
        "actionable_improvements": ensure_list(
            placement_raw.get("actionable_improvements"),
            "Practice explaining answers step-by-step with examples"
        ),
        "placement_focus": ensure_list(
            placement_raw.get("placement_focus"),
            "Focus on communication clarity and interview readiness"
        ),
    }

    return {
        "standout_strengths": standout_strengths,
        "top_improvements": top_improvements,
        "placement_coaching": placement
    }


In [29]:
# ============================
# CELL 1: COMMUNICATION SCORE
# ============================

audio_path = "outputs/testing_audio.wav"

# Run the communication-score analysis on the recording; a None result is
# treated as a hard failure for the rest of the notebook.
cs_out = analyze_interview(audio_path)
if cs_out is None:
    raise RuntimeError("CS analysis failed.")

# Only the transcript and the CS score are carried forward to the next cells.
transcript = cs_out["transcript"]
cs_score = cs_out["cs_score"]

# Summary header, then the wrapped transcript between two marker lines.
print(
    "=== COMMUNICATION SCORE (CS) ===",
    f"CS Score      : {cs_score}",
    "\n--- TRANSCRIPT ---",
    sep="\n",
)
pretty_print_transcript(transcript)
print("--- END TRANSCRIPT ---")


--- Analyzing testing_audio.wav ---
Loading audio...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Transcribing...


  mel_spec = self.mel_filters @ magnitudes
  mel_spec = self.mel_filters @ magnitudes
  mel_spec = self.mel_filters @ magnitudes


Analyzing linguistic signals...


Device set to use cpu



=== INTERVIEW REPORT ===
OVERALL SCORE: 95.0 / 100

--- Feedback ---
[ ] Good ownership language detected.
[ ] Positive tone.

--- Detailed Metrics ---
filler_count: 0
hedge_count: 0
own_count: 1
passive_count: 0
apology_count: 0
long_pauses: 0
long_speech_blocks: 0
wpm: 136.97
fillers_per_min: 0.00
monotone_score: 0.00

--- Debug Info ---
NLP mode: spacy
Speech duration (s): 116.08
Voiced ratio: 0.44386351128233353
=== COMMUNICATION SCORE (CS) ===
CS Score      : 95.0

--- TRANSCRIPT ---
Hello, I am Suhan Kumar. I am currently pursuing a bachelor's degree in computer science engineering
with a specialization in artificial intelligence and machine learning. Through my coursework I have
built a strong foundation in core computer science of subjects such as programming, data structures,
databases and software fundamentals. Along with academics, I have applied these concepts in practical
projects which have helped me understand how theoretical knowledge translates into real-world applica

In [30]:
# =========================================
# CELL 2: TCS + FINAL AGGREGATION
# =========================================

# ---- Technical correctness scoring for the transcript ----
tcs = compute_tcs(transcript, questions)

# ---- Fuse CS and TCS into the final interview score ----
final_score = combine_cs_tcs(cs_score, tcs)

# ---- Report: score and band ----
print("\n" + "=" * 45, "TECHNICAL CORRECTNESS (TCS)", "=" * 45, sep="\n")
print(f"Score : {tcs.score}", f"Band  : {tcs.band}", sep="\n")

# ---- Report: verdict and issues ----
print("\nVerdict:", tcs.verdict, sep="\n")

print("\nIssues Identified:")
for issue_text in tcs.issues:
    print(f"- {issue_text}")

# ---- Report: coaching points ----
print("\n" + "=" * 45, "COACHING FEEDBACK", "=" * 45, sep="\n")
for coaching_point in tcs.improvement_points:
    print(f"- {coaching_point}")

# ---- Report: final fused score ----
print("\n" + "=" * 45, "FINAL INTERVIEW SCORE", "=" * 45, sep="\n")
print(final_score)


[TCS] Running technical correctness evaluation...
[TCS] Model response received. Parsing output...
['Can you tell us about your academic background?', 'What inspired you to pursue a career in software development?', 'How do you stay updated with the latest technologies and trends in the field?', 'Can you walk us through your resume and highlight your relevant skills?', 'What are your greatest strengths and weaknesses as a software developer?', 'Why do you want to work with our company?']

TECHNICAL CORRECTNESS (TCS)
Score : 70
Band  : Partial

Verdict:
The candidate's answer was overly long and meandering, making it difficult to follow their train of thought, and they did not directly address the question.

Issues Identified:
- The candidate's answer was overly long and meandering, making it difficult to follow their train of thought.
- The candidate did not directly answer the question, instead, they provided a detailed description of their skills and interests.
- The candidate did no

In [31]:
# ---- Placement coaching ----
coaching = generate_placement_feedback(transcript, questions)

print("\n" + "=" * 45, "RAW COACHING OBJECT", "=" * 45, sep="\n")
print(coaching)

# ---- Top-level sections: banner followed by one bullet per entry ----
for section_title, section_key in (
    ("STANDOUT STRENGTHS", "standout_strengths"),
    ("TOP IMPROVEMENTS", "top_improvements"),
):
    print("\n" + "=" * 45, section_title, "=" * 45, sep="\n")
    for entry in coaching[section_key]:
        print(f"- {entry}")

# ---- Placement coaching insights ----
placement = coaching["placement_coaching"]

print("\n" + "=" * 45, "PLACEMENT COACHING INSIGHTS", "=" * 45, sep="\n")

# Each sub-section: a lead-in line, then one bullet per entry.
for lead_in, detail_key in (
    ("\nWhere the candidate currently lags:", "current_gaps"),
    ("\nWhat should be improved next:", "actionable_improvements"),
    ("\nAreas to focus for placements:", "placement_focus"),
):
    print(lead_in)
    for entry in placement[detail_key]:
        print(f"- {entry}")


[COACH] Running placement coaching...
[TCS] Model response received. Parsing output...

RAW COACHING OBJECT
{'standout_strengths': ['strong technical fundamentals', 'disciplined learning mindset', 'ability to adapt quickly', 'problem-solving skills'], 'top_improvements': ['lack of specific tools and technologies mentioned', 'limited experience with back-end development', 'inadequate emphasis on soft skills', 'insufficient detail on project experience'], 'placement_coaching': {'current_gaps': ['back-end development skills', 'soft skills development'], 'actionable_improvements': ['Research and list 3-5 relevant tools and technologies', 'Highlight 2-3 relevant soft skills and provide examples'], 'placement_focus': ['Technical skills assessment', 'Soft skills development and assessment']}}

STANDOUT STRENGTHS
- strong technical fundamentals
- disciplined learning mindset
- ability to adapt quickly
- problem-solving skills

TOP IMPROVEMENTS
- lack of specific tools and technologies mentione