In [1]:
# ================= AI INTERVIEW SYSTEM =================
# Features:
# - Uses MediaPipe for face landmarks
# - Captures audio reliably
# - Hybrid Whisper transcription (small → medium)
# - Gemini generates next questions from JD + Resume
# - Non-blocking TTS feedback while webcam live
# - Draws only main face contours (eyes/lips/outline)
# ======================================================

import os, time, queue, threading, contextlib, tempfile, re
import sounddevice as sd               # for audio recording
from scipy.io.wavfile import write, read
import cv2, numpy as np                # for webcam display and image ops

# ---------------- Whisper (ASR) ----------------
# Each optional dependency is guarded with `except Exception` instead of a bare
# `except:` so that Ctrl+C / SystemExit raised during a slow import still
# propagate, while any ordinary import-time failure only disables that feature.
try:
    import whisper                     # Whisper automatic speech recognition
except Exception:
    whisper = None

# ---------------- TTS ----------------
try:
    import pyttsx3                     # text-to-speech (local)
    TTS_AVAILABLE = True
except Exception:
    pyttsx3 = None
    TTS_AVAILABLE = False

# ---------------- Gemini AI ----------------
try:
    import google.generativeai as genai     # Gemini API (if available)
    GEMINI_AVAILABLE = True
except Exception:
    genai = None
    GEMINI_AVAILABLE = False

# ---------------- Resume Parsing ----------------
import PyPDF2, docx                     # PDF / DOCX reading
from tkinter import Tk
from tkinter.filedialog import askopenfilename

# ---------------- MediaPipe setup ----------------
# MediaPipe is optional. MP_AVAILABLE starts False and is flipped to True only
# after the import and both `mp.solutions` lookups have succeeded, so any
# failure along the way leaves it False. run_interview() creates a FaceMesh
# only when MP_AVAILABLE is True; otherwise the webcam feed is shown without
# face contours.
# NOTE(review): mp_drawing is not referenced by the code visible in this file
# (contours are drawn with cv2 directly) - confirm before removing.
MP_AVAILABLE = False
try:
    import mediapipe as mp
    mp_face_mesh = mp.solutions.face_mesh
    mp_drawing = mp.solutions.drawing_utils
    MP_AVAILABLE = True
    print("MediaPipe available.")
except Exception as e:
    # Deliberately broad: whatever goes wrong here should only disable the
    # face overlay, never stop the interview from running.
    print("MediaPipe not available:", e)
# ---------------- Config ----------------
# Default recording target: one WAV in the system temp directory, shared by
# record_audio() and transcribe_audio() as their default `filename`.
AUDIO_FILENAME = os.path.join(tempfile.gettempdir(), "candidate_answer.wav")
SAMPLE_RATE = 16000                     # audio sampling rate (Hz)
SILENCE_DURATION = 10.0                 # seconds of silence, after speech has started, that auto-stop recording
SILENCE_THRESHOLD = 0.005               # a chunk whose RMS exceeds this counts as speech
WIN_W, WIN_H = 960, 540                 # window size for display; WIN_H also positions the bottom status text
MAX_QUESTION_WORDS = 14                 # word limit stated in the Gemini prompt (the reply is not checked against it)
TECH_QUESTIONS = 3                      # number of "Technical" questions requested after the opener
HR_QUESTIONS = 1                        # number of "HR" questions requested after the technical ones
TOTAL_QUESTIONS = 1 + TECH_QUESTIONS + HR_QUESTIONS   # the leading 1 is the fixed opening question
GEN_RETRY = 1                           # number of Gemini attempts per question (1 = a single try, no retry)

# ---------------- Whisper models load ----------------
# Both models are loaded eagerly at import time. Either name stays None when
# Whisper is not installed or the model cannot be loaded.
whisper_small, whisper_medium = None, None
if whisper:
    try:
        print("Loading Whisper small...")
        whisper_small = whisper.load_model("small")
        print("Small loaded.")
    except Exception as e:
        # Report the reason instead of failing silently; `except Exception`
        # (unlike a bare `except:`) still lets Ctrl+C abort a long download.
        print("Whisper small failed to load:", e)
        whisper_small = None
    try:
        print("Loading Whisper medium...")
        whisper_medium = whisper.load_model("medium")
        print("Medium loaded.")
    except Exception as e:
        print("Whisper medium failed to load:", e)
        whisper_medium = None

# ---------------- TTS async speak ----------------
def speak_async(text):
    """
    Say `text` aloud without blocking the caller.

    Returns a threading.Event that becomes set once speaking has finished.
    When pyttsx3 is not available the text is printed with an "AI:" prefix
    instead, and the returned event is already set.
    """
    finished = threading.Event()

    if TTS_AVAILABLE:
        def _speak_then_signal():
            try:
                tts = pyttsx3.init()
                tts.setProperty('rate', 170)   # speaking rate
                tts.say(str(text))
                tts.runAndWait()
            finally:
                # Signal completion even if the engine raised, so waiters are released.
                finished.set()

        worker = threading.Thread(target=_speak_then_signal, daemon=True)
        worker.start()
    else:
        print("AI:", text)
        finished.set()

    return finished

# ---------------- Text helpers ----------------
def clean_text(t):
    """
    Normalise `t` for display and prompting.

    The value is converted with str(); every run of non-ASCII characters
    becomes a single space, whitespace runs collapse to one space, and the
    ends are trimmed. None yields an empty string.
    """
    if t is None:
        return ""
    ascii_only = re.compile(r'[^\x00-\x7F]+').sub(' ', str(t))
    single_spaced = re.compile(r'\s+').sub(' ', ascii_only)
    return single_spaced.strip()

def wrap_text(text, max_chars=40):
    """
    Greedily wrap `text` into lines of at most `max_chars` characters.

    Words are never split: a single word longer than `max_chars` is placed on
    a line of its own. Always returns at least one line, so empty, None or
    whitespace-only input yields [""].
    """
    if not text:
        return [""]
    lines, cur = [], ""
    for w in text.split():
        candidate = f"{cur} {w}" if cur else w
        # `not cur` keeps an over-long first word on the current (empty) line
        # instead of flushing an empty string into the output.
        if not cur or len(candidate) <= max_chars:
            cur = candidate
        else:
            lines.append(cur)
            cur = w
    if cur:
        lines.append(cur)
    # Whitespace-only input produces no words; mirror the empty-input result.
    return lines or [""]

# ---------------- Resume extractors ----------------
def extract_text_from_pdf(path):
    """
    Return the cleaned text of the PDF at `path`.

    Extraction is best-effort: if the file cannot be opened or parsed, the
    reason is printed and whatever pages were read before the failure are
    still returned (possibly an empty string).
    """
    page_texts = []
    try:
        with open(path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            # Explicit loop (not a comprehension) so pages read before a
            # mid-document failure are kept.
            for page in reader.pages:
                page_texts.append(page.extract_text() or "")
    except Exception as e:
        print("Could not read PDF resume:", e)
    return clean_text("\n".join(page_texts))

def extract_text_from_docx(path):
    """
    Return the cleaned text of the non-blank paragraphs in the document at `path`.

    Best-effort: if the document cannot be opened or read, the reason is
    printed and an empty string is returned.
    """
    try:
        document = docx.Document(path)
        paragraphs = [p.text for p in document.paragraphs if p.text.strip()]
    except Exception as e:
        print("Could not read DOCX resume:", e)
        return ""
    return clean_text("\n".join(paragraphs))

def extract_text_from_file(path):
    """
    Dispatch on the (case-insensitive) file extension and return the resume text.

    ".pdf" goes to the PDF extractor, ".doc"/".docx" to the DOCX extractor.
    A falsy path or any other extension yields an empty string.
    """
    if not path:
        return ""

    lowered = path.lower()
    if lowered.endswith(".pdf"):
        extractor = extract_text_from_pdf
    elif lowered.endswith((".doc", ".docx")):
        extractor = extract_text_from_docx
    else:
        return ""
    return extractor(path)

# ---------------- Audio recording ----------------
def record_audio(stop_flag, filename=AUDIO_FILENAME):
    """
    Record mono audio from the default microphone into `filename` (16-bit WAV).

    Recording stops when either:
    - stop_flag['next'] becomes true (set by the caller, e.g. on a key press), or
    - speech has been detected and SILENCE_DURATION seconds of silence follow,
      in which case this function sets stop_flag['next'] itself.

    Any file already at `filename` is removed first, so when nothing is
    captured (e.g. the microphone cannot be opened) no file exists afterwards
    and a recording from a previous answer cannot be mistaken for this one.
    Audio-device errors are printed, not raised.
    """
    # Deleting is best-effort: a missing file is the normal case.
    with contextlib.suppress(OSError):
        os.remove(filename)

    q_audio = queue.Queue()
    frames = []
    last_loud = time.time()
    speech_started = False

    def _cb(indata, frames_count, time_info, status):
        # Callback from sounddevice: hand a copy of the chunk to the consumer loop.
        q_audio.put(indata.copy())

    try:
        with sd.InputStream(samplerate=SAMPLE_RATE, channels=1, callback=_cb):
            while not stop_flag["next"]:
                try:
                    data = q_audio.get(timeout=0.05)
                except queue.Empty:
                    continue
                frames.append(data)

                # RMS of the chunk decides whether the user is speaking.
                rms = np.sqrt(np.mean(data**2))
                if rms > SILENCE_THRESHOLD:
                    speech_started = True
                    last_loud = time.time()
                elif speech_started and (time.time() - last_loud) > SILENCE_DURATION:
                    stop_flag["next"] = True
    except Exception as e:
        print("Audio issue:", e)

    # Keep chunks that were delivered but not yet consumed when the loop ended.
    while True:
        try:
            frames.append(q_audio.get_nowait())
        except queue.Empty:
            break

    # If anything was recorded, peak-normalise and save as 16-bit PCM.
    if frames:
        audio = np.concatenate(frames, axis=0).flatten().astype(np.float32)
        audio = audio / (np.max(np.abs(audio)) + 1e-9)
        write(filename, SAMPLE_RATE, (audio * 32767).astype(np.int16))

# ---------------- Transcription ----------------
def transcribe_audio(filename=AUDIO_FILENAME):
    """
    Transcribe the WAV at `filename` with Whisper and return cleaned text.

    The small model is tried first; if it is not loaded or raises, the medium
    model is tried. Returns "" when the file does not exist, no model is
    loaded, or every available model fails.
    """
    if not os.path.exists(filename):
        return ""
    for model in (whisper_small, whisper_medium):
        if model is None:
            continue
        try:
            # Silence anything the model prints while it runs.
            with contextlib.redirect_stdout(None):
                result = model.transcribe(filename, fp16=False, language="en")
        except Exception as e:
            # Printed outside the redirect so the reason is actually visible.
            print("Transcription failed:", e)
            continue
        return clean_text(result.get("text", ""))
    return ""

# ---------------- Gemini config ----------------
# SECURITY: the API key is read from the GEMINI_API_KEY environment variable.
# A key was previously hard-coded on this line; treat that key as leaked and
# revoke it. Without a key, gen_model stays None and the question/feedback
# helpers fall back to their fixed defaults.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
gen_model = None
if GEMINI_AVAILABLE and GEMINI_API_KEY:
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        gen_model = genai.GenerativeModel("gemini-2.0-flash")
    except Exception as e:
        print("Gemini init failed:", e)
elif GEMINI_AVAILABLE:
    print("GEMINI_API_KEY is not set; Gemini features are disabled.")

# ---------------- AI question generator ----------------
def generate_next_question_background(jd_summary, resume_text, used_set, container, role):
    """
    Ask Gemini for ONE short `role` interview question based on JD + resume.

    The result is written to container[0]: the new question on success, or
    None when Gemini is not configured, every attempt fails, or the reply is
    empty or already in `used_set`. A returned question is also added to
    `used_set`. Nothing is raised; API errors are printed.
    """
    container[0] = None
    if gen_model is None:
        return

    # Tolerate None so a missing resume cannot crash the background thread.
    jd_summary = jd_summary or ""
    resume_text = resume_text or ""
    jd_short = jd_summary[:800] + "..." if len(jd_summary) > 800 else jd_summary
    resume_short = resume_text[:1200] + "..." if len(resume_text) > 1200 else resume_text

    prompt = (
        f"Generate ONE {role} interview question "
        f"(max {MAX_QUESTION_WORDS} words). JD: {jd_short}. Resume: {resume_short}. Previous: {list(used_set)}."
    )

    for attempt in range(GEN_RETRY):
        try:
            r = gen_model.generate_content(prompt)
            # First line with real content: replies may start with a blank line.
            q = next(filter(None, (clean_text(ln) for ln in r.text.splitlines())), "")
            # Drop a leading "Question 1:" style label and keep the first sentence.
            q = re.sub(r'^(question\s*\d*[:\-]?)','', q, flags=re.I).strip().split('.')[0].strip()
            if q and q not in used_set:
                used_set.add(q)
                container[0] = q
                return
        except Exception as e:
            print("Question generation failed:", e)
            # Back off only when another attempt will actually follow.
            if attempt < GEN_RETRY - 1:
                time.sleep(0.3)

# ---------------- AI feedback generation ----------------
def generate_feedback(question, answer):
    """
    Ask Gemini for one short, positive feedback sentence about `answer`.

    Returns "Thank you for your answer." when Gemini is not configured, the
    request fails (the error is printed), or the reply contains no usable text.
    """
    fallback = "Thank you for your answer."
    if not gen_model:
        return fallback
    prompt = (
        f"Provide brief positive feedback (max 10 words) "
        f"on '{answer}' for '{question}'. Only positive."
    )
    try:
        r = gen_model.generate_content(prompt)
        # First line with real content: replies may start with a blank line.
        fb = next(filter(None, (clean_text(ln) for ln in r.text.splitlines())), "")
    except Exception as e:
        print("Feedback generation failed:", e)
        return fallback
    return fb or fallback

# ---------------- First question fixed feedback ----------------
def generate_first_answer_feedback(candidate_name, candidate_answer):
    """
    Build the fixed reply to the opening question.

    The reply is always "Nice to meet you, {candidate_name}."; the
    `candidate_answer` argument is accepted but not used.
    """
    greeting = f"Nice to meet you, {candidate_name}."
    return greeting

# ---------------- Draw face contours (MediaPipe) ----------------
def draw_full_mesh_points(frame, face_landmarks):
    """
    Draw open polylines for the face outline, both eyes and the lips on `frame`.

    `face_landmarks.landmark` is indexed by the landmark numbers listed below;
    each landmark's x/y is scaled by the frame width/height to get pixels.
    Consecutive landmarks of a contour are joined with 1-pixel lines (the last
    point is not joined back to the first). The frame is modified in place.
    """
    height, width = frame.shape[:2]
    landmarks = face_landmarks.landmark

    # (landmark indices, BGR colour) for every contour, in drawing order.
    contours = (
        # face outline
        ([10,338,297,332,284,251,389,356,454,323,361,288,
          397,365,379,378,400,377,152,148,176,149,150,
          136,172,58,132,93,234,127,162,21,54,103,67,109], (255,255,0)),
        # left eye
        ([33,160,158,133,153,144], (0,255,0)),
        # right eye
        ([263,387,385,362,380,373], (0,255,0)),
        # outer lips
        ([61,146,91,181,84,17,314,405,321,375,291,308], (0,0,255)),
        # inner lips
        ([78,95,88,178,87,14,317,402,318,324], (0,100,255)),
    )

    for indices, colour in contours:
        pts = [landmarks[i] for i in indices]
        for start, end in zip(pts, pts[1:]):
            p1 = (int(start.x * width), int(start.y * height))
            p2 = (int(end.x * width), int(end.y * height))
            cv2.line(frame, p1, p2, colour, 1)

# ---------------- Main Interview Flow ----------------
def _run_in_background(func, *args):
    """Run func(*args) on a daemon thread; return (done_event, one-slot result list)."""
    done = threading.Event()
    box = [None]

    def _worker():
        try:
            box[0] = func(*args)
        finally:
            # Always signal, so the UI loop cannot wait forever if func raises.
            done.set()

    threading.Thread(target=_worker, daemon=True).start()
    return done, box


def _pump_frames(cap, is_done, annotate, exit_keys=(27,)):
    """Show annotated webcam frames until is_done() is true; return True if an exit key ended it."""
    while not is_done():
        ret, frame = cap.read()
        if ret:
            annotate(frame)
            cv2.imshow("AI Interview", frame)
        # Poll the keyboard even when the grab failed so the user can always bail out.
        if (cv2.waitKey(1) & 0xFF) in exit_keys:
            return True
    return False


def _draw_question(frame, question):
    """Draw the wrapped question text in the top-left corner of the frame."""
    y = 30
    for text_line in wrap_text(question, 40):
        cv2.putText(frame, text_line, (20, y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255,255,255), 2)
        y += 30


def _status_painter(message, color):
    """Return an annotate callback that draws a one-line status message."""
    def _paint(frame):
        cv2.putText(frame, message, (20,40), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
    return _paint


def _fallback_question(role, used):
    """Pick a fixed question for `role` that has not been asked yet (first one if all were)."""
    if role == "Technical":
        pool = (
            "Tell me about a project from your resume.",
            "Which machine learning algorithm do you know best, and why?",
            "How do you evaluate a model before deploying it?",
        )
    else:
        pool = (
            "Why do you want to join our company?",
            "Where do you see yourself in three years?",
        )
    return next((q for q in pool if q not in used), pool[0])


def run_interview():
    """
    Run the whole interview.

    Asks for the candidate's name and resume, then for each of
    TOTAL_QUESTIONS questions: speaks the question, records the answer,
    transcribes it, speaks feedback and prepares the next question. The webcam
    window stays live throughout. 'q' or ESC skips the question read-out or
    stops the recording; ESC skips any other waiting phase. Returns early if
    the webcam cannot be opened.
    """
    # Ask candidate name and request resume
    candidate = input("Enter your name: ").strip() or "Candidate"
    print(f"Welcome {candidate}! Upload your resume.")
    speak_async("Please upload your resume.")

    root = Tk()
    root.withdraw()
    try:
        resume_path = askopenfilename(title="Select Resume", filetypes=[("Documents","*.pdf *.docx")])
    finally:
        # Release the hidden Tk root instead of leaking it for the whole session.
        root.destroy()
    resume_text = extract_text_from_file(resume_path) if resume_path else ""
    if resume_text:
        print("Resume successfully uploaded.\n")
    else:
        # Dialog cancelled or nothing could be extracted: do not claim success.
        print("No readable resume provided; continuing without it.\n")

    # A simple job description summary used to steer question generation
    jd_summary = "We are looking for a motivated AI/ML Engineer with 1–3 years of experience in ML projects."

    used = set()                        # keep track of used questions
    current_q = "Introduce yourself."   # fixed first question
    used.add(current_q)

    # Open webcam window
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Cannot open webcam.")
        return

    cv2.namedWindow("AI Interview", cv2.WINDOW_NORMAL)
    cv2.resizeWindow("AI Interview", WIN_W, WIN_H)

    # Initialize MediaPipe FaceMesh if available
    face_mesh_instance = None
    if MP_AVAILABLE:
        face_mesh_instance = mp_face_mesh.FaceMesh(
            max_num_faces=1, refine_landmarks=False,
            min_detection_confidence=0.5, min_tracking_confidence=0.5
        )

    all_answers = []
    total_qs = TOTAL_QUESTIONS
    skip_keys = (ord('q'), 27)

    try:
        for q_index in range(1, total_qs + 1):
            # Show and speak the question
            print(f"\nQuestion {q_index}/{total_qs}: {current_q}")
            q_speak_evt = speak_async(f"Question {q_index}. {current_q}")

            # `question=current_q` binds the text now, not when the callback runs.
            def _annotate_question(frame, question=current_q):
                _draw_question(frame, question)
                cv2.putText(frame, "Listening shortly... (press 'q' to skip)", (20, WIN_H - 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200,200,200), 1)

            # Keep webcam live while TTS speaks the question
            _pump_frames(cap, q_speak_evt.is_set, _annotate_question, exit_keys=skip_keys)

            # Start recording in background
            stop_flag = {"next": False}
            rec_thread = threading.Thread(target=record_audio, args=(stop_flag,), daemon=True)
            rec_thread.start()

            def _annotate_recording(frame, question=current_q):
                # Face contours are drawn only when MediaPipe is available.
                if face_mesh_instance is not None:
                    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    results = face_mesh_instance.process(rgb)
                    if results.multi_face_landmarks:
                        for face_landmarks in results.multi_face_landmarks:
                            draw_full_mesh_points(frame, face_landmarks)
                _draw_question(frame, question)
                cv2.putText(frame, "Recording... (press 'q' to stop)", (20, WIN_H - 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,0), 1)

            # Runs until the recorder stops itself (silence) or the user stops it.
            _pump_frames(cap, lambda flag=stop_flag: flag["next"], _annotate_recording,
                         exit_keys=skip_keys)
            stop_flag["next"] = True

            # Give the recorder time to close the stream and finish writing the
            # WAV before transcription starts reading it.
            rec_thread.join(timeout=5.0)

            # Transcribe in background, keeping the webcam live meanwhile
            trans_done, trans_box = _run_in_background(transcribe_audio, AUDIO_FILENAME)
            _pump_frames(cap, trans_done.is_set, _status_painter("Processing answer...", (0,255,255)))

            answer_text = trans_box[0] or ""
            print("Answer:", answer_text)
            all_answers.append(answer_text)

            # Generate feedback (first question fixed greeting; others via Gemini)
            if q_index == 1:
                fb_done, fb_box = _run_in_background(generate_first_answer_feedback, candidate, answer_text)
            else:
                fb_done, fb_box = _run_in_background(generate_feedback, current_q, answer_text)
            _pump_frames(cap, fb_done.is_set, _status_painter("Generating feedback...", (0,200,200)))

            feedback = fb_box[0] or "Thank you for your answer."
            print("Feedback:", feedback)
            speak_evt = speak_async(feedback)

            # Keep webcam live while feedback is spoken
            _pump_frames(cap, speak_evt.is_set, _status_painter("Speaking feedback...", (180,180,255)))

            # Prepare next question (if not last)
            if q_index < total_qs:
                role = "Technical" if q_index <= TECH_QUESTIONS else "HR"
                next_container = [None]
                nq_done, _ = _run_in_background(
                    generate_next_question_background,
                    jd_summary, resume_text, used, next_container, role,
                )
                _pump_frames(cap, nq_done.is_set, _status_painter("Preparing next question...", (200,200,100)))

                # If Gemini fails, use a fixed question for the role that has
                # not been asked yet, so the same fallback is not repeated.
                current_q = next_container[0] or _fallback_question(role, used)
                used.add(current_q)

    finally:
        # cleanup
        cap.release()
        cv2.destroyAllWindows()
        if MP_AVAILABLE and face_mesh_instance:
            face_mesh_instance.close()

    print("\nInterview Completed.")
    # Wait (bounded) for the goodbye: it is spoken on a daemon thread that
    # would be killed at interpreter exit when this runs as a script.
    speak_async(f"Thank you for the interview, {candidate}.").wait(timeout=15)

# ---------------- Main entry ----------------
if __name__ == "__main__":
    run_interview()


  from .autonotebook import tqdm as notebook_tqdm


MediaPipe available.
Loading Whisper small...
Small loaded.
Loading Whisper medium...
Medium loaded.


Enter your name:  Mahesh


Welcome Mahesh! Upload your resume.
Resume successfully uploaded.


Question 1/5: Introduce yourself.
Answer: Good afternoon. This is Mahesh and I am from Jannagam. First of all, thank you for giving this wonderful opportunity to introduce myself. I'm passionate about data and technology and have developed skills in Python, SQL, data analytics, machine learning, deep learning and generate to AI. Thank you.
Feedback: Nice to meet you, Mahesh.

Question 2/5: Explain overfitting; how would you detect and prevent it in a model?
Answer: Overfitting means when the trainer crazy is more than the tester crazy it occurs overfitting problem. When we use L1, L2, regularization techniques to reduce the overfitting problem in regression problems. When I come to the deep learning we use dropout technique and hyperparametric tuning.
Feedback: Good explanation of overfitting, detection, and prevention techniques!

Question 3/5: Describe your experience applying TF-IDF for text vectorization in a previ