In [10]:
from transformers import pipeline
from moviepy.editor import VideoFileClip

def extract_audio(video_path, audio_path="audio.wav"):
    """Extract the audio track of a video into a 16 kHz, 16-bit PCM WAV file.

    Args:
        video_path: Path of the video file to read.
        audio_path: Destination path of the WAV file (overwritten if present).

    Returns:
        ``audio_path``, so the call can be chained into a transcriber.

    Raises:
        ValueError: If the video contains no audio track.
    """
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            # Without this guard the failure is an opaque AttributeError on None.
            raise ValueError(f"No audio track found in video: {video_path}")
        video.audio.write_audiofile(
            audio_path,
            fps=16000,            # sample rate requested for the ASR model input
            codec="pcm_s16le",    # uncompressed 16-bit little-endian PCM
            verbose=False,
            logger=None
        )
    finally:
        # Release the ffmpeg reader process / file handles held by the clip,
        # even when writing the audio fails.
        video.close()
    return audio_path

In [11]:
class SpeechTranscriber:
    """Transcribe the speech of a video file with a Whisper ASR pipeline."""

    def __init__(self, model_name="openai/whisper-large-v3", device=0, chunk_length_s=30):
        """Load the speech-recognition pipeline.

        Args:
            model_name: Hugging Face model id of the ASR checkpoint.
            device: Device passed to ``pipeline`` (``0`` = first CUDA GPU,
                ``-1`` or ``"cpu"`` to run without a GPU).
            chunk_length_s: Length in seconds of the chunks the audio is split
                into. Whisper handles 30-second windows, while answers may
                last up to 2 minutes, so long recordings must be chunked.
        """
        self.asr = pipeline(
            "automatic-speech-recognition",
            model=model_name,
            device=device,
            chunk_length_s=chunk_length_s
        )

    def transcribe(self, video_path):
        """Return the transcript text of the speech contained in ``video_path``.

        The audio is extracted to a temporary WAV file that is deleted once the
        transcription is done, so repeated or parallel calls neither overwrite
        each other's audio nor leave files behind in the working directory.

        Args:
            video_path: Path of the video file to transcribe.

        Returns:
            The transcript as a string (the ``"text"`` field of the pipeline result).
        """
        import os
        import tempfile

        with tempfile.TemporaryDirectory() as workdir:
            audio_path = extract_audio(video_path, os.path.join(workdir, "audio.wav"))
            result = self.asr(audio_path)
        return result["text"]

In [None]:
# import json

# class AnswerEvaluator:
#     def __init__(self, llm_pipeline):
#         self.llm = llm_pipeline

#     def evaluate(self, transcript, question):
#         prompt = f"""
# You are a senior technical interview evaluator.

# Evaluate the candidate answer strictly based on the question.
# Return ONLY one valid JSON object. No extra text.

# Question: "{question}"
# Candidate Answer: "{transcript}"

# JSON format:
# {{
#   "scores": {{
#     "technical_accuracy": 0,
#     "relevance": 0,
#     "depth_of_understanding": 0,
#     "applied_thinking": 0,
#     "clarity": 0,
#     "problem_solving": 0,
#     "communication": 0
#   }},
#   "final_average_score": 0.0,
#   "decision": "ACCEPT | REJECT",
#   "strengths": [],
#   "weaknesses": [],
#   "improvement_suggestions": []
# }}
# """

#         result = self.llm(prompt)[0]["generated_text"]

#         try:
#             start = result.find("{")
#             end = result.rfind("}") + 1
#             return json.loads(result[start:end])
#         except Exception as e:
#             return {
#                 "error": "Failed to parse JSON",
#                 "raw_output": result
#             }

In [12]:
from openai import OpenAI
import json
from dotenv import dotenv_values
from google.colab import userdata
import os
# SECURITY: the API key must never be written into the notebook. A key that was
# hard-coded on this line has been exposed and must be revoked in the OpenAI
# dashboard. The key is now resolved, in order, from:
#   1. the OPENAI_API_KEY environment variable,
#   2. the Colab secret named OPENAI_API_KEY,
#   3. an interactive prompt (input is not echoed).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    try:
        OPENAI_API_KEY = userdata.get("OPENAI_API_KEY")
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        # Secret not defined, or this notebook was not granted access to it.
        OPENAI_API_KEY = None
if not OPENAI_API_KEY:
    from getpass import getpass
    OPENAI_API_KEY = getpass("OpenAI API key: ")
# Later cells read the key back from the environment.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [19]:
class AnswerEvaluator:
    """Score a transcribed interview answer with an OpenAI chat model.

    The model returns per-criterion scores; the final average and the
    ACCEPT/REJECT decision are then recomputed in code so that they always
    follow the rules stated in the prompt instead of relying on the model's
    own arithmetic.
    """

    # Minimum final average score (inclusive) for an ACCEPT decision,
    # matching the "Final decision rules" section of the prompt.
    ACCEPT_THRESHOLD = 7.0

    def __init__(self, api_key, model="gpt-4.1"):
        """Create the OpenAI client.

        Args:
            api_key: OpenAI API key used to authenticate the client.
            model: Name of the chat model used for the evaluation.
        """
        self.client = OpenAI(api_key=api_key)
        self.model = model

    def evaluate(self, transcript, question):
        """Evaluate a candidate answer against an interview question.

        Args:
            transcript: Speech-to-text transcript of the candidate's answer.
            question: The interview question that was asked.

        Returns:
            A JSON string with the keys ``scores``, ``final_average_score``,
            ``decision``, ``strengths``, ``weaknesses`` and
            ``improvement_suggestions``. If the model reply cannot be parsed,
            it is returned unchanged.
        """
        transcript_clean = transcript.strip()
        prompt = f"""
You are a senior technical interview evaluation agent.

Your task is to objectively evaluate a CANDIDATE'S ANSWER that was TRANSCRIBED FROM VIDEO (speech-to-text) in response to an AI-generated interview question.

IMPORTANT TIME CONSTRAINT:
- The candidate had a MAXIMUM of 2 MINUTES to answer.
- Answers are expected to be concise.
- Lack of deep implementation details, extended examples, or full trade-off analysis is NORMAL.
- Do NOT penalize correct but brief explanations.

Speech context:
- The answer comes from spoken language.
- Minor grammar issues, filler words, pauses, or informal phrasing are expected.
- Ignore transcription artifacts and focus on meaning.

Evaluation rules:
- Evaluate ONLY what the candidate explicitly says.
- Do NOT infer unstated knowledge.
- Do NOT reward verbosity.
- Penalize incorrect concepts, misunderstandings, or vague hand-waving.
- Be consistent and conservative with high scores (9–10).

Evaluation criteria (score EACH from 0 to 10):

1. Technical Accuracy:
   - Core concepts are correct and not misleading.

2. Relevance:
   - Directly answers the question without going off-topic.

3. Depth of Understanding (Time-Aware):
   - Shows correct conceptual understanding appropriate for a 2-minute answer.
   - Deep details are optional.

4. Applied Thinking:
   - Mentions realistic techniques, tools, or system components.
   - Personal experience is NOT required.

5. Clarity:
   - Explanation is logically structured and easy to follow.

6. Reasoning & Problem Solving:
   - Demonstrates cause–effect understanding or logical flow.

7. Communication Quality:
   - Clear, confident, professional interview delivery.

Scoring instructions:
- Use the FULL 0–10 range.
- Scores of 9–10 require very strong clarity and correctness.
- Do NOT inflate scores for generic or memorized answers.
- Calculate the FINAL AVERAGE SCORE (rounded to 1 decimal place).

Final decision rules (BINARY ONLY):
- FINAL SCORE \u2265 7.0 \u2192 ACCEPT
- FINAL SCORE < 7.0 \u2192 REJECT

Output rules:
- Return JSON ONLY.
- Do NOT include any decision other than ACCEPT or REJECT.
Question: "{question}"
Candidate Answer: "{transcript_clean}"

Required JSON format:
{{
  "scores": {{
    "technical_accuracy":0 ,
    "relevance": ,
    "depth_of_understanding": ,
    "applied_thinking": ,
    "clarity": ,
    "problem_solving": ,
    "communication":
  }},
  "final_average_score": ,
  "decision": "ACCEPT | REJECT",
  "strengths": [],
  "weaknesses": [],
  "improvement_suggestions": []
}}

"""

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": "You are a strict JSON-only evaluator."},
                {"role": "user", "content": prompt}
            ],
            temperature=0,
            response_format={"type": "json_object"}
        )

        return self._enforce_scoring_rules(response.choices[0].message.content)

    @staticmethod
    def _enforce_scoring_rules(content):
        """Recompute the final average and decision from the per-criterion scores.

        Args:
            content: Raw reply of the model, expected to be a JSON object string.

        Returns:
            The JSON string with ``final_average_score`` and ``decision``
            overwritten by values computed from ``scores``. ``content`` is
            returned unchanged when it is not valid JSON or holds no usable
            numeric scores (best effort: the caller still receives the reply).
        """
        try:
            payload = json.loads(content)
            values = list(payload["scores"].values())
            average = round(sum(values) / len(values), 1)
        except (TypeError, ValueError, KeyError, AttributeError, ZeroDivisionError):
            return content

        payload["final_average_score"] = average
        payload["decision"] = (
            "ACCEPT" if average >= AnswerEvaluator.ACCEPT_THRESHOLD else "REJECT"
        )
        return json.dumps(payload, ensure_ascii=False)

In [14]:
# video_path = "x.mp4"

# # question
# question = "How would you improve the training stability of a neural network?"

# # 1) Transcription
# transcriber = SpeechTranscriber()
# transcript = transcriber.transcribe(video_path)

# print("=== TRANSCRIPT ===")
# print(transcript)

# # 2) Evaluation (without sending the video again ❌)
# evaluator = AnswerEvaluator(llm_pipeline)
# evaluation = evaluator.evaluate(transcript, question)

# print("=== EVALUATION ===")
# print(json.dumps(evaluation, indent=2))

In [20]:
import os

# Inputs of this run: the recorded answer and the question it responds to.
video_path = "/content/WIN_20260101_21_20_12_Pro.mp4"
question = "explain what is overfitting?"

# Step 1: speech-to-text on the recorded answer.
transcriber = SpeechTranscriber()
transcript = transcriber.transcribe(video_path)

print("=== TRANSCRIPT ===", transcript, sep="\n")

# Step 2: score the transcript with GPT-4.1. The key must already be in the environment.
api_key = os.getenv("OPENAI_API_KEY")
if api_key is None or api_key == "":
    raise ValueError("OpenAI API key not found. Please set it in the environment variable 'OPENAI_API_KEY' or directly provide it.")

evaluator = AnswerEvaluator(api_key=api_key)
evaluation = evaluator.evaluate(transcript, question)

# The evaluator returns a JSON string; round-trip it to pretty-print.
print("=== EVALUATION ===")
print(
    json.dumps(
        json.loads(evaluation),
        indent=2,
    )
)

Device set to use cuda:0


=== TRANSCRIPT ===
 Overfading is the model trained on the data too much and it's complex more than the usual.
=== EVALUATION ===
{
  "scores": {
    "technical_accuracy": 1,
    "relevance": 3,
    "depth_of_understanding": 1,
    "applied_thinking": 0,
    "clarity": 2,
    "problem_solving": 0,
    "communication": 2
  },
  "final_average_score": 1.3,
  "decision": "REJECT",
  "strengths": [
    "Attempted to relate overfitting to model complexity."
  ],
  "weaknesses": [
    "Incorrect terminology ('overfading' instead of 'overfitting').",
    "Fails to explain the core concept of overfitting (poor generalization to new data).",
    "No mention of realistic techniques, tools, or system components.",
    "Lacks logical structure and clarity.",
    "No demonstration of reasoning or problem-solving."
  ],
  "improvement_suggestions": [
    "Use the correct term: 'overfitting.'",
    "Explain that overfitting occurs when a model learns the training data too well, including noise, and p