## 1. Analisis Teks

In [54]:
import pandas as pd
from sentence_transformers import SentenceTransformer, util
# Sentence-embedding model shared by the whole notebook; score_interview_answer
# uses it to encode both the candidate transcript and the reference answer.
model = SentenceTransformer("all-mpnet-base-v2")

# Reference ("ideal") answer descriptions keyed by question id (1-5).
# A transcript is scored by its cosine similarity to the entry for its question.
ideal_answers = {
    1: "A comprehensive and clear explanation of a specific technical challenge faced during TensorFlow certification or ML work, including what happened and how it was solved, showing strong reasoning.",
    2: "A detailed explanation of personal experience using transfer learning in TensorFlow, including pretrained models, fine-tuning steps, project examples, and benefits.",
    3: "A comprehensive description of a complex TensorFlow model with architecture, preprocessing, optimization, regularization, and performance tuning.",
    4: "A clear explanation of how dropout is implemented in TensorFlow (such as tf.keras.layers.Dropout), how it works, and how it reduces overfitting.",
    5: "A step-by-step explanation of building a CNN with TensorFlow for image classification, covering preprocessing, conv/pool layers, compilation, training, and evaluation."
}


In [55]:
def similarity_to_score(similarity):
    """Map a cosine-similarity value onto the 0-4 rubric scale.

    Every band has a strict lower bound: above 0.75 -> 4, above 0.55 -> 3,
    above 0.35 -> 2, above 0.20 -> 1, anything else -> 0.
    """
    # Checked from the highest bound down; the first band that matches wins.
    bands = ((0.75, 4), (0.55, 3), (0.35, 2), (0.20, 1))
    for lower_bound, points in bands:
        if similarity > lower_bound:
            return points
    return 0


In [56]:
def generate_reason(score):
    """Return the feedback sentence (in Indonesian) for a rubric score.

    Scores 1 through 4 each have a dedicated sentence; any other value gets
    the fallback sentence for an irrelevant or missing answer.
    """
    feedback_by_score = (
        (4, "Jawaban sangat lengkap, jelas, dan memenuhi semua poin rubric."),
        (3, "Jawaban cukup jelas dan relevan namun kurang mendalam."),
        (2, "Jawaban masih terlalu umum dan minim detail teknis."),
        (1, "Jawaban tidak membahas inti pertanyaan secara memadai."),
    )
    for level, message in feedback_by_score:
        if score == level:
            return message
    return "Jawaban tidak relevan atau tidak ada."


In [57]:
def score_interview_answer(transcript, qid):
    """Score one transcribed answer against the reference answer for `qid`.

    The transcript and the matching entry of `ideal_answers` are embedded with
    the notebook-level `model`; their cosine similarity is converted to a 0-4
    score by `similarity_to_score` and to a feedback sentence by
    `generate_reason`.

    Args:
        transcript: The candidate's answer text. A value that is not a string
            (for example the NaN pandas yields for an empty CSV cell) or a
            blank string is treated as "no answer".
        qid: Question id; must be a key of `ideal_answers`.

    Returns:
        dict with the keys "id", "transcript", "similarity", "score" and
        "reason".

    Raises:
        KeyError: if `qid` has no reference answer.
    """
    ideal = ideal_answers[qid]

    if not isinstance(transcript, str) or not transcript.strip():
        # Nothing to embed: skip the encoder and report zero similarity, which
        # maps to score 0 and the "irrelevant or missing" feedback sentence.
        similarity = 0.0
    else:
        emb_ans = model.encode(transcript, convert_to_tensor=True)
        emb_ideal = model.encode(ideal, convert_to_tensor=True)
        similarity = util.cos_sim(emb_ans, emb_ideal).item()

    score = similarity_to_score(similarity)
    reason = generate_reason(score)

    return {
        "id": qid,
        "transcript": transcript,
        "similarity": similarity,
        "score": score,
        "reason": reason
    }


In [58]:
def evaluate_from_csv(df):
    """Score every transcript in `df`, treating row order as question order.

    The n-th row (1-based) is scored as question n. The question id comes from
    the row's position, not from its index label, so a frame that was filtered,
    sorted or otherwise re-indexed is still scored against the right reference
    answers.

    Args:
        df: DataFrame with a "transcript" column, one row per question.

    Returns:
        list of result dicts from `score_interview_answer`, in row order.
    """
    return [
        score_interview_answer(transcript, qid)
        for qid, transcript in enumerate(df["transcript"], start=1)
    ]


## 2. Analisis Audio

In [59]:
def score_fluency(row):
    """Compute a 0-4 fluency score from the audio features of one row.

    `row` is any mapping (for example a DataFrame row) with the keys "tempo",
    "zcr" and "energy".
    """
    # Tempo: a higher tempo is treated as more fluent (worth up to 2 points).
    tempo = row["tempo"]
    if tempo > 150:
        tempo_points = 2
    elif tempo > 120:
        tempo_points = 1
    else:
        tempo_points = 0

    # Zero-crossing rate: a lower value is treated as smoother speech.
    smooth_points = 1 if row["zcr"] < 0.13 else 0

    # Energy: a larger value is treated as a stronger, steadier voice.
    energy_points = 1 if row["energy"] > 0.0043 else 0

    # Cap the total at 4.
    return min(tempo_points + smooth_points + energy_points, 4)


In [60]:
def format_audio_results(df):
    """Convert the audio columns of `df` into one plain dict per row.

    Reads "video", "fluency_score", "voice_similarity_score" and
    "cheating_suspected" and casts the three metrics to built-in int, float
    and bool.
    """
    def as_record(row):
        video = row["video"]
        audio_block = {
            "fluency_score": int(row["fluency_score"]),
            "voice_similarity": float(row["voice_similarity_score"]),
            "cheating_suspected": bool(row["cheating_suspected"]),
        }
        return {"video": video, "audio": audio_block}

    return [as_record(row) for _, row in df.iterrows()]

## 3. Analisis Gestur Tubuh

In [61]:
def score_gesture(row):
    """Compute a 0-4 gesture score: one point per satisfied check.

    `row` is any mapping (for example a DataFrame row) with the keys
    "ear_mean", "blink_sum", "head_movement_mean", "gaze_left_prop" and
    "gaze_right_prop".
    """
    checks = (
        # 1. Eye aspect ratio: a higher mean is treated as focused.
        row["ear_mean"] > 0.20,
        # 2. Blink count is not excessive.
        row["blink_sum"] < 20,
        # 3. Head movement is stable.
        row["head_movement_mean"] < 0.30,
        # 4. Gaze is balanced, not dominated by one side.
        abs(row["gaze_left_prop"] - row["gaze_right_prop"]) < 0.30,
    )
    passed = sum(1 for ok in checks if ok)
    return min(passed, 4)


In [62]:
def detect_gesture_anomaly(df, threshold=2.5):
    """Flag rows whose gesture features are outliers relative to the frame.

    Each gesture column is standardised with the column mean and sample
    standard deviation of `df`; a row is flagged when the largest absolute
    z-score over all gesture columns exceeds `threshold`.

    Args:
        df: DataFrame containing the nine gesture feature columns listed in
            `gesture_cols` below.
        threshold: Absolute z-score above which a row counts as anomalous.
            Defaults to 2.5, the value previously hard-coded.

    Returns:
        Boolean Series aligned with `df.index`.

    Note:
        With the sample standard deviation, |z| can never exceed
        (n - 1) / sqrt(n) for n rows, so the default threshold cannot be
        reached on frames with fewer than 9 rows. Pass a lower `threshold`
        for small batches.
    """
    gesture_cols = [
        "ear_mean", "ear_min", "ear_max",
        "blink_sum",
        "head_movement_mean", "head_movement_min", "head_movement_max",
        "gaze_left_prop", "gaze_right_prop"
    ]

    features = df[gesture_cols]
    z = (features - features.mean()) / features.std()

    # Constant columns produce NaN z-scores; max() skips them and an all-NaN
    # row compares False, so such rows are never flagged.
    return z.abs().max(axis=1) > threshold


In [63]:
def format_gesture_results(df):
    """Convert the gesture columns of `df` into one plain dict per row.

    Reads "video", "gesture_score" and "gesture_anomaly" and casts the two
    metrics to built-in int and bool.
    """
    def as_record(row):
        video = row["video"]
        gesture_block = {
            "gesture_score": int(row["gesture_score"]),
            "anomaly_detected": bool(row["gesture_anomaly"]),
        }
        return {"video": video, "gesture": gesture_block}

    return [as_record(row) for _, row in df.iterrows()]


## Final Score

In [64]:
def compute_final_score(text_score, audio_score, gesture_score):
    """Combine the three 0-4 component scores into one 0-10 score.

    Weights are 50% text, 30% audio and 20% gesture; the weighted sum is
    rescaled by 10/4 and rounded to two decimals.
    """
    text_part = text_score * 0.5
    audio_part = audio_score * 0.3
    gesture_part = gesture_score * 0.2
    combined = text_part + audio_part + gesture_part
    # Rescale from the 0-4 rubric range to 0-10.
    return round(combined * (10 / 4), 2)


In [69]:
def decision_from_score(score):
    """Translate a 0-10 final score into a decision label.

    8.0 and above -> "AI_PASSED"; 5.0 up to (not including) 8.0 ->
    "REVIEW_NEEDED"; everything else -> "AI_FAILED".
    """
    if score >= 8.0:
        return "AI_PASSED"
    return "REVIEW_NEEDED" if score >= 5.0 else "AI_FAILED"


In [66]:
def merge_scores(text_list, audio_list, gesture_list):
    """Merge per-answer text, audio and gesture results into final records.

    The three lists are matched by position: entry i of `audio_list` and
    `gesture_list` is combined with entry i of `text_list`. Each text entry
    must carry "video", "score", "similarity" and "reason"; audio and gesture
    entries use the shapes produced by `format_audio_results` and
    `format_gesture_results`.

    Returns:
        list of dicts with the keys "video", "scores" and "details".
    """
    merged = []

    for position, text_item in enumerate(text_list):
        audio_item = audio_list[position]
        gesture_item = gesture_list[position]

        text_score = text_item["score"]
        audio_part = audio_item["audio"]
        fluency = audio_part["fluency_score"]
        gesture_part = gesture_item["gesture"]
        gesture_score = gesture_part["gesture_score"]

        final_score = compute_final_score(
            text_score=text_score,
            audio_score=fluency,
            gesture_score=gesture_score,
        )
        decision = decision_from_score(final_score)

        video = text_item["video"]
        scores = {
            "text": text_score,
            "audio": fluency,
            "gesture": gesture_score,
            "final_score": final_score,
            "decision": decision,
        }
        details = {
            "text": {
                "similarity": text_item["similarity"],
                "reason": text_item["reason"],
            },
            "audio": {
                "voice_similarity": audio_part["voice_similarity"],
                "cheating_suspected": audio_part["cheating_suspected"],
            },
            "gesture": {
                "anomaly_detected": gesture_part["anomaly_detected"],
            },
        }
        merged.append({"video": video, "scores": scores, "details": details})

    return merged


## Pipeline + inference

In [67]:
def run_full_pipeline(df):
    """Run text, audio and gesture scoring on `df` and merge the results.

    Args:
        df: DataFrame with one row per interview answer. The steps below read
            the columns "video", "transcript", "tempo", "zcr", "energy",
            "mfcc_1" .. "mfcc_13" and the gesture feature columns used by
            `score_gesture` and `detect_gesture_anomaly`. The frame is not
            modified; all derived columns are added to an internal copy.

    Returns:
        list of merged result dicts as produced by `merge_scores`.
    """
    # Imported here because the notebook has no top-level scikit-learn import.
    from sklearn.metrics.pairwise import cosine_similarity

    # Work on a re-indexed copy. This keeps the caller's frame free of the
    # helper columns added below, and guarantees a 0..n-1 index so that row
    # position and index label agree even if `df` was filtered or re-indexed.
    data = df.reset_index(drop=True)

    # --- TEXT SCORING ---
    # evaluate_from_csv does not include the video name, so attach it here
    # for merge_scores.
    text_results = [
        {**item, "video": video}
        for item, video in zip(evaluate_from_csv(data), data["video"])
    ]

    # --- AUDIO SCORING ---
    data["fluency_score"] = data.apply(score_fluency, axis=1)
    mfcc_cols = [f"mfcc_{i}" for i in range(1, 14)]
    # Pairwise cosine similarity between the MFCC vectors of all rows; each
    # row's mean therefore includes its similarity with itself.
    similarity_matrix = cosine_similarity(data[mfcc_cols].values)
    data["voice_similarity_score"] = similarity_matrix.mean(axis=1)
    # A row is suspicious when its voice similarity falls more than 0.05
    # below the batch average.
    threshold = data["voice_similarity_score"].mean() - 0.05
    data["cheating_suspected"] = data["voice_similarity_score"] < threshold

    audio_results = format_audio_results(data)

    # --- GESTURE SCORING ---
    data["gesture_score"] = data.apply(score_gesture, axis=1)
    data["gesture_anomaly"] = detect_gesture_anomaly(data)
    gesture_results = format_gesture_results(data)

    # --- MERGE ALL ---
    return merge_scores(text_results, audio_results, gesture_results)

def process_interview_data():
    """Interactive Colab entry point: upload a CSV, score it, export JSON.

    Steps: ask the user to upload a CSV, run `run_full_pipeline` on it, write
    the results to "interview_results.json", trigger a browser download of
    that file and print the JSON. If no file is uploaded, a message is
    printed and the function returns without running the pipeline.

    Returns:
        None.
    """
    print("Starting the interview data processing pipeline...")

    # 1. UPLOAD CSV FILE (Google Colab)
    # google.colab exists only inside a Colab runtime, so it is imported here
    # rather than at notebook level.
    from google.colab import files
    import pandas as pd
    import json

    print("Please upload your CSV file.")
    uploaded = files.upload()   # user picks the CSV file
    if not uploaded:
        # A cancelled upload yields an empty mapping; stop cleanly instead of
        # failing with an IndexError.
        print("No file was uploaded; pipeline aborted.")
        return
    filename = next(iter(uploaded))

    df = pd.read_csv(filename)
    print("File loaded:", filename)

    # 2. RUN FULL PIPELINE
    # Relies on run_full_pipeline and its helpers (model, ideal_answers,
    # score_fluency, ...) being defined in the notebook's global scope.
    final_output = run_full_pipeline(df)

    # 3. SAVE RESULTS TO JSON
    output_filename = "interview_results.json"
    # Serialise once and reuse the text for both the file and the printout.
    results_text = json.dumps(final_output, indent=4)

    with open(output_filename, "w") as f:
        f.write(results_text)

    print("JSON successfully saved as:", output_filename)

    # 4. DOWNLOAD JSON FILE
    files.download(output_filename)

    # 5. DISPLAY JSON CONTENT
    print("\n--- Final JSON Results ---")
    print(results_text)
    print("Pipeline execution completed.")

In [68]:
# Run the complete pipeline: prompts for a CSV upload, scores every row, then
# saves, downloads and prints interview_results.json.
process_interview_data()

Starting the interview data processing pipeline...
Please upload your CSV file.


Saving dataset_final(stt).csv to dataset_final(stt) (4).csv
File loaded: dataset_final(stt) (4).csv
JSON successfully saved as: interview_results.json


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


--- Final JSON Results ---
[
    {
        "video": "interview_question_1.webm",
        "scores": {
            "text": 3,
            "audio": 1,
            "gesture": 1,
            "final_score": 5.0,
            "decision": "REVIEW_NEEDED"
        },
        "details": {
            "text": {
                "similarity": 0.5639498233795166,
                "reason": "Jawaban cukup jelas dan relevan namun kurang mendalam."
            },
            "audio": {
                "voice_similarity": 0.9994826587858514,
                "cheating_suspected": false
            },
            "gesture": {
                "anomaly_detected": false
            }
        }
    },
    {
        "video": "interview_question_2.webm",
        "scores": {
            "text": 2,
            "audio": 4,
            "gesture": 1,
            "final_score": 6.0,
            "decision": "REVIEW_NEEDED"
        },
        "details": {
            "text": {
                "similarity": 0.547448396682

In [70]:
# Write the list of packages installed in this runtime (pip freeze) to requirements.txt.
!pip freeze > requirements.txt
# Colab-only helper module; used here to download the generated file.
from google.colab import files
files.download("requirements.txt")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>