In [1]:
import sys, os
from pathlib import Path
sys.path.append("..")

import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F
from PIL import Image
import numpy as np
import csv

from src.models.face_cnn import FaceCNN
from src.video_to_frames import extract_frames

# Pick the compute device once for the whole notebook: Apple-Silicon MPS first,
# then CUDA, otherwise CPU. Every branch builds a torch.device so that `device`
# has the same type on every machine (no bare "cpu" string fallback).
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)


Using device: mps


In [2]:
# Preprocessing applied to every image: grayscale, resize to 48x48, convert to tensor.
# The same `transform` object is reused later for single-frame inference.
preprocessing_steps = [
    transforms.Grayscale(),
    transforms.Resize((48, 48)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

# ImageFolder derives the class list from the sub-directory names of each root.
train_data, test_data = (
    datasets.ImageFolder(root, transform=transform)
    for root in ("../data/raw/fer2013/train", "../data/raw/fer2013/test")
)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=32, shuffle=False)

emotion_labels = train_data.classes
print("Classes:", emotion_labels)


Classes: ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']


In [3]:
# Training hyper-parameters, gathered here so they are easy to find and tune.
SEED = 42
NUM_EPOCHS = 10
LEARNING_RATE = 0.001

# Seed PyTorch's global RNG before the model is built so that weight
# initialisation and the DataLoader's shuffling order are repeatable between
# runs (individual backend kernels may still be non-deterministic).
torch.manual_seed(SEED)

model = FaceCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

for epoch in range(NUM_EPOCHS):
    model.train()
    loss_sum = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        out = model(images)
        loss = criterion(out, labels)
        loss.backward()
        optimizer.step()
        # .item() pulls the scalar off the device so no graph is kept alive.
        loss_sum += loss.item()
    # Reported value is the mean of the per-batch losses for this epoch.
    print(f"Epoch {epoch+1} | Loss: {loss_sum/len(train_loader):.4f}")

# Persist only the weights (state_dict); the loading cell rebuilds FaceCNN itself.
os.makedirs("../models", exist_ok=True)
torch.save(model.state_dict(), "../models/emotion_cnn.pth")
print("✅ Emotion CNN model saved!")


Epoch 1 | Loss: 1.6836
Epoch 2 | Loss: 1.4630
Epoch 3 | Loss: 1.3391
Epoch 4 | Loss: 1.2449
Epoch 5 | Loss: 1.1667
Epoch 6 | Loss: 1.0902
Epoch 7 | Loss: 1.0210
Epoch 8 | Loss: 0.9435
Epoch 9 | Loss: 0.8777
Epoch 10 | Loss: 0.8062
✅ Emotion CNN model saved!


In [4]:
# Rebuild the network and restore the weights written to ../models by the
# training cell, mapping the stored tensors onto the active device.
saved_weights = torch.load("../models/emotion_cnn.pth", map_location=device)
model = FaceCNN().to(device)
model.load_state_dict(saved_weights)
model.eval()  # switch the module to evaluation mode before running inference
print("✅ Emotion CNN loaded for inference")


✅ Emotion CNN loaded for inference


In [5]:
def predict_emotion(frame_path):
    """Return the emotion-class probabilities for a single image file.

    The image is opened as 8-bit grayscale, run through the module-level
    `transform`, and fed to the module-level `model` as a batch of one.

    Args:
        frame_path: Path of the image to classify.

    Returns:
        1-D numpy array holding the softmax over the model's outputs for
        that image.
    """
    grayscale = Image.open(frame_path).convert("L")
    # unsqueeze(0) adds the batch dimension in front of the transformed image.
    batch = transform(grayscale).unsqueeze(0).to(device)
    with torch.no_grad():
        logits = model(batch)
        probabilities = logits.softmax(dim=1)
    # Move to CPU before converting; [0] drops the batch dimension again.
    return probabilities.cpu().numpy()[0]


In [6]:
from src.body_language_yolo import extract_yolo_pose
import numpy as np
import os
from pathlib import Path

# Layout of the per-video feature vector built by process_video_to_features.
# Emotion part: one dominant-emotion ratio per class, the transition count and
# the volatility, then one peak and one variance per class. With the 7 classes
# printed by the data-loading cell that is 7 + 2 + 7 + 7 = 23.
NUM_EMOTION_FEATURES = 23
# Body part: movement mean/variance/max, shoulder-distance variance,
# head-speed variance, and hand-to-face minimum/mean distance.
NUM_BODY_FEATURES = 7
TOTAL_FEATURES = NUM_EMOTION_FEATURES + NUM_BODY_FEATURES   # = 30

# Length of the all-zero placeholder used when extract_yolo_pose returns None.
_POSE_VECTOR_SIZE = 51


def _emotion_features(emotion_timeline):
    """Summarise a per-frame emotion-probability timeline as a flat list of floats."""
    n_frames = len(emotion_timeline)
    dominant = np.argmax(emotion_timeline, axis=1)

    # Share of frames in which each emotion is the highest-probability class.
    ratios = {e: 0 for e in emotion_labels}
    unique, counts = np.unique(dominant, return_counts=True)
    for idx, c in zip(unique, counts):
        ratios[emotion_labels[idx]] = c / n_frames

    # Number of consecutive-frame pairs whose dominant emotion differs
    # (naturally 0 for a single frame, because both slices are empty).
    transitions = int(np.sum(dominant[:-1] != dominant[1:]))

    # Mean absolute frame-to-frame change. With a single frame the diff is
    # empty and np.mean would return NaN, so report "no change" instead.
    if n_frames > 1:
        volatility = float(np.mean(np.abs(np.diff(emotion_timeline, axis=0))))
    else:
        volatility = 0.0

    peaks = emotion_timeline.max(axis=0)
    variances = emotion_timeline.var(axis=0)

    features = (
        [float(ratios[e]) for e in emotion_labels] +
        [float(transitions), volatility] +
        [float(v) for v in peaks] +
        [float(v) for v in variances]
    )

    # Cap the emotion part at its declared width.
    return features[:NUM_EMOTION_FEATURES]


def _body_features(pose_timeline):
    """Summarise a per-frame pose timeline as seven movement/posture statistics."""
    # NOTE(review): the slices below treat every row as consecutive 3-value
    # keypoints (nose = 0, shoulders = 5/6, wrists = 9/10) -- presumably
    # (x, y, confidence) in COCO order; confirm against extract_yolo_pose.
    nose = pose_timeline[:, 0:3]

    if len(pose_timeline) > 1:
        # Size of the whole-pose change between consecutive frames.
        movement = np.linalg.norm(np.diff(pose_timeline, axis=0), axis=1)
        movement_mean = float(movement.mean())
        movement_var = float(movement.var())
        movement_max = float(movement.max())

        head_speed = np.linalg.norm(np.diff(nose, axis=0), axis=1)
        head_var = float(head_speed.var())
    else:
        # A single frame has no frame-to-frame motion. Without this guard
        # movement.max() raises ValueError on the empty diff and the
        # mean/variance calls return NaN.
        movement_mean = movement_var = movement_max = head_var = 0.0

    # NOTE(review): shoulders use only the first two values of each keypoint,
    # while nose and wrists use all three; confirm whether the third value is
    # meant to take part in the distances.
    left_shoulder = pose_timeline[:, 5*3:5*3+2]
    right_shoulder = pose_timeline[:, 6*3:6*3+2]
    shoulder_var = float(np.linalg.norm(left_shoulder - right_shoulder, axis=1).var())

    left_wrist = pose_timeline[:, 9*3:9*3+3]
    right_wrist = pose_timeline[:, 10*3:10*3+3]
    lw = np.linalg.norm(left_wrist - nose, axis=1)
    rw = np.linalg.norm(right_wrist - nose, axis=1)

    # NOTE(review): rows produced by the all-zero fail-safe give a distance of
    # exactly 0, so hand_face_min is 0 whenever any frame had no pose.
    hand_face_min = float(min(lw.min(), rw.min()))
    hand_face_mean = float((lw.mean() + rw.mean()) / 2)

    return [
        movement_mean, movement_var, movement_max,
        shoulder_var, head_var,
        hand_face_min, hand_face_mean,
    ]


def process_video_to_features(video_path, class_label, return_features=False, fps=2):
    """
    Extract exactly 30 features per video:
        - 23 emotion features
        - 7 body-language features

    Frames are written to ../data/frames/<video stem>/ via extract_frames, and
    every ``.jpg`` found there is scored with predict_emotion and
    extract_yolo_pose. A video that yields only one frame gets 0.0 for all
    frame-to-frame statistics instead of raising or producing NaN.

    Args:
        video_path: Path of the video file to process.
        class_label: Label passed through unchanged in the return value.
        return_features: Accepted for backward compatibility; the return value
            is the same whether it is True or False.
        fps: Forwarded to extract_frames as its ``fps`` argument.

    Returns:
        ``(features, class_label)`` where ``features`` is a 1-D numpy array, or
        ``None`` when no ``.jpg`` frame is found in the frame directory.
        Callers must check for ``None`` before unpacking.
    """
    video_name = Path(video_path).stem
    frame_dir = Path(f"../data/frames/{video_name}")
    frame_dir.mkdir(parents=True, exist_ok=True)

    extract_frames(str(video_path), str(frame_dir), fps=fps)

    # NOTE(review): frames are ordered by plain string sort of their file
    # names; this matches temporal order only if extract_frames zero-pads its
    # frame numbers -- confirm.
    frames = sorted(f for f in os.listdir(frame_dir) if f.endswith(".jpg"))

    if not frames:
        print(f"⚠️ No frames extracted: {video_name}")
        return None

    emotion_list = []
    pose_list = []

    for f in frames:
        fp = str(frame_dir / f)

        # Emotion probabilities for this frame.
        emotion_list.append(predict_emotion(fp))

        # Pose vector for this frame; fall back to zeros when none is returned.
        pose = extract_yolo_pose(fp)
        if pose is None:
            pose = np.zeros(_POSE_VECTOR_SIZE)
        pose_list.append(pose)

    emotion_timeline = np.array(emotion_list)  # (N,7)
    pose_timeline = np.array(pose_list)        # (N,51)

    features = _emotion_features(emotion_timeline) + _body_features(pose_timeline)

    return np.array(features), class_label


In [7]:
import glob
from sklearn.model_selection import train_test_split

# Collect the videos of each class in a stable (sorted) order.
truth, lie = (
    sorted(glob.glob(pattern))
    for pattern in ("../videos/truth/*.mp4", "../videos/lie/*.mp4")
)

# Split each class on its own with the same 80/20 ratio and fixed seed.
split_options = {"test_size": 0.2, "random_state": 42}
train_truth, test_truth = train_test_split(truth, **split_options)
train_lie, test_lie = train_test_split(lie, **split_options)

for caption, subset in (
    ("Train truth:", train_truth),
    ("Train lie:", train_lie),
    ("Test truth:", test_truth),
    ("Test lie:", test_lie),
):
    print(caption, len(subset))


Train truth: 48
Train lie: 48
Test truth: 12
Test lie: 13


In [8]:
train_csv = "../data/deception_train.csv"

# The file is opened once for the whole run. newline="" is what the csv module
# expects of a file object handed to csv.writer (it otherwise emits blank
# lines between rows on platforms that translate "\n").
with open(train_csv, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow([f"f{i}" for i in range(TOTAL_FEATURES)] + ["label"])

    # Label convention used throughout the notebook: 1 = truth, 0 = lie.
    for videos, label in ((train_truth, 1), (train_lie, 0)):
        for v in videos:
            result = process_video_to_features(v, label, return_features=True)
            if result is None:
                # No frames were extracted (a warning was already printed);
                # skip the video instead of failing on tuple-unpacking None.
                continue
            feats, lab = result
            writer.writerow(list(feats) + [lab])
            # Push each finished row to disk so completed work survives a
            # failure on a later video.
            f.flush()


✅ Extracted 78 frames from ../videos/truth/trial_truth_032.mp4 → ../data/frames/trial_truth_032
✅ Extracted 175 frames from ../videos/truth/trial_truth_004.mp4 → ../data/frames/trial_truth_004
✅ Extracted 63 frames from ../videos/truth/trial_truth_053.mp4 → ../data/frames/trial_truth_053
✅ Extracted 15 frames from ../videos/truth/trial_truth_018.mp4 → ../data/frames/trial_truth_018
✅ Extracted 49 frames from ../videos/truth/trial_truth_009.mp4 → ../data/frames/trial_truth_009
✅ Extracted 154 frames from ../videos/truth/trial_truth_007.mp4 → ../data/frames/trial_truth_007
✅ Extracted 68 frames from ../videos/truth/trial_truth_041.mp4 → ../data/frames/trial_truth_041
✅ Extracted 75 frames from ../videos/truth/trial_truth_005.mp4 → ../data/frames/trial_truth_005
✅ Extracted 42 frames from ../videos/truth/trial_truth_044.mp4 → ../data/frames/trial_truth_044
✅ Extracted 15 frames from ../videos/truth/trial_truth_020.mp4 → ../data/frames/trial_truth_020
✅ Extracted 63 frames from ../videos/t

In [9]:
test_csv = "../data/deception_test.csv"

# The file is opened once for the whole run. newline="" is what the csv module
# expects of a file object handed to csv.writer (it otherwise emits blank
# lines between rows on platforms that translate "\n").
with open(test_csv, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow([f"f{i}" for i in range(TOTAL_FEATURES)] + ["label"])

    # Label convention used throughout the notebook: 1 = truth, 0 = lie.
    for videos, label in ((test_truth, 1), (test_lie, 0)):
        for v in videos:
            result = process_video_to_features(v, label, return_features=True)
            if result is None:
                # No frames were extracted (a warning was already printed);
                # skip the video instead of failing on tuple-unpacking None.
                continue
            feats, lab = result
            writer.writerow(list(feats) + [lab])
            # Push each finished row to disk so completed work survives a
            # failure on a later video.
            f.flush()


✅ Extracted 30 frames from ../videos/truth/trial_truth_001.mp4 → ../data/frames/trial_truth_001
✅ Extracted 66 frames from ../videos/truth/trial_truth_006.mp4 → ../data/frames/trial_truth_006
✅ Extracted 46 frames from ../videos/truth/trial_truth_037.mp4 → ../data/frames/trial_truth_037
✅ Extracted 74 frames from ../videos/truth/trial_truth_046.mp4 → ../data/frames/trial_truth_046
✅ Extracted 28 frames from ../videos/truth/trial_truth_014.mp4 → ../data/frames/trial_truth_014
✅ Extracted 63 frames from ../videos/truth/trial_truth_055.mp4 → ../data/frames/trial_truth_055
✅ Extracted 65 frames from ../videos/truth/trial_truth_034.mp4 → ../data/frames/trial_truth_034
✅ Extracted 60 frames from ../videos/truth/trial_truth_049.mp4 → ../data/frames/trial_truth_049
✅ Extracted 60 frames from ../videos/truth/trial_truth_013.mp4 → ../data/frames/trial_truth_013
✅ Extracted 45 frames from ../videos/truth/trial_truth_058.mp4 → ../data/frames/trial_truth_058
✅ Extracted 52 frames from ../videos/tru

In [10]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the per-video feature tables written by the two extraction cells.
train_df = pd.read_csv("../data/deception_train.csv")
test_df = pd.read_csv("../data/deception_test.csv")

# Every column except "label" is a feature.
y_train = train_df["label"]
X_train = train_df.drop(columns="label")
y_test = test_df["label"]
X_test = test_df.drop(columns="label")

# Fixed random_state keeps the forest repeatable for the same input tables.
clf = RandomForestClassifier(n_estimators=300, random_state=42).fit(X_train, y_train)

preds = clf.predict(X_test)

print("\nAccuracy:", accuracy_score(y_test, preds))
print("\nReport:\n", classification_report(y_test, preds))



Accuracy: 0.72

Report:
               precision    recall  f1-score   support

           0       0.71      0.77      0.74        13
           1       0.73      0.67      0.70        12

    accuracy                           0.72        25
   macro avg       0.72      0.72      0.72        25
weighted avg       0.72      0.72      0.72        25



In [11]:
import joblib
# Persist the fitted classifier in the same ../models directory as the CNN weights.
classifier_path = "../models/deception_classifier.pkl"
joblib.dump(clf, classifier_path)
print("✅ Classifier saved!")


✅ Classifier saved!


In [13]:
import pandas as pd

# class_label is only passed through by process_video_to_features; it does not
# influence the extracted features or the prediction below.
sample_result = process_video_to_features(
    "../videos/truth/sample_test_video.wmv",
    class_label=1,
    return_features=True
)

# The extractor returns None when no frames were found; stop with a clear
# message instead of an opaque tuple-unpacking TypeError.
if sample_result is None:
    raise RuntimeError("No frames could be extracted from the sample video")
X_sample, _label = sample_result

# joblib.load unpickles the file, which can execute arbitrary code: only load
# classifier files produced by this notebook or another trusted source.
clf = joblib.load("../models/deception_classifier.pkl")

# Reuse the column names the classifier was fitted with so the single-row
# frame lines up with the training features.
X_input = pd.DataFrame([X_sample], columns=clf.feature_names_in_)

prediction = clf.predict(X_input)[0]

print("Prediction:", "TRUTH" if prediction == 1 else "LIE")


✅ Extracted 88 frames from ../videos/truth/sample_test_video.wmv → ../data/frames/sample_test_video
Prediction: TRUTH
