In [43]:
import sys, os
from pathlib import Path
sys.path.append("..")

import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F
from PIL import Image
import numpy as np
import csv

from src.models.face_cnn import FaceCNN
from src.video_to_frames import extract_frames

device = torch.device("mps") if torch.backends.mps.is_available() else "cpu"
print("Using device:", device)


Using device: mps


In [44]:
transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((48, 48)),
    transforms.ToTensor()
])

train_data = datasets.ImageFolder("../data/raw/fer2013/train", transform=transform)
test_data  = datasets.ImageFolder("../data/raw/fer2013/test", transform=transform)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader  = DataLoader(test_data,  batch_size=32, shuffle=False)

print("Classes:", train_data.classes)


Classes: ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']


In [45]:
model = FaceCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    model.train()
    loss_sum = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        out = model(images)
        loss = criterion(out, labels)
        loss.backward()
        optimizer.step()
        loss_sum += loss.item()
    print(f"Epoch {epoch+1} | Loss: {loss_sum/len(train_loader):.4f}")

# Save model ONCE
os.makedirs("../models", exist_ok=True)
torch.save(model.state_dict(), "../models/emotion_cnn.pth")
print("✅ Model saved!")


Epoch 1 | Loss: 1.6935
Epoch 2 | Loss: 1.4555
Epoch 3 | Loss: 1.3263
Epoch 4 | Loss: 1.2478
Epoch 5 | Loss: 1.1782
Epoch 6 | Loss: 1.1170
Epoch 7 | Loss: 1.0583
Epoch 8 | Loss: 0.9998
Epoch 9 | Loss: 0.9388
Epoch 10 | Loss: 0.8854
✅ Model saved!


In [46]:
model = FaceCNN().to(device)
model.load_state_dict(torch.load("../models/emotion_cnn.pth", map_location=device))
model.eval()
print("✅ Model loaded for inference")


✅ Model loaded for inference


In [51]:
def predict_emotion(frame_path):
    img = Image.open(frame_path)
    img = transform(img).unsqueeze(0).to(device)
    with torch.no_grad():
        out = model(img)
        probs = F.softmax(out, dim=1).cpu().numpy()[0]
    return probs


In [52]:
video_root = "../videos"
for class_name in ["lie", "truth"]:
    label = 0 if class_name == "lie" else 1
    folder = os.path.join(video_root, class_name)

    for video_file in os.listdir(folder):
        video_path = os.path.join(folder, video_file)
        frame_dir  = f"../data/frames/{video_file}"
        extract_frames(video_path, frame_dir, fps=2)

        frames = sorted(os.listdir(frame_dir))
        timeline = np.array([predict_emotion(f"{frame_dir}/{f}") for f in frames])

        dominant = np.argmax(timeline, 1)
        unique, counts = np.unique(dominant, return_counts=True)
        ratio = dict(zip([labels[u] for u in unique], counts/len(dominant)))

        transitions = np.sum(dominant[:-1] != dominant[1:])
        volatility  = np.mean(np.abs(np.diff(timeline, axis=0)))
        peak   = dict(zip(labels, timeline.max(0)))
        var    = dict(zip(labels, timeline.var(0)))

        features = []
        for e in labels: features.append(ratio.get(e,0))
        features += [transitions, volatility]
        for e in labels: features.append(peak[e])
        for e in labels: features.append(var[e])

        row = list(np.array(features, dtype=np.float32)) + [label]

        with open("../data/deception_dataset.csv","a") as f:
            csv.writer(f).writerow(row)

        print(f"✅ Processed {video_file} ({class_name})")


 Saved 52 frames to ../data/frames/trial_lie_041.mp4
✅ Processed trial_lie_041.mp4 (lie)
 Saved 64 frames to ../data/frames/trial_lie_055.mp4
✅ Processed trial_lie_055.mp4 (lie)
 Saved 40 frames to ../data/frames/trial_lie_054.mp4
✅ Processed trial_lie_054.mp4 (lie)
 Saved 50 frames to ../data/frames/trial_lie_040.mp4
✅ Processed trial_lie_040.mp4 (lie)
 Saved 63 frames to ../data/frames/trial_lie_056.mp4
✅ Processed trial_lie_056.mp4 (lie)
 Saved 48 frames to ../data/frames/trial_lie_042.mp4
✅ Processed trial_lie_042.mp4 (lie)
 Saved 29 frames to ../data/frames/trial_lie_043.mp4
✅ Processed trial_lie_043.mp4 (lie)
 Saved 41 frames to ../data/frames/trial_lie_057.mp4
✅ Processed trial_lie_057.mp4 (lie)
 Saved 72 frames to ../data/frames/trial_lie_053.mp4
✅ Processed trial_lie_053.mp4 (lie)
 Saved 35 frames to ../data/frames/trial_lie_047.mp4
✅ Processed trial_lie_047.mp4 (lie)
 Saved 71 frames to ../data/frames/trial_lie_046.mp4
✅ Processed trial_lie_046.mp4 (lie)
 Saved 96 frames to .

In [None]:
row = list(X_sample) + [label]
file = "../data/deception_dataset.csv"
header = [f"f{i}" for i in range(23)] + ["label"]

if not os.path.exists(file):
    with open(file, "w", newline="") as f:
        csv.writer(f).writerow(header)

with open(file, "a", newline="") as f:
    csv.writer(f).writerow(row)

print("✅ saved to dataset")


✅ saved to dataset


In [55]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("../data/deception_dataset.csv")

# Separate features & labels
X = df.drop("label", axis=1)
y = df["label"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train classifier
clf = RandomForestClassifier(n_estimators=200, random_state=42)
clf.fit(X_train, y_train)

# Test performance
preds = clf.predict(X_test)
print("✅ Model Trained!")
print("\nAccuracy:", accuracy_score(y_test, preds))
print("\nReport:\n", classification_report(y_test, preds))


✅ Model Trained!

Accuracy: 0.76

Report:
               precision    recall  f1-score   support

           0       0.85      0.73      0.79        15
           1       0.67      0.80      0.73        10

    accuracy                           0.76        25
   macro avg       0.76      0.77      0.76        25
weighted avg       0.77      0.76      0.76        25



In [56]:
import joblib
joblib.dump(clf, "../models/deception_classifier.pkl")
print("✅ Lie/Truth classifier saved!")


✅ Lie/Truth classifier saved!


In [58]:
import pandas as pd
import joblib

clf = joblib.load("../models/deception_classifier.pkl")

# Convert X_sample to DataFrame with same column names as training data
X_input = pd.DataFrame([X_sample], columns=clf.feature_names_in_)

prediction = clf.predict(X_input)
print("Prediction:", "LIE" if prediction[0] == 0 else "TRUTH")



Prediction: LIE
