In [1]:
# ==============================================================================
# SCRIPT: V2_gatekeeper_2025_07_25.py
#
# PURPOSE:
# This script trains a binary classification model, referred to as the "Gatekeeper,"
# whose sole job is to distinguish between a genuine, classifiable emotional
# expression and a non-emotional facial action (e.g., mid-speech movements).
# ==============================================================================

# V2 changes:
#   - Section 3: the pretrained starting model is now the V1 Gatekeeper model.

In [2]:
import torch
from transformers import AutoImageProcessor, AutoModelForImageClassification, TrainingArguments, Trainer
from datasets import load_dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import os
from datetime import datetime

In [3]:
# ==============================================================================
# 1. CONFIGURATION
# ==============================================================================

# --- Input: the NEW image-folder dataset created in Step 1 ---
DATASET_PATH = "/Users/natalyagrokh/AI/ml_expressions/img_datasets/gatekeeper_dataset"

# --- Output: each run of the V2 Gatekeeper gets its own timestamped directory ---
OUTPUT_DIR_ROOT = "/Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/gatekeeper_models"
RUN_TIMESTAMP = datetime.now().strftime('%Y%m%d_%H%M%S')
MODEL_NAME = f"gatekeeper_V2_{RUN_TIMESTAMP}"  # e.g. gatekeeper_V2_20250729_065459
FINAL_OUTPUT_DIR = os.path.join(OUTPUT_DIR_ROOT, MODEL_NAME)

# Create the run directory up front (exist_ok=True: no error if it already exists).
os.makedirs(FINAL_OUTPUT_DIR, exist_ok=True)

In [4]:
# ==============================================================================
# 2. DATA LOADING & PREPARATION
# ==============================================================================

# --- Load the dataset from the folders ---
# The "imagefolder" builder infers the class labels from the directory layout
# under DATASET_PATH (expected here: 'emotion' and 'speech_action').
print(f"--- Loading dataset from: {DATASET_PATH} ---")
dataset = load_dataset("imagefolder", data_dir=DATASET_PATH)

# Fixed seed so the stratified 80/20 split is identical on every run. Without
# it each run draws a different evaluation set, so metrics are not comparable
# between runs.
# NOTE(review): section 3 fine-tunes from an earlier Gatekeeper checkpoint; if
# that checkpoint was trained on this same dataset with a different split, some
# evaluation images may already have been seen during its training — confirm.
SPLIT_SEED = 42
train_test_split = dataset['train'].train_test_split(
    test_size=0.2,
    stratify_by_column='label',
    seed=SPLIT_SEED,
)
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']

# Image processor that turns PIL images into model-ready `pixel_values`.
# NOTE(review): this is loaded from the base ViT checkpoint, not from the
# fine-tuned Gatekeeper checkpoint used in section 3 — confirm the two share the
# same preprocessing configuration.
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

def transform(examples):
    """Batch transform that adds model-ready ``pixel_values`` to a batch.

    Every image in ``examples["image"]`` is converted to RGB and the whole
    list is passed through the module-level ``processor``. The resulting
    ``pixel_values`` entry is stored on the batch, which is then returned
    with all of its original keys still present.
    """
    rgb_images = []
    for image in examples["image"]:
        rgb_images.append(image.convert("RGB"))
    encoded = processor(rgb_images, return_tensors="pt")
    examples["pixel_values"] = encoded['pixel_values']
    return examples

# Register `transform` on both the training and the evaluation split.
for _split in (train_dataset, eval_dataset):
    _split.set_transform(transform)

def collate_fn(batch):
    """Merge a list of per-example dicts into a single batch dict.

    Args:
        batch: Sequence of examples, each providing a ``'pixel_values'``
            tensor and a ``'label'`` value.

    Returns:
        dict with ``'pixel_values'`` (the per-example tensors stacked along a
        new leading dimension) and ``'labels'`` (a tensor built from the
        per-example label values).
    """
    pixel_tensors = []
    label_values = []
    for example in batch:
        pixel_tensors.append(example['pixel_values'])
        label_values.append(example['label'])
    return {
        'pixel_values': torch.stack(pixel_tensors),
        'labels': torch.tensor(label_values),
    }

--- Loading dataset from: /Users/natalyagrokh/AI/ml_expressions/img_datasets/gatekeeper_dataset ---


Resolving data files:   0%|          | 0/3323 [00:00<?, ?it/s]

Downloading data:   0%|          | 0/3322 [00:00<?, ?files/s]

Generating train split: 0 examples [00:00, ? examples/s]

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


In [5]:
# ==============================================================================
# 3. MODEL TRAINING
# ==============================================================================

# --- Load the PREVIOUSLY TRAINED GATEKEEPER CHECKPOINT ---
# Fine-tuning starts from the current best Gatekeeper.
# NOTE(review): the variable is called GATEKEEPER_V1_PATH, but the directory it
# points at is named "gatekeeper_V2_20250725_151114" — confirm this is the
# intended starting checkpoint.
GATEKEEPER_V1_PATH = "/Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/gatekeeper_models/gatekeeper_V2_20250725_151114/checkpoint-632"

# Class names in index order, taken from the dataset's `label` feature.
labels = dataset['train'].features['label'].names
id2label = dict(enumerate(labels))
label2id = {name: idx for idx, name in id2label.items()}

model = AutoModelForImageClassification.from_pretrained(
    GATEKEEPER_V1_PATH,
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
)

# Hyper-parameters for the fine-tuning run; all artefacts go to FINAL_OUTPUT_DIR.
training_args = TrainingArguments(
    output_dir=FINAL_OUTPUT_DIR,
    # Schedule and batch sizes.
    num_train_epochs=4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Evaluate and checkpoint once per epoch, then reload the best checkpoint.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    logging_steps=10,
    # Presumably needed so the raw `image` column still reaches `transform`
    # (which reads examples["image"]) — TODO confirm.
    remove_unused_columns=False,
)

# --- Define Metrics ---
def compute_metrics(p):
    """Compute accuracy, F1, precision and recall for one evaluation pass.

    Args:
        p: Prediction object exposing ``predictions`` (per-class scores,
            arg-maxed over axis 1) and ``label_ids`` (the reference labels).

    Returns:
        dict with the keys ``'accuracy'``, ``'f1'``, ``'precision'`` and
        ``'recall'``.
    """
    y_true = p.label_ids
    y_pred = np.argmax(p.predictions, axis=1)
    # Binary metrics are reported for class index 1.
    # NOTE(review): which class name maps to index 1 depends on how the dataset
    # assigned label ids — confirm it is the class you want to treat as positive.
    prf = precision_recall_fscore_support(y_true, y_pred, average='binary', pos_label=1)
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'f1': prf[2],
        'precision': prf[0],
        'recall': prf[1],
    }

# --- Initialize and Run the Trainer ---
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    # `tokenizer=` is deprecated for Trainer.__init__ (it emits a warning);
    # `processing_class=` is the replacement in recent transformers releases.
    processing_class=processor,
)

print(f"\n--- Starting training for Gatekeeper Model: {MODEL_NAME} ---")
trainer.train()

# Training itself only leaves per-epoch checkpoint sub-directories behind.
# Because load_best_model_at_end=True, the model held by the trainer at this
# point is the best checkpoint; write it to the run directory explicitly so the
# message below is accurate and the model can be loaded from FINAL_OUTPUT_DIR.
trainer.save_model(FINAL_OUTPUT_DIR)

print(f"\n✅ Training complete. Best model saved to: {FINAL_OUTPUT_DIR}")

  trainer = Trainer(



--- Starting training for Gatekeeper Model: gatekeeper_V2_20250729_065459 ---




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0951,0.076296,0.977444,0.979253,0.967213,0.991597
2,0.0424,0.086457,0.978947,0.980447,0.977716,0.983193
3,0.0267,0.072759,0.983459,0.984529,0.988701,0.980392
4,0.0022,0.069615,0.984962,0.985994,0.985994,0.985994





✅ Training complete. Best model saved to: /Users/natalyagrokh/AI/ml_expressions/img_expressions/data_flywheel/gatekeeper_models/gatekeeper_V2_20250729_065459
