In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import torchvision.models as models
import torch.nn as nn
import torch.optim as optim

In [2]:
df = pd.read_pickle('combined_df.pkl')

In [3]:
df = df.dropna(subset=['image'])

In [5]:
df.label.unique()

array(['irritation', 'frustration', 'wrath', 'rage', 'anger',
       'exasperation', 'spite', 'fury', 'annoyance', 'resentment',
       'grumpiness', 'outrage', 'aggravation', 'hostility', 'grouchiness',
       'desire', 'passion', 'longing', 'lust', 'infatuation', 'gloom',
       'rejection', 'disappointment', 'unhappiness', 'defeat',
       'agitation', 'insult', 'loneliness', 'alienation', 'hate',
       'displeasure', 'jealousy', 'dislike', 'contempt', 'disgust',
       'envy', 'revulsion', 'bitterness', 'scorn', 'loathing', 'horror',
       'terror', 'dread', 'nervousness', 'fear', 'worry', 'panic',
       'fright', 'hysteria', 'alarm', 'dismay', 'tenseness',
       'apprehension', 'anxiety', 'uneasiness', 'hurt', 'suffering',
       'agony', 'distress', 'anguish', 'insecurity', 'torment',
       'optimism', 'attraction', 'excitement', 'liking', 'eagerness',
       'hope', 'zeal', 'arousal', 'gladness', 'joy', 'happiness',
       'delight', 'rapture', 'cheerfulness', 'joviality', 

In [13]:
# Define the two-category mapping
engagement_mapping = {
    "engaged": [
        # Anger-related
        "irritation", "frustration", "wrath", "rage", "anger", "exasperation", "spite",
        "fury", "annoyance", "resentment", "grumpiness", "outrage", "aggravation",
        "hostility", "grouchiness", "vengefulness",
        
        # Desire & passion
        "desire", "passion", "longing", "lust", "infatuation",'agitation',
        
        # Fear & alertness
        "terror", "dread", "nervousness", "fear", "worry", "panic", "fright", "hysteria",
        "alarm", "dismay", "tenseness", "apprehension", "anxiety", "uneasiness", 'horror',
        
        # Joy & enthusiasm
        "optimism", "attraction", "excitement", "liking", "eagerness", "hope", "zeal",
        "arousal", "gladness", "joy", "happiness", "delight", "rapture", "cheerfulness",
        "joviality", "elation", "bliss", "enjoyment", "ecstasy", "gaiety", "euphoria",
        "jubilation", "glee", "jolliness", "zest",
        
        # Love & connection
        "caring", "love", "affection", "compassion", "adoration", "pity", "sentimentality",
        "tenderness", "sympathy", "fondness",
        
        # Pride & achievement
        "pride", "satisfaction", "pleasure", "triumph", "relief",
        
        # Surprise & stimulation
        "astonishment", "shock", "amusement", "amazement", "surprise", "thrill",
        "enthusiasm", "exhilaration", "enthrallment"
    ],
    "not engaged": [
        # Sadness & disappointment
        "gloom", "rejection", "disappointment", "unhappiness", "defeat", "isolation",
        "neglect", "sadness", "hopelessness", "grief", "despair", "depression",
        "sorrow", "melancholy", "dejection", "misery", "woe", "glumness", "homesickness",
        
        # Social/negative distancing
        "insult", "loneliness", "alienation", "hate", "displeasure", "jealousy", "dislike",
        "contempt", "disgust", "envy", "revulsion", "bitterness", "scorn", "loathing",
        
        # Pain & insecurity
        "hurt", "suffering", "agony", "distress", "anguish", "insecurity", "torment",
        
        # Guilt & shame
        "guilt", "humiliation", "shame", "embarrassment", "remorse", "regret",
        "mortification",
        
        # Mild positive or neutral
        "contentment"
    ]
}

# Flatten to label -> category mapping
flat_mapping = {label: category for category, labels in engagement_mapping.items() for label in labels}

# Apply mapping to your DataFrame
df['engagement_type'] = df['label'].map(flat_mapping)

# Optional: View results
print(df['engagement_type'].value_counts())


engagement_type
engaged        16901
not engaged    12788
Name: count, dtype: int64


In [15]:
# Step 1: Define the target sample size
min_class_size = df['engagement_type'].value_counts().min()

# Step 2: Sample each class down to the minimum size
df_balanced = (
    df.groupby('engagement_type', group_keys=False)
      .apply(lambda x: x.sample(n=min_class_size, random_state=42))
      .reset_index(drop=True)
)

# Step 3: Check the balance
print(df_balanced['engagement_type'].value_counts())

engagement_type
engaged        12788
not engaged    12788
Name: count, dtype: int64


  .apply(lambda x: x.sample(n=min_class_size, random_state=42))


In [16]:
# Encode labels
le = LabelEncoder()
df_balanced['label'] = le.fit_transform(df_balanced['engagement_type'])

In [17]:
# Split
train_df, val_df = train_test_split(df_balanced, test_size=0.2, random_state=42)

In [18]:
class EngagementDataset(Dataset):
    def __init__(self, dataframe):
        self.images = dataframe['image'].tolist()
        self.labels = dataframe['label'].tolist()

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        return image, label

In [19]:
batch_size = 32

train_dataset = EngagementDataset(train_df)
val_dataset = EngagementDataset(val_df)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)


In [22]:
num_classes = len(le.classes_)

model = models.alexnet(pretrained=True)
model.classifier = nn.Sequential(
    nn.Dropout(p=0.6),                      # ← increased dropout
    nn.Linear(256 * 6 * 6, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(p=0.6),                      # ← increased dropout
    nn.Linear(4096, 4096),
    nn.ReLU(inplace=True),
    nn.Linear(4096, num_classes)
)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [21]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)

In [23]:
num_epochs = 30

for epoch in range(num_epochs):
    # -------- TRAINING --------
    model.train()
    running_loss = 0.0
    correct_train, total_train = 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Training accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    avg_train_loss = running_loss / len(train_loader)
    train_accuracy = 100 * correct_train / total_train

    # -------- VALIDATION --------
    model.eval()
    correct_val, total_val = 0, 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_accuracy = 100 * correct_val / total_val

    # -------- LOG RESULTS --------
    print(f"Epoch {epoch+1}/{num_epochs} | "
          f"Train Loss: {avg_train_loss:.4f} | "
          f"Train Acc: {train_accuracy:.2f}% | "
          f"Val Acc: {val_accuracy:.2f}%")


Epoch 1/30 | Train Loss: 0.7113 | Train Acc: 50.33% | Val Acc: 50.33%
Epoch 2/30 | Train Loss: 0.7094 | Train Acc: 50.64% | Val Acc: 50.33%
Epoch 3/30 | Train Loss: 0.7131 | Train Acc: 49.70% | Val Acc: 50.33%
Epoch 4/30 | Train Loss: 0.7116 | Train Acc: 50.43% | Val Acc: 50.33%
Epoch 5/30 | Train Loss: 0.7131 | Train Acc: 49.94% | Val Acc: 50.33%
Epoch 6/30 | Train Loss: 0.7140 | Train Acc: 49.25% | Val Acc: 50.33%
Epoch 7/30 | Train Loss: 0.7124 | Train Acc: 49.86% | Val Acc: 50.33%
Epoch 8/30 | Train Loss: 0.7137 | Train Acc: 49.57% | Val Acc: 50.33%
Epoch 9/30 | Train Loss: 0.7105 | Train Acc: 50.30% | Val Acc: 50.33%
Epoch 10/30 | Train Loss: 0.7121 | Train Acc: 50.00% | Val Acc: 50.33%
Epoch 11/30 | Train Loss: 0.7140 | Train Acc: 49.65% | Val Acc: 50.33%
Epoch 12/30 | Train Loss: 0.7114 | Train Acc: 50.00% | Val Acc: 50.33%
Epoch 13/30 | Train Loss: 0.7160 | Train Acc: 49.08% | Val Acc: 50.33%
Epoch 14/30 | Train Loss: 0.7119 | Train Acc: 50.10% | Val Acc: 50.33%
Epoch 15/30 | T