### data prep

In [None]:
from preprocessing.loader_results import ResultsLoader
from preprocessing.loader_text import TextLoader
from preprocessing.loader_audio import AudioLoader
from preprocessing.loader_face import FaceLoader

# One loader instance per data source, created in the order results, text, audio, face.
results_loader, text_loader, audio_loader, face_loader = (
    ResultsLoader(),
    TextLoader(),
    AudioLoader(),
    FaceLoader(),
)

# Subset settings forwarded to the loaders' get_data() calls:
# percentage=0.02 (described by the original author as a balanced 2% subset of the data)
# and a fixed random_state so the same subset is requested on every run.
percentage, random_state = 0.02, 42


In [None]:
# Both loader calls receive the same subset settings, so they are bundled once.
subset_kwargs = {"percentage": percentage, "random_state": random_state}

# Results table.
df_result = results_loader.get_data(**subset_kwargs)

# Face features. ds_freq and rw_size are both "10s" here
# (presumably the downsampling interval and the rolling-window length —
# confirm against FaceLoader.get_data).
df_face = face_loader.get_data(ds_freq="10s", rw_size="10s", **subset_kwargs)

# Render both frames: face features first, then results.
display(df_face, df_result)

### EDA
* eigenfaces
* distribution of landmarks / action units

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA

# Default figure size for plots that do not set their own
#plt.style.use('seaborn')
plt.rcParams['figure.figsize'] = [12, 8]

# Landmark columns, identified by the 'CLNFfeatures_' fragment in their name
landmark_features = [col for col in df_face.columns if 'CLNFfeatures_' in col]
landmark_data = df_face[landmark_features]

# PCA over the landmark columns; its components are the "eigenfaces" plotted below
pca = PCA(n_components=10)
landmark_pca = pca.fit_transform(landmark_data)

# Cumulative explained variance, drawn against a 1-based component count so that
# x = k reads as "variance captured by the first k components", matching the axis label
component_counts = np.arange(1, len(pca.explained_variance_ratio_) + 1)
plt.figure()
plt.plot(component_counts, np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance Ratio')
plt.title('Explained Variance Ratio of Landmark PCA Components')
plt.show()

# Locate the x and y loadings inside a component by column name ('_x' / '_y'), the same
# convention the average-landmark plot in this cell uses. Pairing neighbours with
# reshape(-1, 2) is only right when the columns are interleaved x0, y0, x1, y1, ...;
# selecting by name gives the same picture for that layout and a correct one for an
# "all x then all y" layout.
# NOTE(review): the actual column order produced by FaceLoader is not visible here.
x_positions = [pos for pos, col in enumerate(landmark_features) if '_x' in col]
y_positions = [pos for pos, col in enumerate(landmark_features) if '_y' in col]
split_by_name = bool(x_positions) and len(x_positions) == len(y_positions)

# Plot first few eigenfaces
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.ravel()

for i in range(min(6, len(pca.components_))):
    if split_by_name:
        xs = pca.components_[i][x_positions]
        ys = pca.components_[i][y_positions]
    else:
        # Fallback: column names do not follow the '_x' / '_y' convention, so keep the
        # original neighbour pairing.
        component = pca.components_[i].reshape(-1, 2)
        xs, ys = component[:, 0], component[:, 1]
    axes[i].scatter(xs, ys, alpha=0.6)
    axes[i].set_title(f'Eigenface {i + 1}')
    axes[i].set_xlabel('X coordinate')
    axes[i].set_ylabel('Y coordinate')

plt.tight_layout()
plt.show()

# Action-unit columns, identified by the 'CLNFAUs_' fragment in their name
au_features = [col for col in df_face.columns if 'CLNFAUs_' in col]
au_data = df_face[au_features]

# One violin per action-unit column
au_violin_fig, au_violin_ax = plt.subplots(figsize=(15, 8))
sns.violinplot(data=au_data, ax=au_violin_ax)
# Tilt the column names so neighbouring tick labels do not overlap
plt.setp(au_violin_ax.get_xticklabels(), rotation=45, ha='right')
au_violin_ax.set_title('Distribution of Action Units')
au_violin_fig.tight_layout()
plt.show()

# Pairwise correlation between action-unit columns, colour scale centred on zero
au_corr_fig, au_corr_ax = plt.subplots(figsize=(12, 10))
sns.heatmap(au_data.corr(), cmap='coolwarm', center=0, ax=au_corr_ax)
au_corr_ax.set_title('Correlation Matrix of Action Units')
au_corr_fig.tight_layout()
plt.show()

# Average every landmark column per ID to get one mean face shape per ID
mean_landmarks = df_face.groupby('ID')[landmark_features].mean()

# Split the averaged columns into x and y coordinates by name
x_columns = [col for col in landmark_features if '_x' in col]
y_columns = [col for col in landmark_features if '_y' in col]
x_coords = mean_landmarks[x_columns].values
y_coords = mean_landmarks[y_columns].values

# All IDs are overlaid in one scatter; equal axis scaling keeps the face proportions
landmark_fig, landmark_ax = plt.subplots(figsize=(10, 10))
landmark_ax.scatter(x_coords, y_coords, alpha=0.5)
landmark_ax.set_title('Average Landmark Positions')
landmark_ax.set_xlabel('X coordinate')
landmark_ax.set_ylabel('Y coordinate')
landmark_ax.axis('equal')
plt.show()


### PCA

In [None]:
import pandas as pd

# Column groups, selected by the name fragment that identifies each feature family
gaze_features = [col for col in df_face.columns if 'CLNFgaze_' in col]
au_features = [col for col in df_face.columns if 'CLNFAUs_' in col]
pose_features = [col for col in df_face.columns if 'CLNFpose_' in col]

# family name -> {'pca': fitted PCA object, 'transformed': projected samples}
pca_results = {}

# Fit one PCA per feature family and visualise it
for feature_name, features in [('Gaze', gaze_features),
                               ('Action Units', au_features),
                               ('Pose', pose_features)]:

    # A family with no matching columns has nothing to decompose
    if not features:
        continue

    feature_data = df_face[features]

    # At most 10 components, fewer when the family has fewer columns
    pca = PCA(n_components=min(10, len(features)))
    feature_pca = pca.fit_transform(feature_data)

    pca_results[feature_name] = {
        'pca': pca,
        'transformed': feature_pca
    }

    # Cumulative explained variance, drawn against a 1-based component count so that
    # x = k reads as "variance captured by the first k components", matching the axis label
    component_counts = np.arange(1, len(pca.explained_variance_ratio_) + 1)
    plt.figure(figsize=(10, 6))
    plt.plot(component_counts, np.cumsum(pca.explained_variance_ratio_))
    plt.xlabel('Number of Components')
    plt.ylabel('Cumulative Explained Variance Ratio')
    plt.title(f'Explained Variance Ratio of {feature_name} PCA Components')
    plt.grid(True)
    plt.show()

    # Per-component share of the variance
    print(f"\n{feature_name} - Explained variance ratio by component:")
    for i, ratio in enumerate(pca.explained_variance_ratio_):
        print(f"PC{i + 1}: {ratio:.4f}")

    # Heatmap of the component loadings: rows are original columns, columns are PCs
    plt.figure(figsize=(12, 8))
    feature_importance = pd.DataFrame(
        pca.components_.T,
        columns=[f'PC{i + 1}' for i in range(feature_pca.shape[1])],
        index=features
    )
    sns.heatmap(feature_importance, cmap='coolwarm', center=0)
    plt.title(f'Feature Importance in {feature_name} Principal Components')
    plt.tight_layout()
    plt.show()

# Collect every family's projected samples into one frame that shares df_face's index;
# columns are named '<family>_PC<k>'
pca_df = pd.DataFrame(index=df_face.index)
for feature_name, result in pca_results.items():
    feature_cols = [f'{feature_name}_PC{i + 1}' for i in range(result['transformed'].shape[1])]
    pca_df[feature_cols] = result['transformed']

# Independent copy of the transformed features for later use
pca_features = pca_df.copy()

### train test split

In [None]:
from sklearn.preprocessing import StandardScaler

# merge TODO: make sure to use the PCA df
df = pd.merge(df_face, df_result, on='ID')

from sklearn.model_selection import TimeSeriesSplit

# Create TimeSeriesSplit object
tscv = TimeSeriesSplit(n_splits=5)

# Prepare X and y.
# The 'ID' merge key is removed together with the target: it identifies a participant,
# it is not a measurement, and leaving it in would standardise it and feed it to the
# model as a feature. errors='ignore' covers the case where 'ID' is an index level
# rather than a column.
# NOTE(review): any other column contributed by df_result (e.g. further scores or
# metadata) is still part of X — confirm none of them leaks the target.
X = df.drop(columns=['PHQ_Binary']).drop(columns=['ID'], errors='ignore')
y = df['PHQ_Binary']

# Row positions of every fold, in the order TimeSeriesSplit yields them
train_indices = []
test_indices = []

for train_idx, test_idx in tscv.split(X):
    train_indices.append(train_idx)
    test_indices.append(test_idx)

# The last fold has the largest training window; use it for the final train/test split.
# NOTE(review): the split is positional over the merged rows, so rows of one ID can end
# up on both sides — confirm this is intended.
X_train = X.iloc[train_indices[-1]]
X_test = X.iloc[test_indices[-1]]
y_train = y.iloc[train_indices[-1]]
y_test = y.iloc[test_indices[-1]]

# Hold out the trailing 25% of the training window as the validation set
train_size = int(0.75 * len(X_train))
X_train, X_val = X_train.iloc[:train_size], X_train.iloc[train_size:]
y_train, y_val = y_train.iloc[:train_size], y_train.iloc[train_size:]

# Standardize features: statistics come from the training rows only and are then
# applied unchanged to validation and test. The outputs are NumPy arrays.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

### pipeline & hyperparameter tuning

In [None]:
### Pipeline & Hyperparameter Tuning

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import ParameterGrid
import numpy as np
from tqdm import tqdm
from models.audio_rnn import AudioRNN
from models.face_strnn import FaceSTRNN, SpatialAttention, TemporalAttention

# Feature arrays become float tensors, label series become long (integer) tensors
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.FloatTensor(split) for split in (X_train, X_val, X_test)
)
y_train_tensor, y_val_tensor, y_test_tensor = (
    torch.LongTensor(labels.values) for labels in (y_train, y_val, y_test)
)

# Mini-batch size shared by all three loaders
batch_size = 32

# Pair each feature tensor with its label tensor
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training loader shuffles; validation and test keep their row order
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# importing the models and training
from models.face_strnn import FaceSTRNN as model
import utils.trainer as train

# Search space: the grid search below iterates over every combination of these values
param_grid = dict(
    hidden_size=[64, 128, 256],
    num_layers=[1, 2],
    dropout=[0.2, 0.3, 0.4],
    learning_rate=[0.001, 0.0001],
    weight_decay=[0.0001, 0.00001],
)

# Use the GPU when CUDA is available, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# Grid search: train one FaceSTRNN per hyperparameter combination, rank the runs by the
# validation loss of their last epoch, and leave the best trained model bound to `model`.
import copy  # used to snapshot the best model so later runs cannot overwrite it

# Feature dimension = size of the last axis. After StandardScaler the training data is
# a 2-D array, so indexing axis 2 raised IndexError; the last axis is the feature axis
# for 2-D input and equals axis 2 for 3-D (batch, seq, feature) input.
# NOTE(review): whether FaceSTRNN accepts 2-D batches is not visible here — confirm.
input_size = X_train.shape[-1]

best_val_loss = float('inf')
best_params = None
best_model = None
results = []

for params in tqdm(ParameterGrid(param_grid)):
    # Model initialization
    model = FaceSTRNN(
        input_size=input_size,
        hidden_size=params['hidden_size'],
        num_layers=params['num_layers'],
        num_classes=2,  # Binary classification
        dropout=params['dropout']
    )

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=params['learning_rate'],
        weight_decay=params['weight_decay']
    )

    # Cut the learning rate by 10x after 3 epochs without improvement of the monitored
    # loss. The `verbose` argument is deprecated in recent PyTorch releases and is
    # therefore no longer passed.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=0.1,
        patience=3
    )

    # Train the model. The trainer module is imported as `train`; a bare `train_model`
    # name is never defined in this notebook.
    # NOTE(review): assumes utils.trainer exposes train_model with this signature.
    train_losses, val_losses = train.train_model(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        n_epochs=50,
        device=device
    )

    # Record results
    final_val_loss = val_losses[-1]
    results.append({
        'params': params,
        'final_val_loss': final_val_loss,
        'train_losses': train_losses,
        'val_losses': val_losses
    })

    # Keep the parameters AND the trained weights of the best run so far
    if final_val_loss < best_val_loss:
        best_val_loss = final_val_loss
        best_params = params
        best_model = copy.deepcopy(model)

# No run produced a loss below infinity (empty grid, or every loss was NaN/inf)
if best_params is None:
    raise RuntimeError("Grid search finished without a usable validation loss")

# Rebind `model` to the best run: without this it would still refer to the last
# configuration trained, and that is the object the following cell saves.
model = best_model

# Print best parameters
print("\nBest parameters:")
for param, value in best_params.items():
    print(f"{param}: {value}")
print(f"Best validation loss: {best_val_loss:.4f}")

# Loss curves of the run with the lowest final validation loss
# TODO: revisit whether (10, 6) is a good figure size
plt.figure(figsize=(10, 6))
best_result = min(results, key=lambda run: run['final_val_loss'])
for curve_key, curve_label in (('train_losses', 'Training Loss'),
                               ('val_losses', 'Validation Loss')):
    plt.plot(best_result[curve_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss (Best Model)')
plt.grid(True)
plt.legend()
plt.show()

In [None]:
# Persist the current `model` together with the fitted `scaler` through the trainer
# module (imported as `train`), under the file name "face_model.pth".
# NOTE(review): `model` is whatever object the grid-search cell left bound to that
# name — confirm it is the best configuration, not merely the last one trained,
# before relying on this checkpoint.
# Earlier inline version of the save, kept for reference:
# torch.save({
#     'model_state_dict': model.state_dict(),
#     'scaler_state_dict': scaler,
#     'input_size': X_train.shape[2],
#     'best_params': best_params
# }, 'face_model.pth')
train.save_model(model, scaler, "face_model.pth")

### evaluation

In [None]:
# NOTE(review): evaluation is currently disabled. The commented-out code below calls
# `grid_search.predict` and `grid_search.classes_`, but no `grid_search` object is
# created anywhere in the visible notebook (tuning is a manual ParameterGrid loop over
# a PyTorch model), so it has to be rewritten before it can be re-enabled.
# from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
#
# y_pred = grid_search.predict(X_test)
#
# y_test = y_test.to_numpy()
# # Debug: Check the shapes and types of y_pred
# print(f'y_pred: {y_pred}')
# print(f'y_test: {y_test}')
#
# # Generate and display classification report
# print("Classification Report:")
# print(classification_report(y_test, y_pred))
#
# # Generate and display confusion matrix
# cm = confusion_matrix(y_test, y_pred, labels=grid_search.classes_)
# disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=grid_search.classes_)
# disp.plot(cmap=plt.cm.Blues)
# plt.show()
#
