In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import glob
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import BatchNormalization
import pandas as pd
import numpy as np
from pathlib import Path
import glob
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')



In [9]:
class FinalMultimodalDataProcessor:
    def __init__(self, base_path="DatasetCercetare"):
        self.base_path = base_path
        self.sessions_data = {}

    def get_all_sessions(self):
        """Get all unique session names from the file structure"""
        # Get session names from AudioFeatures (since they're shared)
        audio_files = glob.glob(f"{self.base_path}/AudioFeatures/*.csv")
        sessions = [Path(file).stem for file in audio_files]
        return sessions

    def get_people_in_session(self, session_name):
        """Get the people involved in a session from ActionUnits files"""
        au_files = glob.glob(f"{self.base_path}/ActionUnits/*_on_{session_name}.csv")
        people = []
        for file in au_files:
            filename = Path(file).stem
            person = filename.split('_on_')[0]
            people.append(person)
        return people

    def load_person_data(self, person, session_name):
        """Load data for a specific person in a session"""
        person_data = {}

        print(f"  Loading data for {person} in session {session_name}")

        # Load Action Units for this person
        au_file = f"{self.base_path}/ActionUnits/{person}_on_{session_name}.csv"
        if Path(au_file).exists():
            au_df = pd.read_csv(au_file)
            au_features = ['AU01', 'AU02', 'AU04', 'AU05', 'AU06', 'AU07', 'AU09',
                          'AU10', 'AU12', 'AU14', 'AU15', 'AU17', 'AU20', 'AU23',
                          'AU25', 'AU26', 'AU28']
            person_data['action_units'] = au_df[['timestamp'] + au_features]
            print(f"    Action Units: {au_df.shape}")

        # Load Hand Gestures for this person
        hg_file = f"{self.base_path}/HandGestures/{person}_on_{session_name}.csv"
        if Path(hg_file).exists():
            hg_df = pd.read_csv(hg_file)
            hg_features = ['left_hand_velocity', 'right_hand_velocity',
                          'gesture_frequency_cumulative', 'face_touches_cumulative']
            person_data['hand_gestures'] = hg_df[['timestamp'] + hg_features]
            print(f"    Hand Gestures: {hg_df.shape}")

        # Load shared Audio Features (same for all people in session)
        audio_file = f"{self.base_path}/AudioFeatures/{session_name}.csv"
        if Path(audio_file).exists():
            audio_df = pd.read_csv(audio_file)
            audio_features = ['energy_db', 'pitch_hz', 'speaking_rate']
            audio_df = audio_df.rename(columns={'time_seconds': 'timestamp'})
            person_data['audio'] = audio_df[['timestamp'] + audio_features]
            print(f"    Audio Features: {audio_df.shape}")

        # Load shared Sentiment Analysis (filter by speaker if available)
        sent_file = f"{self.base_path}/SentimentAnalysis/{session_name}.csv"
        if Path(sent_file).exists():
            sent_df = pd.read_csv(sent_file)
            sent_df = sent_df.rename(columns={'second': 'timestamp'})

            # Filter by speaker if the person name matches
            if 'speaker' in sent_df.columns:
                # Try to match person name with speaker (case insensitive)
                person_sent = sent_df[sent_df['speaker'].str.lower() == person.lower()]
                if len(person_sent) > 0:
                    person_data['sentiment'] = person_sent[['timestamp', 'compound', 'pos', 'neu', 'neg']]
                    print(f"    Sentiment (filtered for {person}): {person_sent.shape}")
                else:
                    # If no match, use aggregated sentiment for all speakers
                    sent_agg = sent_df.groupby('timestamp').agg({
                        'compound': 'mean', 'pos': 'mean', 'neu': 'mean', 'neg': 'mean'
                    }).reset_index()
                    person_data['sentiment'] = sent_agg
                    print(f"    Sentiment (aggregated): {sent_agg.shape}")
            else:
                person_data['sentiment'] = sent_df[['timestamp', 'compound', 'pos', 'neu', 'neg']]
                print(f"    Sentiment: {sent_df.shape}")

        return person_data

    def align_person_data(self, person_data, target_fps=1.0):
        """Align all modalities for a person to the same temporal grid"""

        # Find common time range
        min_time = 0
        max_time = float('inf')

        for modality, data in person_data.items():
            if len(data) > 0:
                min_time = max(min_time, data['timestamp'].min())
                max_time = min(max_time, data['timestamp'].max())

        # Create target timeline
        target_timeline = np.arange(int(min_time), int(max_time) + 1)
        aligned_data = pd.DataFrame({'timestamp': target_timeline})

        # Align each modality
        for modality, data in person_data.items():
            if modality == 'audio':
                # Aggregate high-frequency audio to 1-second intervals
                audio_agg = data.groupby(data['timestamp'].round()).agg({
                    'energy_db': 'mean',
                    'pitch_hz': 'mean',
                    'speaking_rate': 'mean'
                }).reset_index()
                aligned_data = aligned_data.merge(audio_agg, on='timestamp', how='left')

            else:
                # For other modalities, use nearest second matching
                data_rounded = data.copy()
                data_rounded['timestamp'] = data_rounded['timestamp'].round().astype(int)
                data_agg = data_rounded.groupby('timestamp').first().reset_index()
                aligned_data = aligned_data.merge(data_agg, on='timestamp', how='left')

        # Fill missing values
        aligned_data = aligned_data.fillna(method='ffill').fillna(0)

        return aligned_data

    def process_all_data(self):
        """Process all sessions and people"""
        sessions = self.get_all_sessions()
        print(f"Found sessions: {sessions}")

        all_processed_data = {}

        for session in sessions:
            print(f"\n=== Processing Session: {session} ===")
            people = self.get_people_in_session(session)
            print(f"People in session: {people}")

            session_data = {}

            for person in people:
                # Load person's data
                person_data = self.load_person_data(person, session)

                # Align temporal data
                aligned_data = self.align_person_data(person_data)

                # Add person and session info
                aligned_data['person'] = person
                aligned_data['session'] = session

                print(f"    {person} final shape: {aligned_data.shape}")
                print(f"    {person} features: {[col for col in aligned_data.columns if col not in ['timestamp', 'person', 'session']]}")

                session_data[person] = aligned_data
                all_processed_data[f"{session}_{person}"] = aligned_data

            self.sessions_data[session] = session_data

        return all_processed_data

# Process all data
processor = FinalMultimodalDataProcessor()
all_data = processor.process_all_data()

print("\n" + "="*60)
print("FINAL DATA SUMMARY")
print("="*60)
for key, data in all_data.items():
    print(f"{key}:")
    print(f"  Shape: {data.shape}")
    print(f"  Duration: {data['timestamp'].max() - data['timestamp'].min():.0f} seconds")
    print(f"  Features: {len([col for col in data.columns if col not in ['timestamp', 'person', 'session']])}")
    print()

Found sessions: ['DaemahniGianna', 'StephenKeala']

=== Processing Session: DaemahniGianna ===
People in session: ['Daemahni', 'Gianna']
  Loading data for Daemahni in session DaemahniGianna
    Action Units: (85, 18)
    Hand Gestures: (253, 15)
    Audio Features: (3656, 12)
    Sentiment (aggregated): (83, 5)
    Daemahni final shape: (83, 31)
    Daemahni features: ['AU01', 'AU02', 'AU04', 'AU05', 'AU06', 'AU07', 'AU09', 'AU10', 'AU12', 'AU14', 'AU15', 'AU17', 'AU20', 'AU23', 'AU25', 'AU26', 'AU28', 'left_hand_velocity', 'right_hand_velocity', 'gesture_frequency_cumulative', 'face_touches_cumulative', 'energy_db', 'pitch_hz', 'speaking_rate', 'compound', 'pos', 'neu', 'neg']
  Loading data for Gianna in session DaemahniGianna
    Action Units: (85, 18)
    Hand Gestures: (253, 15)
    Audio Features: (3656, 12)
    Sentiment (aggregated): (83, 5)
    Gianna final shape: (83, 31)
    Gianna features: ['AU01', 'AU02', 'AU04', 'AU05', 'AU06', 'AU07', 'AU09', 'AU10', 'AU12', 'AU14', 'A

In [10]:
ground_truth = {
    'session_person': ['DaemahniGianna_Daemahni', 'DaemahniGianna_Gianna',
                       'StephenKeala_Keala', 'StephenKeala_Stephen'],
    'is_attracted': [1, 1, 0, 0]  # Based on your ground truth
}

ground_truth_df = pd.DataFrame(ground_truth)
print("Ground Truth Labels:")
print(ground_truth_df)
print()

Ground Truth Labels:
            session_person  is_attracted
0  DaemahniGianna_Daemahni             1
1    DaemahniGianna_Gianna             1
2       StephenKeala_Keala             0
3     StephenKeala_Stephen             0



In [11]:
class MultimodalDataPreprocessor:
    """Handles data preprocessing and normalization for multimodal attraction data"""

    def __init__(self, sequence_length=15):
        self.sequence_length = sequence_length
        self.feature_scaler = StandardScaler()
        self.feature_names = None
        self.is_fitted = False

    def create_sequences(self, all_data, ground_truth_df):
        """Create sequences for RNN training"""
        X_sequences = []
        y_labels = []
        sequence_info = []

        print("Creating sequences...")

        for key, data in all_data.items():
            # Get label for this person
            label_row = ground_truth_df[ground_truth_df['session_person'] == key]
            if len(label_row) == 0:
                continue

            label = label_row['is_attracted'].iloc[0]

            # Remove non-feature columns
            feature_data = data.drop(['timestamp', 'person', 'session'], axis=1)

            # Store feature names (from first dataset)
            if self.feature_names is None:
                self.feature_names = feature_data.columns.tolist()

            # Create overlapping sequences
            for i in range(len(feature_data) - self.sequence_length + 1):
                sequence = feature_data.iloc[i:i + self.sequence_length].values
                X_sequences.append(sequence)
                y_labels.append(label)
                sequence_info.append({
                    'person': key,
                    'start_time': i,
                    'end_time': i + self.sequence_length - 1
                })

        X = np.array(X_sequences)
        y = np.array(y_labels)

        print(f"Created {len(X)} sequences")
        print(f"Sequence shape: {X.shape}")
        print(f"Features: {len(self.feature_names)}")
        print(f"Class distribution: {np.bincount(y)}")

        return X, y, sequence_info

    def fit_normalizer(self, X_train):
        """Fit the feature normalizer on training data"""
        print("Fitting feature normalizer...")

        # Reshape for normalization (samples*time, features)
        X_train_reshaped = X_train.reshape(-1, X_train.shape[-1])

        # Fit scaler
        self.feature_scaler.fit(X_train_reshaped)
        self.is_fitted = True

        print("Feature normalizer fitted!")
        return self

    def normalize_features(self, X):
        """Normalize features using fitted scaler"""
        if not self.is_fitted:
            raise ValueError("Normalizer not fitted! Call fit_normalizer() first.")

        # Reshape for normalization
        original_shape = X.shape
        X_reshaped = X.reshape(-1, X.shape[-1])

        # Transform
        X_normalized = self.feature_scaler.transform(X_reshaped)
        X_normalized = X_normalized.reshape(original_shape)

        return X_normalized

    def split_by_person(self, X, y, sequence_info, validation_split=0.2):
        """Split data by person to avoid data leakage"""
        print("Splitting data by person...")

        # Group sequences by person
        person_sequences = {}
        for i, info in enumerate(sequence_info):
            person = info['person']
            if person not in person_sequences:
                person_sequences[person] = []
            person_sequences[person].append(i)

        # Split by person
        train_indices = []
        val_indices = []

        for person, indices in person_sequences.items():
            n_val = max(1, int(len(indices) * validation_split))
            val_indices.extend(indices[-n_val:])  # Last sequences for validation
            train_indices.extend(indices[:-n_val])  # Rest for training

        X_train = X[train_indices]
        X_val = X[val_indices]
        y_train = y[train_indices]
        y_val = y[val_indices]

        print(f"Train set: {len(X_train)} sequences")
        print(f"Val set: {len(X_val)} sequences")
        print(f"Train class distribution: {np.bincount(y_train)}")
        print(f"Val class distribution: {np.bincount(y_val)}")

        return X_train, X_val, y_train, y_val, train_indices, val_indices

    def prepare_training_data(self, all_data, ground_truth_df, validation_split=0.2):
        """Complete data preparation pipeline"""
        # Create sequences
        X, y, sequence_info = self.create_sequences(all_data, ground_truth_df)

        # Split by person
        X_train, X_val, y_train, y_val, train_idx, val_idx = self.split_by_person(
            X, y, sequence_info, validation_split
        )

        # Fit normalizer on training data
        self.fit_normalizer(X_train)

        # Normalize both sets
        X_train_norm = self.normalize_features(X_train)
        X_val_norm = self.normalize_features(X_val)

        return {
            'X_train': X_train_norm,
            'X_val': X_val_norm,
            'y_train': y_train,
            'y_val': y_val,
            'train_indices': train_idx,
            'val_indices': val_idx,
            'sequence_info': sequence_info
        }

    def preprocess_new_data(self, person_data):
        """Preprocess new data for prediction"""
        if not self.is_fitted:
            raise ValueError("Preprocessor not fitted! Train model first.")

        # Remove non-feature columns
        feature_data = person_data.drop(['timestamp', 'person', 'session'], axis=1, errors='ignore')

        # Create sequences
        sequences = []
        for i in range(len(feature_data) - self.sequence_length + 1):
            sequence = feature_data.iloc[i:i + self.sequence_length].values
            sequences.append(sequence)

        if len(sequences) == 0:
            raise ValueError(f"Not enough data points. Need at least {self.sequence_length} time steps.")

        X = np.array(sequences)
        X_normalized = self.normalize_features(X)

        return X_normalized


DATA PREPROCESSING
Creating sequences...
Created 278 sequences
Sequence shape: (278, 15, 28)
Features: 28
Class distribution: [140 138]
Splitting data by person...
Train set: 225 sequences
Val set: 53 sequences
Train class distribution: [113 112]
Val class distribution: [27 26]
Fitting feature normalizer...
Feature normalizer fitted!

Preprocessing complete!
Training data shape: (225, 15, 28)
Validation data shape: (53, 15, 28)
Feature names: ['AU01', 'AU02', 'AU04', 'AU05', 'AU06']... (showing first 5)


In [17]:
# Initialize preprocessor
print("="*60)
print("DATA PREPROCESSING")
print("="*60)

preprocessor = MultimodalDataPreprocessor(sequence_length=15)
data_dict = preprocessor.prepare_training_data(all_data, ground_truth_df)

print(f"\nPreprocessing complete!")
print(f"Training data shape: {data_dict['X_train'].shape}")
print(f"Validation data shape: {data_dict['X_val'].shape}")
print(f"Feature names: {preprocessor.feature_names[:28]}... (showing first 5)")

DATA PREPROCESSING
Creating sequences...
Created 278 sequences
Sequence shape: (278, 15, 28)
Features: 28
Class distribution: [140 138]
Splitting data by person...
Train set: 225 sequences
Val set: 53 sequences
Train class distribution: [113 112]
Val class distribution: [27 26]
Fitting feature normalizer...
Feature normalizer fitted!

Preprocessing complete!
Training data shape: (225, 15, 28)
Validation data shape: (53, 15, 28)
Feature names: ['AU01', 'AU02', 'AU04', 'AU05', 'AU06', 'AU07', 'AU09', 'AU10', 'AU12', 'AU14', 'AU15', 'AU17', 'AU20', 'AU23', 'AU25', 'AU26', 'AU28', 'left_hand_velocity', 'right_hand_velocity', 'gesture_frequency_cumulative', 'face_touches_cumulative', 'energy_db', 'pitch_hz', 'speaking_rate', 'compound', 'pos', 'neu', 'neg']... (showing first 5)


In [16]:
class AttractionLSTMModel:
    """LSTM model for attraction prediction"""

    def __init__(self):
        self.model = None
        self.history = None

    def build_model(self, input_shape):
        """Build LSTM model architecture"""
        model = Sequential([
            # First LSTM layer
            LSTM(64, return_sequences=True, input_shape=input_shape, dropout=0.2),
            BatchNormalization(),

            # Second LSTM layer
            LSTM(32, return_sequences=True, dropout=0.2),
            BatchNormalization(),

            # Third LSTM layer
            LSTM(16, dropout=0.2),
            BatchNormalization(),

            # Dense layers
            Dense(32, activation='relu'),
            Dropout(0.3),
            Dense(16, activation='relu'),
            Dropout(0.2),

            # Output layer
            Dense(1, activation='sigmoid')
        ])

        model.compile(
            optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy', 'precision', 'recall']
        )

        self.model = model
        return model

    def train(self, X_train, y_train, X_val, y_val, epochs=100, batch_size=16):
        """Train the model"""
        if self.model is None:
            input_shape = (X_train.shape[1], X_train.shape[2])
            self.build_model(input_shape)

        print("\nModel Architecture:")
        self.model.summary()

        # Callbacks
        callbacks = [
            tf.keras.callbacks.EarlyStopping(
                monitor='val_loss', patience=20, restore_best_weights=True
            ),
            tf.keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss', factor=0.5, patience=10, min_lr=1e-6
            )
        ]

        # Handle class imbalance
        class_weight = None
        if len(np.unique(y_train)) > 1:
            from sklearn.utils.class_weight import compute_class_weight
            classes = np.unique(y_train)
            weights = compute_class_weight('balanced', classes=classes, y=y_train)
            class_weight = {classes[i]: weights[i] for i in range(len(classes))}
            print(f"Class weights: {class_weight}")

        print("\nTraining model...")
        self.history = self.model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks,
            class_weight=class_weight,
            verbose=1
        )

        return self.history

    def predict(self, X):
        """Make predictions"""
        if self.model is None:
            raise ValueError("Model not trained!")
        return self.model.predict(X)

    def evaluate(self, X, y):
        """Evaluate model performance"""
        if self.model is None:
            raise ValueError("Model not trained!")
        return self.model.evaluate(X, y)


In [18]:
print("="*60)
print("MODEL TRAINING")
print("="*60)

attraction_model = AttractionLSTMModel()
history = attraction_model.train(
    data_dict['X_train'],
    data_dict['y_train'],
    data_dict['X_val'],
    data_dict['y_val']
)

MODEL TRAINING

Model Architecture:


Class weights: {0: 0.995575221238938, 1: 1.0044642857142858}

Training model...
Epoch 1/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 63ms/step - accuracy: 0.5156 - loss: 0.7281 - precision: 0.5238 - recall: 0.2946 - val_accuracy: 0.8113 - val_loss: 0.6677 - val_precision: 0.7222 - val_recall: 1.0000 - learning_rate: 0.0010
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.7244 - loss: 0.5705 - precision: 0.7604 - recall: 0.6518 - val_accuracy: 0.9434 - val_loss: 0.6382 - val_precision: 0.8966 - val_recall: 1.0000 - learning_rate: 0.0010
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.8356 - loss: 0.4629 - precision: 0.9121 - recall: 0.7411 - val_accuracy: 0.9623 - val_loss: 0.5956 - val_precision: 0.9286 - val_recall: 1.0000 - learning_rate: 0.0010
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.8844 - loss: 0.3

In [20]:
best_val_loss = min(attraction_model.history.history['val_loss'])
best_val_acc = max(attraction_model.history.history['val_accuracy'])

print(f"\nBest validation loss: {best_val_loss:.4f}")
print(f"Best validation accuracy: {best_val_acc:.4f}")

# Evaluate on validation set
print("\nFinal model performance:")
val_loss, val_acc, val_precision, val_recall = attraction_model.evaluate(
    data_dict['X_val'],
    data_dict['y_val'],
)

print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation Accuracy: {val_acc:.4f}")
print(f"Validation Precision: {val_precision:.4f}")
print(f"Validation Recall: {val_recall:.4f}")

# Test predictions on validation set
val_predictions = attraction_model.predict(data_dict['X_val'])
val_pred_binary = (val_predictions > 0.5).astype(int)

print(f"\nPrediction distribution:")
print(f"Predicted 0 (not attracted): {np.sum(val_pred_binary == 0)}")
print(f"Predicted 1 (attracted): {np.sum(val_pred_binary == 1)}")
print(f"Actual 0 (not attracted): {np.sum(data_dict['y_val'] == 0)}")
print(f"Actual 1 (attracted): {np.sum(data_dict['y_val'] == 1)}")


Best validation loss: 0.0495
Best validation accuracy: 1.0000

Final model performance:
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 1.0000 - loss: 0.0495 - precision: 1.0000 - recall: 1.0000
Validation Loss: 0.0495
Validation Accuracy: 1.0000
Validation Precision: 1.0000
Validation Recall: 1.0000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 546ms/step

Prediction distribution:
Predicted 0 (not attracted): 27
Predicted 1 (attracted): 26
Actual 0 (not attracted): 27
Actual 1 (attracted): 26
