In [1]:
import pandas as pd
from sklearn.metrics import classification_report
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from collections import Counter
from itertools import product

The extensive experiments using different
classifiers and combinations of different vision and text features
on multiple sentiment scenarios showed that late fusion is more
effective than early fusion. The analysis of explainability revealed
that in late fusion, the classes were predominantly influenced by
the respective uni-modal prediction probabilities, indicating the necessity for extracting more appropriate features through additional
fine-tuning procedures

In [2]:
fusion_df = pd.read_csv("outputs_perModel.csv", 
                         encoding='utf-8', 
                         header=0)

In [3]:
fusion_df

Unnamed: 0,video_id,clip_id,processed_text,mode,annotation_label,probText_0,probText_1,textLabel,probAudio_0,probAudio_1,audioLabel,probVideo_0,probVideo_1,videoLabel
0,03bSnISJMiM,11,a lot of sad part,train,0.0,0.997359,0.002641,0,0.399598,0.600402,1,0.495560,0.504440,1
1,03bSnISJMiM,10,there is sad part,train,0.0,0.997300,0.002700,0,0.439418,0.560582,1,0.493912,0.506088,1
2,03bSnISJMiM,13,and it a really funny,train,1.0,0.002918,0.997082,1,0.350468,0.649532,1,0.450571,0.549429,1
3,03bSnISJMiM,12,but it wa really really awesome,train,1.0,0.002906,0.997094,1,0.347897,0.652103,1,0.402363,0.597637,1
4,03bSnISJMiM,1,anyhow it wa really good,train,1.0,0.002872,0.997128,1,0.435212,0.564788,1,0.317268,0.682732,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2194,zhpQhgha_KU,30,because there really wa not all that much to i...,test,0.0,0.997182,0.002818,0,0.597331,0.402669,0,0.445997,0.554003,1
2195,zhpQhgha_KU,35,so if you like to hear a like more positive re...,test,1.0,0.002930,0.997070,1,0.399418,0.600582,1,0.545271,0.454729,0
2196,zhpQhgha_KU,34,and she really enjoyed the film,test,1.0,0.002905,0.997095,1,0.341855,0.658145,1,0.461881,0.538119,1
2197,zhpQhgha_KU,33,if you do want to see somebody who is possibly...,test,0.0,0.996939,0.003061,0,0.406756,0.593244,1,0.504509,0.495491,0


In [4]:
def printReport(df, column_name):
    subsets = ['train', 'valid', 'test']
        
    print("Classification Report for the Whole Dataset:")
    print(classification_report(df['annotation_label'], df[column_name], digits=4))
    
    for subset in subsets:
        subset_df = df[df['mode'] == subset]
        if not subset_df.empty:
            print(f"\nClassification Report for {subset.capitalize()} Subset:")
            print(classification_report(subset_df['annotation_label'], subset_df[column_name], digits=4))

---
# Hard Fusion (Based on labels)   
---

https://chatgpt.com/c/67981b72-fdf4-8002-95bd-5a75e258e4c3

# Majority Voting

In [10]:
# Function for majority voting
def majority_vote(row):
    # Collect the predicted labels
    labels = [row['textLabel'], 
              row['audioLabel'],
              row['videoLabel'],
             ]
    
    # Count the occurrences of each class
    class_0_count = labels.count(0)
    class_1_count = labels.count(1)
    
    # Apply majority voting
    if class_0_count > class_1_count:
        return 0
    elif class_1_count > class_0_count:
        return 1
    else:
        # Tie-breaking rule (e.g., prioritize textLabel or assign -1 for a tie)
        return row['textLabel']  # Example: prioritizing text predictions

In [11]:
# Apply majority voting to each row
fusion_df['majorityLabel'] = fusion_df.apply(majority_vote, axis=1)

In [12]:
fusion_df

Unnamed: 0,video_id,clip_id,processed_text,mode,annotation_label,probText_0,probText_1,textLabel,probAudio_0,probAudio_1,audioLabel,probVideo_0,probVideo_1,videoLabel,majorityLabel
0,03bSnISJMiM,11,a lot of sad part,train,0.0,0.997359,0.002641,0,0.399598,0.600402,1,0.495560,0.504440,1,1
1,03bSnISJMiM,10,there is sad part,train,0.0,0.997300,0.002700,0,0.439418,0.560582,1,0.493912,0.506088,1,1
2,03bSnISJMiM,13,and it a really funny,train,1.0,0.002918,0.997082,1,0.350468,0.649532,1,0.450571,0.549429,1,1
3,03bSnISJMiM,12,but it wa really really awesome,train,1.0,0.002906,0.997094,1,0.347897,0.652103,1,0.402363,0.597637,1,1
4,03bSnISJMiM,1,anyhow it wa really good,train,1.0,0.002872,0.997128,1,0.435212,0.564788,1,0.317268,0.682732,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2194,zhpQhgha_KU,30,because there really wa not all that much to i...,test,0.0,0.997182,0.002818,0,0.597331,0.402669,0,0.445997,0.554003,1,0
2195,zhpQhgha_KU,35,so if you like to hear a like more positive re...,test,1.0,0.002930,0.997070,1,0.399418,0.600582,1,0.545271,0.454729,0,1
2196,zhpQhgha_KU,34,and she really enjoyed the film,test,1.0,0.002905,0.997095,1,0.341855,0.658145,1,0.461881,0.538119,1,1
2197,zhpQhgha_KU,33,if you do want to see somebody who is possibly...,test,0.0,0.996939,0.003061,0,0.406756,0.593244,1,0.504509,0.495491,0,0


In [13]:
printReport(fusion_df, 'majorityLabel')

Classification Report for the Whole Dataset:
              precision    recall  f1-score   support

         0.0     0.8413    0.7048    0.7670      1023
         1.0     0.7750    0.8844    0.8261      1176

    accuracy                         0.8008      2199
   macro avg     0.8081    0.7946    0.7965      2199
weighted avg     0.8058    0.8008    0.7986      2199


Classification Report for Train Subset:
              precision    recall  f1-score   support

         0.0     0.8911    0.8007    0.8435       552
         1.0     0.8604    0.9262    0.8921       732

    accuracy                         0.8723      1284
   macro avg     0.8758    0.8635    0.8678      1284
weighted avg     0.8736    0.8723    0.8712      1284


Classification Report for Valid Subset:
              precision    recall  f1-score   support

         0.0     0.6212    0.4457    0.5190        92
         1.0     0.6871    0.8175    0.7467       137

    accuracy                         0.6681       229
 

# Weighted Voting

## Weights for each modality based on validation set label's accuracy

In [14]:
def calculate_weights(df, mode='valid'):
    """Calculate weights for each modality based on validation accuracy."""
    validation_df = df[df['mode'] == mode]
    
    text_accuracy = (validation_df['textLabel'] == validation_df['annotation_label']).mean()
    audio_accuracy = (validation_df['audioLabel'] == validation_df['annotation_label']).mean()
    video_accuracy = (validation_df['videoLabel'] == validation_df['annotation_label']).mean()

    return {
        'text': text_accuracy,
        'audio': audio_accuracy,
        'video': video_accuracy
    }

In [15]:
weights = calculate_weights(fusion_df)

print(weights)

{'text': 0.8646288209606987, 'audio': 0.6069868995633187, 'video': 0.45414847161572053}


In [16]:
def weighted_voting_fusion(df, weights):
    """Perform late fusion using Weighted Voting."""
    def fused_label(row):
        # Calculate the weighted votes
        votes = Counter({
            row['textLabel']: weights['text'],
            row['audioLabel']: weights['audio'],
            row['videoLabel']: weights['video']
        })
        # Return the label with the highest weight
        return votes.most_common(1)[0][0]

    df['weightedVotingOnLabel_validationAccuracyWeights_Label'] = df.apply(fused_label, axis=1)
    return df

In [17]:
fusion_df = weighted_voting_fusion(fusion_df, weights)

In [18]:
fusion_df

Unnamed: 0,video_id,clip_id,processed_text,mode,annotation_label,probText_0,probText_1,textLabel,probAudio_0,probAudio_1,audioLabel,probVideo_0,probVideo_1,videoLabel,majorityLabel,weightedVotingOnLabel_validationAccuracyWeights_Label
0,03bSnISJMiM,11,a lot of sad part,train,0.0,0.997359,0.002641,0,0.399598,0.600402,1,0.495560,0.504440,1,1,0
1,03bSnISJMiM,10,there is sad part,train,0.0,0.997300,0.002700,0,0.439418,0.560582,1,0.493912,0.506088,1,1,0
2,03bSnISJMiM,13,and it a really funny,train,1.0,0.002918,0.997082,1,0.350468,0.649532,1,0.450571,0.549429,1,1,1
3,03bSnISJMiM,12,but it wa really really awesome,train,1.0,0.002906,0.997094,1,0.347897,0.652103,1,0.402363,0.597637,1,1,1
4,03bSnISJMiM,1,anyhow it wa really good,train,1.0,0.002872,0.997128,1,0.435212,0.564788,1,0.317268,0.682732,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2194,zhpQhgha_KU,30,because there really wa not all that much to i...,test,0.0,0.997182,0.002818,0,0.597331,0.402669,0,0.445997,0.554003,1,0,0
2195,zhpQhgha_KU,35,so if you like to hear a like more positive re...,test,1.0,0.002930,0.997070,1,0.399418,0.600582,1,0.545271,0.454729,0,1,1
2196,zhpQhgha_KU,34,and she really enjoyed the film,test,1.0,0.002905,0.997095,1,0.341855,0.658145,1,0.461881,0.538119,1,1,1
2197,zhpQhgha_KU,33,if you do want to see somebody who is possibly...,test,0.0,0.996939,0.003061,0,0.406756,0.593244,1,0.504509,0.495491,0,0,1


In [19]:
printReport(fusion_df, 'weightedVotingOnLabel_validationAccuracyWeights_Label')

Classification Report for the Whole Dataset:
              precision    recall  f1-score   support

         0.0     0.7856    0.7019    0.7414      1023
         1.0     0.7626    0.8333    0.7964      1176

    accuracy                         0.7722      2199
   macro avg     0.7741    0.7676    0.7689      2199
weighted avg     0.7733    0.7722    0.7708      2199


Classification Report for Train Subset:
              precision    recall  f1-score   support

         0.0     0.7932    0.6812    0.7329       552
         1.0     0.7827    0.8661    0.8223       732

    accuracy                         0.7866      1284
   macro avg     0.7880    0.7736    0.7776      1284
weighted avg     0.7872    0.7866    0.7839      1284


Classification Report for Valid Subset:
              precision    recall  f1-score   support

         0.0     0.7527    0.7609    0.7568        92
         1.0     0.8382    0.8321    0.8352       137

    accuracy                         0.8035       229
 

## Weights for each modality using logistic regression on labels

There is no reason for us to calculate the weights using logistic regression on the predicted labels (not probabilities) since they will give the same results.

The test accuracy remains the same whether weights for each modality are calculated using logistic regression on the validation set labels or directly based on validation set accuracy because both methods fundamentally measure the reliability of each modality. 

Logistic regression, when applied to labels, essentially learns a simple model that reflects the alignment between each modality’s predictions and the ground truth, which is conceptually similar to calculating validation accuracy. 

In both cases, the derived weights reflect the relative reliability of the modalities and are normalized to ensure comparability. Consequently, the weighted voting process amplifies the influence of more reliable modalities in the same way, leading to similar test accuracy outcomes.

In [20]:
# from sklearn.linear_model import LogisticRegression
# from sklearn.preprocessing import LabelEncoder
# from sklearn.metrics import accuracy_score

# def calculate_weights_with_logistic_regression(df, mode='valid'):
#     """Calculate weights for each modality using logistic regression."""
#     validation_df = df[df['mode'] == mode]
#     X = validation_df[['probText_0', 'probText_1', 'probAudio_0', 'probAudio_1', 'probVideo_0', 'probVideo_1']]
#     y = validation_df['annotation_label']
    
#     # Encode labels as integers
#     le = LabelEncoder()
#     y_encoded = le.fit_transform(y)

#     # Train logistic regression models for each modality
#     models = {}
#     accuracies = {}
    
#     for modality, cols in {
#         'text': ['probText_0', 'probText_1'],
#         'audio': ['probAudio_0', 'probAudio_1'],
#         'video': ['probVideo_0', 'probVideo_1']
#     }.items():
#         X_modality = validation_df[cols]
#         model = LogisticRegression()
#         model.fit(X_modality, y_encoded)
#         models[modality] = model
        
#         # Evaluate accuracy on the validation subset
#         y_pred = model.predict(X_modality)
#         accuracies[modality] = accuracy_score(y_encoded, y_pred)

#     # Normalize weights so they sum to 1
#     total_accuracy = sum(accuracies.values())
#     weights = {modality: acc / total_accuracy for modality, acc in accuracies.items()}

#     return weights

# k-NN

In [96]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

def fusion_knn_rule_with_tuning_label(df, k_range, text="knnLabel_label", train_mode="valid"):
    """
    Apply k-Nearest Neighbors (kNN) for late fusion with hyperparameter tuning on k using 5-fold cross-validation.

    Parameters:
    - df: pandas DataFrame with columns probText_0, probAudio_0, probVideo_0, probText_1, probAudio_1, probVideo_1
    - k_range: list of int, range of k values to tune (e.g., [1, 3, 5, 7, 9])
    - text: str, the name of the column to store the resulting labels
    - mode_column: str, the name of the column indicating the dataset mode (e.g., 'train', 'valid')
    - mode_value: str, the value in mode_column to use for validation (e.g., 'valid')

    Returns:
    - df: DataFrame with the new column containing the fused labels
    - best_k: int, the best k value found during hyperparameter tuning
    """
    # Filter the validation subset
    valid_df = df[df['mode'] == train_mode]

    # Extract features (probabilities) and labels
    X = valid_df[['textLabel', 'audioLabel', 'videoLabel']]
    y = valid_df['annotation_label']  # Assuming you have a 'label' column in your DataFrame

    # Initialize kNN classifier
    knn = KNeighborsClassifier()

    # Set up GridSearchCV for hyperparameter tuning
    param_grid = {'n_neighbors': range(1, k_range)}
    grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')  # 5-fold cross-validation

    # Perform hyperparameter tuning
    grid_search.fit(X, y)

    # Get the best k value
    best_k = grid_search.best_params_['n_neighbors']
    print("Best k: ", best_k)

    # Train the kNN model on the entire validation set using the best k
    best_knn = KNeighborsClassifier(n_neighbors=best_k)
    best_knn.fit(X, y)

    # Predict labels for the entire dataset using the best kNN model
    X_all = df[['textLabel', 'audioLabel', 'videoLabel']]
    df[text] = best_knn.predict(X_all)

    return df

In [97]:
fusion_df = fusion_knn_rule_with_tuning_label(fusion_df, k_range=50)

Best k:  7


In [98]:
printReport(fusion_df, 'knnLabel_label')

Classification Report for the Whole Dataset:
              precision    recall  f1-score   support

         0.0     0.8649    0.9013    0.8827      1023
         1.0     0.9109    0.8776    0.8939      1176

    accuracy                         0.8886      2199
   macro avg     0.8879    0.8894    0.8883      2199
weighted avg     0.8895    0.8886    0.8887      2199


Classification Report for Train Subset:
              precision    recall  f1-score   support

         0.0     0.8898    0.9366    0.9126       552
         1.0     0.9502    0.9126    0.9310       732

    accuracy                         0.9229      1284
   macro avg     0.9200    0.9246    0.9218      1284
weighted avg     0.9243    0.9229    0.9231      1284


Classification Report for Valid Subset:
              precision    recall  f1-score   support

         0.0     0.8081    0.8696    0.8377        92
         1.0     0.9077    0.8613    0.8839       137

    accuracy                         0.8646       229
 

---
# Soft Fusion (Based on class posterior probabilities)   
---

# Averaging

Combine the predicted probabilities by taking their arithmetic mean.

In [21]:
def averaging_fusion(df, weights=[1/3, 1/3, 1/3], text='averaging_label'):
    # Normalize weights to ensure they sum to 1
    weights = [w / sum(weights) for w in weights]

    def avg_prob_label(row):
        # Calculate weighted probabilities for class 0 and class 1
        avg_prob_0 = (row['probText_0'] * weights[0] +
                      row['probAudio_0'] * weights[1] +
                      row['probVideo_0'] * weights[2])
        avg_prob_1 = (row['probText_1'] * weights[0] +
                      row['probAudio_1'] * weights[1] +
                      row['probVideo_1'] * weights[2])
        
        # Return the final label (0 or 1) based on the higher weighted probability
        return 0 if avg_prob_0 > avg_prob_1 else 1

    # Apply the nested function to each row of the DataFrame
    df[text] = df.apply(avg_prob_label, axis=1)
    return df

In [22]:
fusion_df = averaging_fusion(fusion_df)

In [23]:
printReport(fusion_df, 'averaging_label')

Classification Report for the Whole Dataset:
              precision    recall  f1-score   support

         0.0     0.8641    0.9013    0.8823      1023
         1.0     0.9108    0.8767    0.8934      1176

    accuracy                         0.8881      2199
   macro avg     0.8874    0.8890    0.8879      2199
weighted avg     0.8891    0.8881    0.8882      2199


Classification Report for Train Subset:
              precision    recall  f1-score   support

         0.0     0.8898    0.9366    0.9126       552
         1.0     0.9502    0.9126    0.9310       732

    accuracy                         0.9229      1284
   macro avg     0.9200    0.9246    0.9218      1284
weighted avg     0.9243    0.9229    0.9231      1284


Classification Report for Valid Subset:
              precision    recall  f1-score   support

         0.0     0.8081    0.8696    0.8377        92
         1.0     0.9077    0.8613    0.8839       137

    accuracy                         0.8646       229
 

## Weighted Averaging

### Weights calculating my maximizing the validation set accuracy

In [24]:
from sklearn.linear_model import LogisticRegression
# from sklearn.preprocessing import normalize
import numpy as np

def calculate_weights_logistic(df):
    """
    Calculate weights using logistic regression based on probabilities for text, audio, and video.

    Parameters:
    - df: DataFrame containing probabilities and target labels.
    - target_column: Name of the column containing the true labels (0 or 1).

    Returns:
    - Normalized weights for text, audio, and video.
    """

    df = df[fusion_df['mode'] == 'valid']
    
    # Extract feature columns (probabilities for text, audio, video for class 1)
    X = df[['probText_1', 'probAudio_1', 'probVideo_1']].values
    y = df['annotation_label'].values  # True labels

    # Fit logistic regression model
    model = LogisticRegression()
    model.fit(X, y)
    
    # Extract coefficients and normalize to sum to 1
    raw_weights = np.abs(model.coef_[0])  # Take absolute values of coefficients
    normalized_weights = raw_weights / raw_weights.sum()

    return normalized_weights.tolist()

In [25]:
weights_weightedAvg = calculate_weights_logistic(fusion_df)

print(weights_weightedAvg)

[0.8880921469739113, 0.06622909384963249, 0.04567875917645638]


In [26]:
fusion_df = averaging_fusion(fusion_df, weights=weights_weightedAvg, text='weightedAveraging_label')

In [27]:
printReport(fusion_df, 'weightedAveraging_label')

Classification Report for the Whole Dataset:
              precision    recall  f1-score   support

         0.0     0.8649    0.9013    0.8827      1023
         1.0     0.9109    0.8776    0.8939      1176

    accuracy                         0.8886      2199
   macro avg     0.8879    0.8894    0.8883      2199
weighted avg     0.8895    0.8886    0.8887      2199


Classification Report for Train Subset:
              precision    recall  f1-score   support

         0.0     0.8898    0.9366    0.9126       552
         1.0     0.9502    0.9126    0.9310       732

    accuracy                         0.9229      1284
   macro avg     0.9200    0.9246    0.9218      1284
weighted avg     0.9243    0.9229    0.9231      1284


Classification Report for Valid Subset:
              precision    recall  f1-score   support

         0.0     0.8081    0.8696    0.8377        92
         1.0     0.9077    0.8613    0.8839       137

    accuracy                         0.8646       229
 

# Min-Max Rule

In [28]:
def fusion_minmax_rule(df, rule, text="minmax_label"):
    """
    Apply the min or max rule for late fusion to assign labels based on probabilities.

    Parameters:
    - df: pandas DataFrame with columns probText_0, probAudio_0, probVideo_0, probText_1, probAudio_1, probVideo_1
    - rule: str, "min" or "max" to specify which fusion rule to use
    - text: str, the name of the column to store the resulting labels

    Returns:
    - df: DataFrame with the new column containing the fused labels
    """
    def minmax_prob_label(row):
        # Extract probabilities for each class
        probs_0 = [row['probText_0'], row['probAudio_0'], row['probVideo_0']]
        probs_1 = [row['probText_1'], row['probAudio_1'], row['probVideo_1']]

        # Apply the selected rule
        if rule == "min":
            fused_prob_0 = min(probs_0)
            fused_prob_1 = min(probs_1)
        elif rule == "max":
            fused_prob_0 = max(probs_0)
            fused_prob_1 = max(probs_1)

        # Return the final label (0 or 1) based on the higher fused probability
        
        return 0 if fused_prob_0 > fused_prob_1 else 1

    # Apply the nested function to each row of the DataFrame
    df[text] = df.apply(minmax_prob_label, axis=1)
    return df

In [29]:
fusion_df = fusion_minmax_rule(fusion_df, rule="max", text="max_label")

In [30]:
fusion_df

Unnamed: 0,video_id,clip_id,processed_text,mode,annotation_label,probText_0,probText_1,textLabel,probAudio_0,probAudio_1,audioLabel,probVideo_0,probVideo_1,videoLabel,majorityLabel,weightedVotingOnLabel_validationAccuracyWeights_Label,averaging_label,weightedAveraging_label,max_label
0,03bSnISJMiM,11,a lot of sad part,train,0.0,0.997359,0.002641,0,0.399598,0.600402,1,0.495560,0.504440,1,1,0,0,0,0
1,03bSnISJMiM,10,there is sad part,train,0.0,0.997300,0.002700,0,0.439418,0.560582,1,0.493912,0.506088,1,1,0,0,0,0
2,03bSnISJMiM,13,and it a really funny,train,1.0,0.002918,0.997082,1,0.350468,0.649532,1,0.450571,0.549429,1,1,1,1,1,1
3,03bSnISJMiM,12,but it wa really really awesome,train,1.0,0.002906,0.997094,1,0.347897,0.652103,1,0.402363,0.597637,1,1,1,1,1,1
4,03bSnISJMiM,1,anyhow it wa really good,train,1.0,0.002872,0.997128,1,0.435212,0.564788,1,0.317268,0.682732,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2194,zhpQhgha_KU,30,because there really wa not all that much to i...,test,0.0,0.997182,0.002818,0,0.597331,0.402669,0,0.445997,0.554003,1,0,0,0,0,0
2195,zhpQhgha_KU,35,so if you like to hear a like more positive re...,test,1.0,0.002930,0.997070,1,0.399418,0.600582,1,0.545271,0.454729,0,1,1,1,1,1
2196,zhpQhgha_KU,34,and she really enjoyed the film,test,1.0,0.002905,0.997095,1,0.341855,0.658145,1,0.461881,0.538119,1,1,1,1,1,1
2197,zhpQhgha_KU,33,if you do want to see somebody who is possibly...,test,0.0,0.996939,0.003061,0,0.406756,0.593244,1,0.504509,0.495491,0,0,1,0,0,0


In [31]:
printReport(fusion_df, 'max_label')

Classification Report for the Whole Dataset:
              precision    recall  f1-score   support

         0.0     0.8641    0.9013    0.8823      1023
         1.0     0.9108    0.8767    0.8934      1176

    accuracy                         0.8881      2199
   macro avg     0.8874    0.8890    0.8879      2199
weighted avg     0.8891    0.8881    0.8882      2199


Classification Report for Train Subset:
              precision    recall  f1-score   support

         0.0     0.8898    0.9366    0.9126       552
         1.0     0.9502    0.9126    0.9310       732

    accuracy                         0.9229      1284
   macro avg     0.9200    0.9246    0.9218      1284
weighted avg     0.9243    0.9229    0.9231      1284


Classification Report for Valid Subset:
              precision    recall  f1-score   support

         0.0     0.8081    0.8696    0.8377        92
         1.0     0.9077    0.8613    0.8839       137

    accuracy                         0.8646       229
 

In [32]:
fusion_df = fusion_minmax_rule(fusion_df, rule="min", text="min_label")

In [33]:
printReport(fusion_df, 'min_label')

Classification Report for the Whole Dataset:
              precision    recall  f1-score   support

         0.0     0.8641    0.9013    0.8823      1023
         1.0     0.9108    0.8767    0.8934      1176

    accuracy                         0.8881      2199
   macro avg     0.8874    0.8890    0.8879      2199
weighted avg     0.8891    0.8881    0.8882      2199


Classification Report for Train Subset:
              precision    recall  f1-score   support

         0.0     0.8898    0.9366    0.9126       552
         1.0     0.9502    0.9126    0.9310       732

    accuracy                         0.9229      1284
   macro avg     0.9200    0.9246    0.9218      1284
weighted avg     0.9243    0.9229    0.9231      1284


Classification Report for Valid Subset:
              precision    recall  f1-score   support

         0.0     0.8081    0.8696    0.8377        92
         1.0     0.9077    0.8613    0.8839       137

    accuracy                         0.8646       229
 

In [34]:
fusion_df

Unnamed: 0,video_id,clip_id,processed_text,mode,annotation_label,probText_0,probText_1,textLabel,probAudio_0,probAudio_1,audioLabel,probVideo_0,probVideo_1,videoLabel,majorityLabel,weightedVotingOnLabel_validationAccuracyWeights_Label,averaging_label,weightedAveraging_label,max_label,min_label
0,03bSnISJMiM,11,a lot of sad part,train,0.0,0.997359,0.002641,0,0.399598,0.600402,1,0.495560,0.504440,1,1,0,0,0,0,0
1,03bSnISJMiM,10,there is sad part,train,0.0,0.997300,0.002700,0,0.439418,0.560582,1,0.493912,0.506088,1,1,0,0,0,0,0
2,03bSnISJMiM,13,and it a really funny,train,1.0,0.002918,0.997082,1,0.350468,0.649532,1,0.450571,0.549429,1,1,1,1,1,1,1
3,03bSnISJMiM,12,but it wa really really awesome,train,1.0,0.002906,0.997094,1,0.347897,0.652103,1,0.402363,0.597637,1,1,1,1,1,1,1
4,03bSnISJMiM,1,anyhow it wa really good,train,1.0,0.002872,0.997128,1,0.435212,0.564788,1,0.317268,0.682732,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2194,zhpQhgha_KU,30,because there really wa not all that much to i...,test,0.0,0.997182,0.002818,0,0.597331,0.402669,0,0.445997,0.554003,1,0,0,0,0,0,0
2195,zhpQhgha_KU,35,so if you like to hear a like more positive re...,test,1.0,0.002930,0.997070,1,0.399418,0.600582,1,0.545271,0.454729,0,1,1,1,1,1,1
2196,zhpQhgha_KU,34,and she really enjoyed the film,test,1.0,0.002905,0.997095,1,0.341855,0.658145,1,0.461881,0.538119,1,1,1,1,1,1,1
2197,zhpQhgha_KU,33,if you do want to see somebody who is possibly...,test,0.0,0.996939,0.003061,0,0.406756,0.593244,1,0.504509,0.495491,0,0,1,0,0,0,0


Min and max rules give the same results when the probability rankings across modalities are consistent.   
-> This occurs when the relative order of the probabilities for each class (e.g., class 0 and class 1) remains the same across all modalities.   
In such cases, the minimum and maximum probabilities for each class will still correctly identify the class with the higher overall likelihood, leading to identical final labels.
SO  
-> Agreement Among Modalities  
If the probabilities from all modalities consistently favor the same class (e.g., class 0 or class 1), both the AVERAGING and MIN-MAX rules will likely produce the same final label.  

For example:   
Modalities all strongly favor class 0:   
Text: 0.8, Audio: 0.7, Video: 0.9 → Final decision = class 0.   
In such cases, the actual method of fusion (min, max, or average) does not matter because all modalities already agree.   

When there is significant disagreement among modalities, these rules can diverge, and the choice of rule impacts the final classification.


Balancing Effect of Averaging   
The averaging rule aggregates probabilities, smoothing out extremes.  
This is functionally similar to the way the min and max rules act in certain cases:  
- The min rule emphasizes the weakest agreement across modalities.
- The max rule focuses on the strongest agreement.
- Averaging lies between these extremes, blending the influence of all modalities equally. If no extreme disagreement exists among modalities, the averaged probability may still reflect the dominant class.

---

Situations That Prevent Consistent Results (That would make Min and Max rule give different results)
Differences between the min and max rules arise when the rankings of class probabilities are not consistent across modalities. This happens when:

- One or more modalities strongly favor one class while others favor another.
- Disagreement arises due to noisy or unreliable modalities.

# Product Rule

In [35]:
def product_fusion(df, text='product_label'):
    def prod_prob_label(row):
        # Calculate product of probabilities for class 0 and class 1
        prob_0 = (row['probText_0'] * row['probAudio_0'] * row['probVideo_0'])
        prob_1 = (row['probText_1'] * row['probAudio_1'] * row['probVideo_1'])
        
        # Return the final label (0 or 1) based on the higher product probability
        return 0 if prob_0 > prob_1 else 1

    # Apply the nested function to each row of the DataFrame
    df[text] = df.apply(prod_prob_label, axis=1)
    return df

In [36]:
fusion_df = product_fusion(fusion_df, text="product_label")

In [37]:
printReport(fusion_df, 'product_label')

Classification Report for the Whole Dataset:
              precision    recall  f1-score   support

         0.0     0.8641    0.9013    0.8823      1023
         1.0     0.9108    0.8767    0.8934      1176

    accuracy                         0.8881      2199
   macro avg     0.8874    0.8890    0.8879      2199
weighted avg     0.8891    0.8881    0.8882      2199


Classification Report for Train Subset:
              precision    recall  f1-score   support

         0.0     0.8898    0.9366    0.9126       552
         1.0     0.9502    0.9126    0.9310       732

    accuracy                         0.9229      1284
   macro avg     0.9200    0.9246    0.9218      1284
weighted avg     0.9243    0.9229    0.9231      1284


Classification Report for Valid Subset:
              precision    recall  f1-score   support

         0.0     0.8081    0.8696    0.8377        92
         1.0     0.9077    0.8613    0.8839       137

    accuracy                         0.8646       229
 

In [38]:
fusion_df

Unnamed: 0,video_id,clip_id,processed_text,mode,annotation_label,probText_0,probText_1,textLabel,probAudio_0,probAudio_1,...,probVideo_0,probVideo_1,videoLabel,majorityLabel,weightedVotingOnLabel_validationAccuracyWeights_Label,averaging_label,weightedAveraging_label,max_label,min_label,product_label
0,03bSnISJMiM,11,a lot of sad part,train,0.0,0.997359,0.002641,0,0.399598,0.600402,...,0.495560,0.504440,1,1,0,0,0,0,0,0
1,03bSnISJMiM,10,there is sad part,train,0.0,0.997300,0.002700,0,0.439418,0.560582,...,0.493912,0.506088,1,1,0,0,0,0,0,0
2,03bSnISJMiM,13,and it a really funny,train,1.0,0.002918,0.997082,1,0.350468,0.649532,...,0.450571,0.549429,1,1,1,1,1,1,1,1
3,03bSnISJMiM,12,but it wa really really awesome,train,1.0,0.002906,0.997094,1,0.347897,0.652103,...,0.402363,0.597637,1,1,1,1,1,1,1,1
4,03bSnISJMiM,1,anyhow it wa really good,train,1.0,0.002872,0.997128,1,0.435212,0.564788,...,0.317268,0.682732,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2194,zhpQhgha_KU,30,because there really wa not all that much to i...,test,0.0,0.997182,0.002818,0,0.597331,0.402669,...,0.445997,0.554003,1,0,0,0,0,0,0,0
2195,zhpQhgha_KU,35,so if you like to hear a like more positive re...,test,1.0,0.002930,0.997070,1,0.399418,0.600582,...,0.545271,0.454729,0,1,1,1,1,1,1,1
2196,zhpQhgha_KU,34,and she really enjoyed the film,test,1.0,0.002905,0.997095,1,0.341855,0.658145,...,0.461881,0.538119,1,1,1,1,1,1,1,1
2197,zhpQhgha_KU,33,if you do want to see somebody who is possibly...,test,0.0,0.996939,0.003061,0,0.406756,0.593244,...,0.504509,0.495491,0,0,1,0,0,0,0,0


# k-NN

In [89]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

def fusion_knn_rule_with_tuning(df, k_range, text="knn_label", train_mode="valid"):
    """
    Apply k-Nearest Neighbors (kNN) for late fusion with hyperparameter tuning on k using 5-fold cross-validation.

    Parameters:
    - df: pandas DataFrame with columns probText_0, probAudio_0, probVideo_0, probText_1, probAudio_1, probVideo_1
    - k_range: list of int, range of k values to tune (e.g., [1, 3, 5, 7, 9])
    - text: str, the name of the column to store the resulting labels
    - mode_column: str, the name of the column indicating the dataset mode (e.g., 'train', 'valid')
    - mode_value: str, the value in mode_column to use for validation (e.g., 'valid')

    Returns:
    - df: DataFrame with the new column containing the fused labels
    - best_k: int, the best k value found during hyperparameter tuning
    """
    # Filter the validation subset
    valid_df = df[df['mode'] == train_mode]

    # Extract features (probabilities) and labels
    X = valid_df[['probText_0', 'probAudio_0', 'probVideo_0', 'probText_1', 'probAudio_1', 'probVideo_1']]
    y = valid_df['annotation_label']  # Assuming you have a 'label' column in your DataFrame

    # Initialize kNN classifier
    knn = KNeighborsClassifier()

    # Set up GridSearchCV for hyperparameter tuning
    param_grid = {'n_neighbors': range(1, k_range)}
    grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')  # 5-fold cross-validation

    # Perform hyperparameter tuning
    grid_search.fit(X, y)

    # Get the best k value
    best_k = grid_search.best_params_['n_neighbors']
    print("Best k: ", best_k)

    # Train the kNN model on the entire validation set using the best k
    best_knn = KNeighborsClassifier(n_neighbors=best_k)
    best_knn.fit(X, y)

    # Predict labels for the entire dataset using the best kNN model
    X_all = df[['probText_0', 'probAudio_0', 'probVideo_0', 'probText_1', 'probAudio_1', 'probVideo_1']]
    df[text] = best_knn.predict(X_all)

    return df

In [94]:
fusion_df = fusion_knn_rule_with_tuning(fusion_df, k_range=50)

Best k:  9


In [95]:
printReport(fusion_df, 'knn_label')

Classification Report for the Whole Dataset:
              precision    recall  f1-score   support

         0.0     0.8649    0.9013    0.8827      1023
         1.0     0.9109    0.8776    0.8939      1176

    accuracy                         0.8886      2199
   macro avg     0.8879    0.8894    0.8883      2199
weighted avg     0.8895    0.8886    0.8887      2199


Classification Report for Train Subset:
              precision    recall  f1-score   support

         0.0     0.8898    0.9366    0.9126       552
         1.0     0.9502    0.9126    0.9310       732

    accuracy                         0.9229      1284
   macro avg     0.9200    0.9246    0.9218      1284
weighted avg     0.9243    0.9229    0.9231      1284


Classification Report for Valid Subset:
              precision    recall  f1-score   support

         0.0     0.8081    0.8696    0.8377        92
         1.0     0.9077    0.8613    0.8839       137

    accuracy                         0.8646       229
 

# Naive Bayes

In [43]:
from sklearn.naive_bayes import GaussianNB
def naive_bayes_fusion(df, text='naive_bayes_label'):
    # df_train = df[df['mode'].isin(['train', 'valid'])]
    df_train = df[df['mode'] == 'valid']

    # Extract the relevant features for probabilities (e.g., probText, probAudio, probVideo for both classes)
    feature_columns = ['probText_0', 'probText_1',
                       'probAudio_0', 'probAudio_1',
                       'probVideo_0', 'probVideo_1']

    # Prepare the feature matrix (X) and pseudo-labels (y) for training the Naive Bayes model
    # We'll assume equal prior probabilities for simplicity.
    X = df_train[feature_columns].values
    y = df_train['annotation_label']

    # Train a Gaussian Naive Bayes model
    gnb = GaussianNB()
    gnb.fit(X, y)

    def predict_label(row):
        # Prepare row features for prediction
        row_features = row[feature_columns].values.reshape(1, -1)
        
        # Predict the label using Naive Bayes
        return gnb.predict(row_features)[0]

    # Apply the prediction function to each row of the DataFrame
    df[text] = df.apply(predict_label, axis=1)
    return df

In [44]:
fusion_df = naive_bayes_fusion(fusion_df)

In [45]:
printReport(fusion_df, 'naive_bayes_label')

Classification Report for the Whole Dataset:
              precision    recall  f1-score   support

         0.0     0.8430    0.6931    0.7607      1023
         1.0     0.7688    0.8878    0.8240      1176

    accuracy                         0.7972      2199
   macro avg     0.8059    0.7904    0.7924      2199
weighted avg     0.8033    0.7972    0.7946      2199


Classification Report for Train Subset:
              precision    recall  f1-score   support

         0.0     0.8552    0.5562    0.6740       552
         1.0     0.7351    0.9290    0.8208       732

    accuracy                         0.7687      1284
   macro avg     0.7951    0.7426    0.7474      1284
weighted avg     0.7867    0.7687    0.7577      1284


Classification Report for Valid Subset:
              precision    recall  f1-score   support

         0.0     0.8081    0.8696    0.8377        92
         1.0     0.9077    0.8613    0.8839       137

    accuracy                         0.8646       229
 

# Bagging

## Find oiptimal number of estimators

In [159]:
# # Prepare the features (model probabilities) and the true labels
# X = fusion_df[['probText_0', 'probText_1', 
#                'probAudio_0', 'probAudio_1',
#                'probVideo_0', 'probVideo_1',
#               ]]
# y = fusion_df['annotation_label']

# # Filter out the validation data
# valid_data = fusion_df[fusion_df['mode'] == 'valid']
# X_valid = valid_data[['probText_0', 'probText_1', 
#                        'probAudio_0', 'probAudio_1',
#                        'probVideo_0', 'probVideo_1',
#                       ]]
# y_valid = valid_data['annotation_label']

# # Initialize variables to store results
# best_n_estimators = 0
# best_accuracy = 0

# # Loop over a range of n_estimators (e.g., from 10 to 200)
# for n_estimators in range(1, 201):
#     # Create the base model (Decision Tree) for Bagging
#     base_model = DecisionTreeClassifier()
    
#     # Instantiate the BaggingClassifier with the current n_estimators
#     bagging_model = BaggingClassifier(estimator=base_model, 
#                                       n_estimators=n_estimators, 
#                                       random_state=42)
    
#     # Train the model on the entire training set
#     train_data = fusion_df[fusion_df['mode'] == 'train']
#     X_train = train_data[['probText_0', 'probText_1', 
#                            'probAudio_0', 'probAudio_1',
#                            'probVideo_0', 'probVideo_1',
#                           ]]
#     y_train = train_data['annotation_label']
    
#     bagging_model.fit(X_train, y_train)
    
#     # Predict on the validation set
#     y_pred_valid = bagging_model.predict(X_valid)
    
#     # Calculate the accuracy on the validation set
#     accuracy = accuracy_score(y_valid, y_pred_valid)
    
#     # If the current model has a better accuracy, update the best parameters
#     if accuracy > best_accuracy:
#         best_accuracy = accuracy
#         best_n_estimators = n_estimators

# # Output the best number of estimators and the corresponding accuracy
# print(f"The best number of estimators is: {best_n_estimators}")
# print(f"The corresponding validation set accuracy is: {best_accuracy:.4f}")

## Calculate metrics

In [160]:
# # Prepare the features (model probabilities) and the true labels
# X = fusion_df[['probText_0', 'probText_1', 
#                'probAudio_0', 'probAudio_1',
#                'probVideo_0', 'probVideo_1',
#               ]]
# y = fusion_df['annotation_label']

# # Create the base model (Decision Tree) for Bagging
# base_model = DecisionTreeClassifier()

# # Instantiate BaggingClassifier with Decision Tree as base model
# bagging_model = BaggingClassifier(estimator=base_model, 
#                                   n_estimators=best_n_estimators, 
#                                   random_state=42)

# # Train the model on the training set
# train_data = fusion_df[fusion_df['mode'] == 'train']
# X_train = train_data[['probText_0', 'probText_1', 
#                        'probAudio_0', 'probAudio_1',
#                        'probVideo_0', 'probVideo_1',
#                       ]]
# y_train = train_data['annotation_label']

# bagging_model.fit(X_train, y_train)

# # Predict on the whole dataset
# y_pred = bagging_model.predict(X)

# # Classification report for the entire dataset
# print("Classification Report on the Entire Dataset:")
# print(classification_report(y, y_pred, digits=4))

In [161]:
# for mode in ['train', 'valid', 'test']:
#     subset_data = fusion_df[fusion_df['mode'] == mode]
#     X_subset = subset_data[['probText_0', 'probText_1', 
#                             'probAudio_0', 'probAudio_1',
#                             'probVideo_0', 'probVideo_1',
#                            ]]
#     y_subset = subset_data['annotation_label']
    
#     # Get predictions
#     y_pred = bagging_model.predict(X_subset)
    
#     # Print classification report
#     print(f"\nClassification Report on the {mode.capitalize()} Subset:")
#     print(classification_report(y_subset, y_pred, digits=4))

# Fully Connected Layer

https://scikit-learn.org/dev/modules/generated/sklearn.neural_network.MLPClassifier.html

In [8]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

def train_mlp_using_labels(df, train_mode='train'):
    """
    Train an MLP classifier using labels as features on the 'train' subset, 
    perform grid search to maximize accuracy on the 'valid' subset, and 
    predict final labels for all rows.

    Args:
        df (pd.DataFrame): Input DataFrame with columns:
            'textLabel', 'audioLabel', 'videoLabel', 'annotation_label', 'mode'.
        train_mode (str): The mode to filter the training subset (default: 'train').
    Returns:
        pd.DataFrame: DataFrame with an additional column 'mlp_label' containing the predicted labels.
    """
    # Filter the training and validation subsets
    train_df = df[df['mode'] == train_mode]
    # train_df = df[df['mode'].isin(['train', 'valid'])]
    # valid_df = df[df['mode'] == valid_mode]

    # Features (labels from text, audio, and video)
    X_train = train_df[['textLabel', 'audioLabel', 'videoLabel']]
    y_train = train_df['annotation_label']
    # X_valid = valid_df[['textLabel', 'audioLabel', 'videoLabel']]
    # y_valid = valid_df['annotation_label']

    # Define hyperparameter grid for grid search
    param_grid = {
        'hidden_layer_sizes': [(4,), (8,), (16,), (32,), (64,), (128,), (64, 32), (32, 16), (16, 8), (8, 4)],
        'activation': ['relu', 'tanh', 'identity'],
        'learning_rate_init': [1e-5, 5e-5, 1e-4, 5e-4, 1e-3],
        'batch_size': [4, 8, 16, 32],
        'max_iter': [500]
    }

    # Initialize the MLPClassifier
    mlp = MLPClassifier(early_stopping=True, tol=1e-5)

    # Perform grid search with accuracy as the scoring metric
    grid_search = GridSearchCV(estimator=mlp, param_grid=param_grid, scoring='accuracy', cv=5, n_jobs=-1)
    grid_search.fit(X_train, y_train)

    # Select the best estimator based on grid search
    best_mlp = grid_search.best_estimator_
    print("Best params: ", grid_search.best_params_)
    print("Num of iterations: ", best_mlp.n_iter_)

    # # Evaluate the best model on the validation subset
    # valid_predictions = best_mlp.predict(X_valid)
    
    # valid_accuracy = accuracy_score(y_valid, valid_predictions)
    # print(f"Validation Accuracy of Best Model: {valid_accuracy:.4f}")

    # Use the trained best MLP model to predict for the entire dataset
    all_features = df[['textLabel', 'audioLabel', 'videoLabel']]
    df['mlp_label'] = best_mlp.predict(all_features)

    return df

In [9]:
fusion_df = train_mlp_using_labels(fusion_df, train_mode='train')

  arr = np.array(param_list)


Best params:  {'activation': 'relu', 'batch_size': 4, 'hidden_layer_sizes': (16, 8), 'learning_rate_init': 5e-05, 'max_iter': 500}
Num of iterations:  23


In [10]:
printReport(fusion_df, 'mlp_label')

Classification Report for the Whole Dataset:
              precision    recall  f1-score   support

         0.0     0.8413    0.7048    0.7670      1023
         1.0     0.7750    0.8844    0.8261      1176

    accuracy                         0.8008      2199
   macro avg     0.8081    0.7946    0.7965      2199
weighted avg     0.8058    0.8008    0.7986      2199


Classification Report for Train Subset:
              precision    recall  f1-score   support

         0.0     0.8911    0.8007    0.8435       552
         1.0     0.8604    0.9262    0.8921       732

    accuracy                         0.8723      1284
   macro avg     0.8758    0.8635    0.8678      1284
weighted avg     0.8736    0.8723    0.8712      1284


Classification Report for Valid Subset:
              precision    recall  f1-score   support

         0.0     0.6212    0.4457    0.5190        92
         1.0     0.6871    0.8175    0.7467       137

    accuracy                         0.6681       229
 

In [8]:
# from sklearn.neural_network import MLPClassifier
# import pandas as pd

# def train_mlp_using_probabilities(df, mode='valid'):
#     """
#     Train an MLP classifier using probabilities as features from 'text', 'audio', and 'video'
#     on the 'valid' subset and predict the final labels for all rows.

#     Args:
#         df (pd.DataFrame): Input DataFrame with columns:
#             'probText_0', 'probText_1', 'probAudio_0', 'probAudio_1',
#             'probVideo_0', 'probVideo_1', 'annotation_label', 'mode'.
#         mode (str): The mode to filter the validation subset (default: 'valid').

#     Returns:
#         pd.DataFrame: DataFrame with an additional column 'mlp_label' containing the predicted labels.
#     """
#     # Filter the validation subset
#     valid_df = df[df['mode'] == mode]
    
#     # Features (probabilities from text, audio, and video)
#     X = valid_df[['probText_0', 'probText_1', 'probAudio_0', 'probAudio_1', 'probVideo_0', 'probVideo_1']]
    
#     # Target (ground truth labels)
#     y = valid_df['annotation_label']
    
#     # Train the MLP classifier
#     mlp = MLPClassifier(hidden_layer_sizes=(64, ), max_iter=1000000000)
#     mlp.fit(X, y)
    
#     # Use the trained MLP model to predict for the entire dataset
#     all_features = df[['probText_0', 'probText_1', 'probAudio_0', 'probAudio_1', 'probVideo_0', 'probVideo_1']]
#     df['mlpProb_label'] = mlp.predict(all_features).astype('int')
    
#     return df

In [5]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

def train_mlp_using_probabilities(df, train_mode='train'):
    """
    Train an MLP classifier using probabiliy scores as features on the 'train' subset, 
    perform grid search to maximize accuracy on the 'valid' subset, and 
    predict final labels for all rows.
    """
    # Filter the training and validation subsets
    train_df = df[df['mode'] == train_mode]
    # train_df = df[df['mode'].isin(['train', 'valid'])]
    # valid_df = df[df['mode'] == valid_mode]

    # Features (labels from text, audio, and video)
    X_train = train_df[['probText_0', 'probText_1', 'probAudio_0', 'probAudio_1', 'probVideo_0', 'probVideo_1']]
    y_train = train_df['annotation_label']
    # X_valid = valid_df[['probText_0', 'probText_1', 'probAudio_0', 'probAudio_1', 'probVideo_0', 'probVideo_1']]
    # y_valid = valid_df['annotation_label']

    # Define hyperparameter grid for grid search
    param_grid = {
        'hidden_layer_sizes': [(4,), (8,), (16,), (32,), (64,), (128,), (64, 32), (32, 16), (16, 8), (8, 4)],
        'activation': ['relu', 'tanh', 'identity'],
        'learning_rate_init': [1e-5, 5e-5, 1e-4, 5e-4, 1e-3],
        'batch_size': [4, 8, 16, 32],
        'max_iter': [500]
    }

    # Initialize the MLPClassifier
    mlp = MLPClassifier(early_stopping=True, tol=1e-5)

    # Perform grid search with accuracy as the scoring metric
    grid_search = GridSearchCV(estimator=mlp, param_grid=param_grid, scoring='accuracy', cv=5, n_jobs=-1)
    grid_search.fit(X_train, y_train)

    # Select the best estimator based on grid search
    best_mlp = grid_search.best_estimator_
    print("Best params: ", grid_search.best_params_)
    print("Num of iterations: ", best_mlp.n_iter_)

    # # Evaluate the best model on the validation subset
    # valid_predictions = best_mlp.predict(X_valid)
    
    # valid_accuracy = accuracy_score(y_valid, valid_predictions)
    # print(f"Validation Accuracy of Best Model: {valid_accuracy:.4f}")

    # Use the trained best MLP model to predict for the entire dataset
    all_features = df[['probText_0', 'probText_1', 'probAudio_0', 'probAudio_1', 'probVideo_0', 'probVideo_1']]
    df['mlpProb_label'] = best_mlp.predict(all_features)

    return df

In [6]:
fusion_df = train_mlp_using_probabilities(fusion_df, train_mode='train')

  arr = np.array(param_list)


Best params:  {'activation': 'relu', 'batch_size': 32, 'hidden_layer_sizes': (16, 8), 'learning_rate_init': 0.001, 'max_iter': 500}
Num of iterations:  13


In [7]:
printReport(fusion_df, 'mlpProb_label')

Classification Report for the Whole Dataset:
              precision    recall  f1-score   support

         0.0     0.8662    0.8983    0.8820      1023
         1.0     0.9086    0.8793    0.8937      1176

    accuracy                         0.8881      2199
   macro avg     0.8874    0.8888    0.8878      2199
weighted avg     0.8889    0.8881    0.8882      2199


Classification Report for Train Subset:
              precision    recall  f1-score   support

         0.0     0.8912    0.9348    0.9125       552
         1.0     0.9489    0.9139    0.9311       732

    accuracy                         0.9229      1284
   macro avg     0.9201    0.9244    0.9218      1284
weighted avg     0.9241    0.9229    0.9231      1284


Classification Report for Valid Subset:
              precision    recall  f1-score   support

         0.0     0.8081    0.8696    0.8377        92
         1.0     0.9077    0.8613    0.8839       137

    accuracy                         0.8646       229
 