In [1]:
# Library imports
import torch
import os
import torch.nn as nn
from torch.utils.data import DataLoader as TorchDataLoader
import pandas as pd

# Our imports
from DL_vs_HateSpeech.loading_data.dataloader import DataLoader
from DL_vs_HateSpeech.training.training import collate_fn
from DL_vs_HateSpeech.utils import check_frozen_params
from DL_vs_HateSpeech.models.utils import load_model_from_path
from DL_vs_HateSpeech.evaluation.evaluate import evaluate


# Some constants
DATA_SUBSET = "us_pol"
BATCH_SIZE = 1


# Load Data
train_dataset = DataLoader(type="train", subset=DATA_SUBSET)
test_dataset = DataLoader(type="test", subset=DATA_SUBSET)
train_loader = TorchDataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)
test_loader = TorchDataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Function to evaluate all the models
def create_metric_df(path):
    """
    Create a DataFrame with the metrics of all models in the given path.
    """
    # Get all the model files in the directory
    model_files = [f for f in os.listdir(path) if f.endswith('.pth')]
    
    # Dataset to store the model accuracies and F1 scores
    df = pd.DataFrame(columns=["accuracy", "f1_score_0", "f1_score_1", "avg_loss"])
    
    for file_name in model_files:
        model_v2_16 = load_model_from_path(path, file_name=file_name, device="cpu")
        model_v2_16.eval()
        
        # Check how many parameters are frozen
        check_frozen_params(model_v2_16)
        
        # Evaluate the model
        avg_loss_test, accuracy_test, f1_test = evaluate(model_v2_16, test_loader, nn.BCEWithLogitsLoss(), device="cpu")
        
        # Print the results
        print(f"Model: {file_name}")
        print(f"Accuracy: {accuracy_test:.4f}")
        print(f"F1 Score (0): {f1_test[0]:.4f}")
        print(f"F1 Score (1): {f1_test[1]:.4f}")
        print(f"Average Loss: {avg_loss_test:.4f}")
        
        # Append the results to the DataFrame
        df.loc[len(df)] = {
            "accuracy": accuracy_test,
            "f1_score_0": f1_test[0],
            "f1_score_1": f1_test[1],
            "avg_loss": avg_loss_test
        }
    
    # Save the DataFrame to a CSV file
    df.to_csv(os.path.join(path, "model_metrics.csv"), index=False)
    return df

# Best Models

## Best model using CLIP of type 16

### With augmentation

In [3]:
path = "ModelV2_clip_16_aug_True"
create_metric_df(path)


Trainable params: 4473089, Frozen params: 149620737


Evaluating: 100%|██████████| 355/355 [01:44<00:00,  3.39it/s]


Model: model_1.pth
Accuracy: 0.4873
F1 Score (0): 0.0421
F1 Score (1): 0.6500
Average Loss: 0.6944
Trainable params: 4473089, Frozen params: 149620737


Evaluating: 100%|██████████| 355/355 [02:09<00:00,  2.75it/s]


Model: model_2.pth
Accuracy: 0.5831
F1 Score (0): 0.6606
F1 Score (1): 0.4599
Average Loss: 0.6917
Trainable params: 4473089, Frozen params: 149620737


Evaluating: 100%|██████████| 355/355 [02:21<00:00,  2.50it/s]


Model: model_3.pth
Accuracy: 0.4873
F1 Score (0): 0.0619
F1 Score (1): 0.6473
Average Loss: 0.6934
Trainable params: 4473089, Frozen params: 149620737


Evaluating: 100%|██████████| 355/355 [02:22<00:00,  2.49it/s]


Model: model_4.pth
Accuracy: 0.4817
F1 Score (0): 0.0000
F1 Score (1): 0.6502
Average Loss: 0.6934
Trainable params: 4473089, Frozen params: 149620737


Evaluating: 100%|██████████| 355/355 [02:24<00:00,  2.45it/s]

Model: model_5.pth
Accuracy: 0.4817
F1 Score (0): 0.0000
F1 Score (1): 0.6502
Average Loss: 0.6935





Unnamed: 0,accuracy,f1_score_0,f1_score_1,avg_loss
0,0.487324,0.042105,0.65,0.694437
1,0.583099,0.66055,0.459854,0.691728
2,0.487324,0.061856,0.647287,0.693399
3,0.48169,0.0,0.65019,0.69343
4,0.48169,0.0,0.65019,0.693461


### Without augmentation

In [4]:
path = "ModelV2_clip_16_aug_False"
create_metric_df(path)

Trainable params: 4473089, Frozen params: 149620737


Evaluating: 100%|██████████| 355/355 [02:29<00:00,  2.37it/s]


Model: model_1.pth
Accuracy: 0.6197
F1 Score (0): 0.6154
F1 Score (1): 0.6240
Average Loss: 0.6428
Trainable params: 4473089, Frozen params: 149620737


Evaluating: 100%|██████████| 355/355 [02:23<00:00,  2.48it/s]


Model: model_10.pth
Accuracy: 0.5775
F1 Score (0): 0.6359
F1 Score (1): 0.4966
Average Loss: 2.3986
Trainable params: 4473089, Frozen params: 149620737


Evaluating: 100%|██████████| 355/355 [02:20<00:00,  2.52it/s]


Model: model_2.pth
Accuracy: 0.6451
F1 Score (0): 0.6850
F1 Score (1): 0.5935
Average Loss: 0.6320
Trainable params: 4473089, Frozen params: 149620737


Evaluating: 100%|██████████| 355/355 [02:23<00:00,  2.47it/s]


Model: model_3.pth
Accuracy: 0.5972
F1 Score (0): 0.6697
F1 Score (1): 0.4838
Average Loss: 1.8523
Trainable params: 4473089, Frozen params: 149620737


Evaluating: 100%|██████████| 355/355 [02:32<00:00,  2.33it/s]


Model: model_4.pth
Accuracy: 0.6000
F1 Score (0): 0.6682
F1 Score (1): 0.4965
Average Loss: 0.7461
Trainable params: 4473089, Frozen params: 149620737


Evaluating: 100%|██████████| 355/355 [02:23<00:00,  2.47it/s]


Model: model_5.pth
Accuracy: 0.6085
F1 Score (0): 0.6729
F1 Score (1): 0.5123
Average Loss: 1.5892
Trainable params: 4473089, Frozen params: 149620737


Evaluating: 100%|██████████| 355/355 [02:14<00:00,  2.64it/s]


Model: model_6.pth
Accuracy: 0.5859
F1 Score (0): 0.6755
F1 Score (1): 0.4280
Average Loss: 2.2696
Trainable params: 4473089, Frozen params: 149620737


Evaluating: 100%|██████████| 355/355 [02:07<00:00,  2.78it/s]


Model: model_7.pth
Accuracy: 0.6169
F1 Score (0): 0.6495
F1 Score (1): 0.5776
Average Loss: 2.0357
Trainable params: 4473089, Frozen params: 149620737


Evaluating: 100%|██████████| 355/355 [02:05<00:00,  2.82it/s]


Model: model_8.pth
Accuracy: 0.5972
F1 Score (0): 0.6521
F1 Score (1): 0.5217
Average Loss: 1.7629
Trainable params: 4473089, Frozen params: 149620737


Evaluating: 100%|██████████| 355/355 [02:08<00:00,  2.77it/s]

Model: model_9.pth
Accuracy: 0.6254
F1 Score (0): 0.6616
F1 Score (1): 0.5804
Average Loss: 2.2509





Unnamed: 0,accuracy,f1_score_0,f1_score_1,avg_loss
0,0.619718,0.615385,0.623955,0.642766
1,0.577465,0.635922,0.496644,2.398572
2,0.64507,0.685,0.593548,0.632012
3,0.597183,0.669746,0.483755,1.852317
4,0.6,0.668224,0.496454,0.746093
5,0.608451,0.672941,0.512281,1.589199
6,0.585915,0.675497,0.428016,2.269578
7,0.616901,0.649485,0.57764,2.035747
8,0.597183,0.652068,0.521739,1.762853
9,0.625352,0.661578,0.580442,2.250937


## Best model using CLIP of type 32

### With augmentation

In [5]:
path = "ModelV2_clip_32_aug_True"
create_metric_df(path)


Trainable params: 4473089, Frozen params: 151277313


Evaluating: 100%|██████████| 355/355 [01:05<00:00,  5.42it/s]


Model: model_epoch_10_ac_0.504225352112676.pth
Accuracy: 0.5014
F1 Score (0): 0.2594
F1 Score (1): 0.6242
Average Loss: 0.6932
Trainable params: 4473089, Frozen params: 151277313


Evaluating: 100%|██████████| 355/355 [01:04<00:00,  5.50it/s]


Model: model_epoch_20_ac_0.476056338028169.pth
Accuracy: 0.4761
F1 Score (0): 0.0000
F1 Score (1): 0.6450
Average Loss: 0.6936
Trainable params: 4473089, Frozen params: 151277313


Evaluating: 100%|██████████| 355/355 [01:04<00:00,  5.51it/s]


Model: model_epoch_30_ac_0.49014084507042255.pth
Accuracy: 0.4901
F1 Score (0): 0.0524
F1 Score (1): 0.6513
Average Loss: 0.6933
Trainable params: 4473089, Frozen params: 151277313


Evaluating: 100%|██████████| 355/355 [01:04<00:00,  5.53it/s]


Model: model_epoch_40_ac_0.48732394366197185.pth
Accuracy: 0.4845
F1 Score (0): 0.0214
F1 Score (1): 0.6501
Average Loss: 0.6934
Trainable params: 4473089, Frozen params: 151277313


Evaluating: 100%|██████████| 355/355 [01:05<00:00,  5.45it/s]

Model: model_epoch_50_ac_0.4788732394366197.pth
Accuracy: 0.4873
F1 Score (0): 0.0319
F1 Score (1): 0.6513
Average Loss: 0.6931





Unnamed: 0,accuracy,f1_score_0,f1_score_1,avg_loss
0,0.501408,0.259414,0.624204,0.693226
1,0.476056,0.0,0.645038,0.693593
2,0.490141,0.052356,0.651252,0.693282
3,0.484507,0.02139,0.650096,0.693428
4,0.487324,0.031915,0.651341,0.693139


### Without augmentation

In [6]:
path = "ModelV2_clip_32_aug_False"
create_metric_df(path)

Trainable params: 4473089, Frozen params: 151277313


Evaluating: 100%|██████████| 355/355 [01:04<00:00,  5.54it/s]


Model: model_1.pth
Accuracy: 0.5859
F1 Score (0): 0.6038
F1 Score (1): 0.5664
Average Loss: 0.6650
Trainable params: 4473089, Frozen params: 151277313


Evaluating: 100%|██████████| 355/355 [01:04<00:00,  5.49it/s]


Model: model_10.pth
Accuracy: 0.5662
F1 Score (0): 0.5746
F1 Score (1): 0.5575
Average Loss: 2.2015
Trainable params: 4473089, Frozen params: 151277313


Evaluating: 100%|██████████| 355/355 [01:04<00:00,  5.51it/s]


Model: model_2.pth
Accuracy: 0.5887
F1 Score (0): 0.5466
F1 Score (1): 0.6237
Average Loss: 0.6805
Trainable params: 4473089, Frozen params: 151277313


Evaluating: 100%|██████████| 355/355 [01:04<00:00,  5.47it/s]


Model: model_3.pth
Accuracy: 0.5690
F1 Score (0): 0.5920
F1 Score (1): 0.5433
Average Loss: 0.6797
Trainable params: 4473089, Frozen params: 151277313


Evaluating: 100%|██████████| 355/355 [01:04<00:00,  5.47it/s]


Model: model_4.pth
Accuracy: 0.5408
F1 Score (0): 0.4985
F1 Score (1): 0.5766
Average Loss: 1.9660
Trainable params: 4473089, Frozen params: 151277313


Evaluating: 100%|██████████| 355/355 [01:04<00:00,  5.53it/s]


Model: model_5.pth
Accuracy: 0.5944
F1 Score (0): 0.6000
F1 Score (1): 0.5886
Average Loss: 1.7066
Trainable params: 4473089, Frozen params: 151277313


Evaluating: 100%|██████████| 355/355 [01:18<00:00,  4.50it/s]


Model: model_6.pth
Accuracy: 0.5493
F1 Score (0): 0.5855
F1 Score (1): 0.5062
Average Loss: 1.4033
Trainable params: 4473089, Frozen params: 151277313


Evaluating: 100%|██████████| 355/355 [01:07<00:00,  5.26it/s]


Model: model_7.pth
Accuracy: 0.5746
F1 Score (0): 0.5698
F1 Score (1): 0.5794
Average Loss: 1.5673
Trainable params: 4473089, Frozen params: 151277313


Evaluating: 100%|██████████| 355/355 [01:13<00:00,  4.85it/s]


Model: model_8.pth
Accuracy: 0.5690
F1 Score (0): 0.5591
F1 Score (1): 0.5785
Average Loss: 1.8547
Trainable params: 4473089, Frozen params: 151277313


Evaluating: 100%|██████████| 355/355 [01:14<00:00,  4.76it/s]

Model: model_9.pth
Accuracy: 0.5803
F1 Score (0): 0.6005
F1 Score (1): 0.5579
Average Loss: 1.7532





Unnamed: 0,accuracy,f1_score_0,f1_score_1,avg_loss
0,0.585915,0.603774,0.566372,0.665003
1,0.566197,0.574586,0.557471,2.201488
2,0.588732,0.546584,0.623711,0.68047
3,0.569014,0.592,0.543284,0.679677
4,0.540845,0.498462,0.576623,1.966046
5,0.594366,0.6,0.588571,1.706553
6,0.549296,0.585492,0.506173,1.403328
7,0.574648,0.569801,0.579387,1.567297
8,0.569014,0.559078,0.578512,1.8547
9,0.580282,0.600536,0.557864,1.75317
