In [2]:
# Library imports
import torch
import torch.nn as nn
from torch.utils.data import DataLoader as TorchDataLoader

# Our imports
from DL_vs_HateSpeech.loading_data.dataloader import DataLoader
from DL_vs_HateSpeech.training.training import collate_fn
from DL_vs_HateSpeech.plots.plot_loss import plot_losses_from_path, plot_metrics_from_path
from DL_vs_HateSpeech.models.utils import load_model_from_path
from DL_vs_HateSpeech.evaluation.evaluate import evaluate


# Some constants
DATA_SUBSET = "us_pol"
BATCH_SIZE = 1


# Load Data
train_dataset = DataLoader(type="train", subset=DATA_SUBSET)
test_dataset = DataLoader(type="test", subset=DATA_SUBSET)
train_loader = TorchDataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)
test_loader = TorchDataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)


# Best Models

## Best model using CLIP of type 16

In [3]:
path = ".\models\model_checkpoints\ModelV2_single_class_clip_16_testing_False"
model_v2_16 = load_model_from_path(path, file_name="model_epoch_2_ac_0.647887323943662.pth")

model_v2_16.eval()

ModelV2(
  (clip): AttentionCLIP(
    (pretrained_model): CLIPModel(
      (text_model): CLIPTextTransformer(
        (embeddings): CLIPTextEmbeddings(
          (token_embedding): Embedding(49408, 512)
          (position_embedding): Embedding(77, 512)
        )
        (encoder): CLIPEncoder(
          (layers): ModuleList(
            (0-11): 12 x CLIPEncoderLayer(
              (self_attn): CLIPAttention(
                (k_proj): Linear(in_features=512, out_features=512, bias=True)
                (v_proj): Linear(in_features=512, out_features=512, bias=True)
                (q_proj): Linear(in_features=512, out_features=512, bias=True)
                (out_proj): Linear(in_features=512, out_features=512, bias=True)
              )
              (layer_norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
              (mlp): CLIPMLP(
                (activation_fn): QuickGELUActivation()
                (fc1): Linear(in_features=512, out_features=2048, bias=True)
        

In [4]:
avg_loss_test, accuracy_test, f1_test = evaluate(model_v2_16, test_loader, nn.BCEWithLogitsLoss(), device="cpu")

Evaluating: 100%|██████████| 355/355 [01:43<00:00,  3.44it/s]


In [5]:
print(f"Average Loss: {avg_loss_test:.4f}")
print(f"Accuracy: {accuracy_test * 100:.2f}%")
print(f"F1 Score label 0: {f1_test[0] * 100:.2f}%")
print(f"F1 Score label 1: {f1_test[1] * 100:.2f}%")

Average Loss: 0.6387
Accuracy: 65.63%
F1 Score label 0: 69.50%
F1 Score label 1: 60.65%


## Best model using CLIP of type 32

In [6]:
path = ".\models\model_checkpoints\ModelV2_single_class_clip_32_testing_False"
model_v2_32 = load_model_from_path(path, file_name="model_epoch_28_ac_0.6225352112676056.pth")

model_v2_32.eval()

ModelV2(
  (clip): AttentionCLIP(
    (pretrained_model): CLIPModel(
      (text_model): CLIPTextTransformer(
        (embeddings): CLIPTextEmbeddings(
          (token_embedding): Embedding(49408, 512)
          (position_embedding): Embedding(77, 512)
        )
        (encoder): CLIPEncoder(
          (layers): ModuleList(
            (0-11): 12 x CLIPEncoderLayer(
              (self_attn): CLIPAttention(
                (k_proj): Linear(in_features=512, out_features=512, bias=True)
                (v_proj): Linear(in_features=512, out_features=512, bias=True)
                (q_proj): Linear(in_features=512, out_features=512, bias=True)
                (out_proj): Linear(in_features=512, out_features=512, bias=True)
              )
              (layer_norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
              (mlp): CLIPMLP(
                (activation_fn): QuickGELUActivation()
                (fc1): Linear(in_features=512, out_features=2048, bias=True)
        

In [7]:
avg_loss_test, accuracy_test, f1_test = evaluate(model_v2_16, test_loader, nn.BCEWithLogitsLoss(), device="cpu")

Evaluating: 100%|██████████| 355/355 [01:40<00:00,  3.54it/s]


In [8]:
print(f"Average Loss: {avg_loss_test:.4f}")
print(f"Accuracy: {accuracy_test * 100:.2f}%")
print(f"F1 Score label 0: {f1_test[0] * 100:.2f}%")
print(f"F1 Score label 1: {f1_test[1] * 100:.2f}%")

Average Loss: 0.6334
Accuracy: 65.07%
F1 Score label 0: 68.69%
F1 Score label 1: 60.51%
