In [None]:
!pip install vaderSentiment



In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
from sklearn.metrics import f1_score, accuracy_score
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, BertTokenizer
import spacy
import nltk
import re
import string
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize, word_tokenize

In [None]:
# Fetch the NLTK tokenizer ('punkt', 'punkt_tab') and POS-tagger resources.
# NOTE(review): presumably both name variants are requested so the notebook
# works across NLTK versions — confirm.
# NOTE(review): no visible cell calls a POS tagger, so the two tagger downloads
# may be unnecessary.
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [None]:
# Shared VADER analyzer; handcrafted_features() reads this module-level name.
analyzer = SentimentIntensityAnalyzer()

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [None]:
# Read the train/test CSV splits; TweetDataset reads their 'tweet' (text) and
# 'value' (label) columns.
train_df = pd.read_csv('train_en_dataset.csv')
test_df = pd.read_csv('test_en_dataset.csv')

In [None]:
class TweetDataset(Dataset):
    """Thin ``Dataset`` view over a DataFrame of tweets and their labels.

    Items are ``(tweet, label)`` pairs taken from the ``'tweet'`` and
    ``'value'`` columns, addressed by position.
    """

    def __init__(self, data):
        # Keep a reference to the frame; rows are looked up on access.
        self.data = data

    def __len__(self):
        """Number of rows in the wrapped frame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(tweet, label)`` for the positional index (or slice) ``idx``."""
        selected = self.data.iloc[idx]
        return selected['tweet'], selected['value']

In [None]:
# Wrap both DataFrames so they can be consumed by DataLoader.
train_dataset = TweetDataset(train_df)
test_dataset = TweetDataset(test_df)

In [None]:
# Peek at the first five samples; the slice is forwarded to DataFrame.iloc, so
# this displays a (tweets, labels) pair of Series rather than single items.
train_dataset[0:5]

(0    “mansplaining” is literally just how intellige...
 1    if you don’t want me but your friend do, dont ...
 2    @username @username @username @username isn't ...
 3    @username's account is temporarily unavailable...
 4    @username if it wasn't for the gender biases o...
 Name: tweet, dtype: object,
 0    1.0
 1    1.0
 2    1.0
 3    0.0
 4    1.0
 Name: value, dtype: float64)

In [None]:
# Batches of 16 samples; only the training loader is shuffled.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [None]:
# NOTE(review): `nlp` is only referenced by a commented-out line in
# handcrafted_features(); this model load may be unnecessary — confirm.
nlp = spacy.load('en_core_web_sm')

In [None]:
# Word list for the lexicon-match features in handcrafted_features(): the
# 'lemma' column of the tab-separated hurtlex_EN.tsv file, kept as a list.
en_lex_df = pd.read_csv('hurtlex_EN.tsv', sep='\t')
en_lex = en_lex_df['lemma'].tolist()

In [None]:
def handcrafted_features(texts):
    """Compute 17 hand-crafted stylistic and lexical features per text.

    Reads two module-level objects: ``analyzer`` (the VADER sentiment analyzer)
    and ``en_lex`` (the lexicon word list).

    Args:
        texts: Iterable of strings, one per tweet.

    Returns:
        A ``torch.float32`` tensor with one row per text and 17 columns, in
        this order: token count, mean characters per token, sentence count,
        hashtag count, ``@username`` mention count, ``#URL`` link count, VADER
        compound score, lexicon-word count, lexicon-word ratio, punctuation
        count, punctuation ratio, exclamation count, exclamation ratio,
        question-mark count, question-mark ratio, emoji count, emoji ratio.
        Every ratio is relative to the token count and is 0 for a text with no
        tokens. An empty ``texts`` yields a tensor of shape ``(0, 17)``.
    """
    num_features = 17

    # Built once per call: a set gives constant-time membership tests, whereas
    # scanning the lexicon list for every token is linear in the lexicon size.
    lexicon = set(en_lex)

    def per_token(count, token_total):
        # Ratio relative to the token count; 0 when the text has no tokens.
        return count / token_total if token_total > 0 else 0

    features = []
    for text in texts:
        # 1. token count in a tweet
        tokens = nltk.word_tokenize(text)
        token_num_per_tweet = len(tokens)

        # 2. average number of characters per token in a tweet,
        # a rough proxy for the complexity of the word choice
        char_num_per_tweet = sum(len(token) for token in tokens)
        avg_char_num_per_token = per_token(char_num_per_tweet, token_num_per_tweet)

        # 3. sentence count in a tweet
        sentence_num = len(nltk.sent_tokenize(text))

        # 4. number of hashtags (the '#URL' link placeholder is excluded)
        hashtag_num = len(re.findall(r'#(?!URL\b)\w+', text))

        # 5. number of mentions
        mention_num = text.count('@username')

        # 6. number of links
        link_num = text.count('#URL')

        # 7. sentiment of the tweet: VADER compound score, from -1 to 1
        sentiment_compound = analyzer.polarity_scores(text)['compound']

        # 8. number of tokens whose lowercase form appears in the lexicon
        sexwords_count = sum(1 for token in tokens if token.lower() in lexicon)

        # 9. ratio of lexicon words in a tweet
        sexwords_ratio = per_token(sexwords_count, token_num_per_tweet)

        # 10. number of punctuation characters in the tweet
        punctuation_count = sum(1 for char in text if char in string.punctuation)

        # 11. ratio of punctuation characters to the number of tokens
        punctuation_ratio = per_token(punctuation_count, token_num_per_tweet)

        # 12. number of exclamation marks
        exclamation_count = text.count('!')

        # 13. ratio of exclamation marks
        exclamation_ratio = per_token(exclamation_count, token_num_per_tweet)

        # 14. number of question marks
        question_count = text.count('?')

        # 15. ratio of question marks
        question_ratio = per_token(question_count, token_num_per_tweet)

        # 16. number of emojis, matched as ':...:' spans
        emoji_count = len(re.findall(r':[^:]+?:', text))

        # 17. emoji ratio
        emoji_ratio = per_token(emoji_count, token_num_per_tweet)

        # one feature row per text, in the column order documented above
        features.append([
            token_num_per_tweet,
            avg_char_num_per_token,
            sentence_num,
            hashtag_num,
            mention_num,
            link_num,
            sentiment_compound,
            sexwords_count,
            sexwords_ratio,
            punctuation_count,
            punctuation_ratio,
            exclamation_count,
            exclamation_ratio,
            question_count,
            question_ratio,
            emoji_count,
            emoji_ratio
        ])

    # torch.tensor([]) would be 1-D; keep the feature axis for an empty batch.
    if not features:
        return torch.empty((0, num_features), dtype=torch.float32)
    return torch.tensor(features, dtype=torch.float32)

In [None]:
class SemStySexistDetector(nn.Module):
    """Classifier over frozen BERT pooled output plus hand-crafted features.

    The ``bert-base-uncased`` pooler output is concatenated with the output of
    ``handcrafted_features`` and passed through an MLP head ending in a
    sigmoid, so ``forward`` returns probabilities rather than raw logits.

    Args:
        padding: Padding strategy forwarded to the tokenizer.
        num_classes: Width of the final linear layer.
        handcrafted_feature_dim: Number of columns produced by
            ``handcrafted_features``; must match it for the concatenation to
            fit the first linear layer.
    """

    def __init__(self, padding='max_length', num_classes=1, handcrafted_feature_dim=17):
        super(SemStySexistDetector, self).__init__()
        self.padding = padding
        self.berttokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        # Not used by forward(); kept so existing attribute access keeps working.
        self.pooling = nn.AdaptiveAvgPool1d(1)

        combined_feature_dim = self.bert.config.hidden_size + handcrafted_feature_dim
        self.cls = nn.Sequential(
            nn.Linear(combined_feature_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.1),

            nn.Linear(512, 256),
            nn.LayerNorm(256),
            nn.ReLU(),
            nn.Dropout(0.1),

            nn.Linear(256, num_classes),
            nn.Sigmoid()
        )

        # Freeze the encoder: only the classification head receives gradients.
        for param in self.bert.parameters():
            param.requires_grad = False

    def train(self, mode=True):
        """Set the training mode, keeping a fully frozen encoder in eval mode.

        With every BERT parameter frozen, the encoder's dropout would only add
        noise to the features during training and make them differ from the
        ones seen at evaluation time. If any encoder parameter has been
        unfrozen, the standard behaviour is kept.
        """
        super().train(mode)
        if not any(p.requires_grad for p in self.bert.parameters()):
            self.bert.eval()
        return self

    def tokenize(self, texts):
        """Tokenize ``texts`` and return ``(input_ids, attention_mask)``.

        Sequences are truncated to 256 tokens and padded according to
        ``self.padding``. Both tensors are placed on the device that holds the
        model's parameters.
        """
        encoding = self.berttokenizer(
            texts,
            add_special_tokens=True,
            padding=self.padding,
            truncation=True,
            max_length=256,
            return_tensors="pt"
        )
        # Follow the model's own placement instead of a module-level global.
        target_device = next(self.parameters()).device
        input_ids = encoding['input_ids'].to(target_device)
        attention_mask = encoding['attention_mask'].to(target_device)
        return input_ids, attention_mask

    def forward(self, texts):
        """Score a batch of raw texts.

        Args:
            texts: A string or an iterable of strings.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding sigmoid outputs.
        """
        # A bare string would be iterated character by character by the
        # feature extractor; materialising also lets `texts` be consumed twice.
        texts = [texts] if isinstance(texts, str) else list(texts)

        input_ids, attention_mask = self.tokenize(texts)
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = outputs.pooler_output
        handcrafted_feats = handcrafted_features(texts).to(pooled.device)
        # the pooler output and hand-crafted features are concatenated together
        combined_features = torch.cat([pooled, handcrafted_feats], dim=1)
        probs = self.cls(combined_features)
        return probs

In [None]:
# train function
def train(model, train_loader, test_loader, optimizer,
          scheduler,
          epochs, device, criterion=nn.BCELoss()):
    """Train ``model`` and save the checkpoint with the best accuracy.

    After every epoch the model is scored with ``evaluate`` on ``test_loader``;
    whenever accuracy improves, the whole module is pickled to
    ``best_model.pth``. The scheduler is stepped once per epoch.

    NOTE(review): the checkpoint is selected on ``test_loader``; if that is the
    final test split, the reported best accuracy is optimistically biased.

    Args:
        model: Module called as ``model(texts)``; its output is squeezed on
            dim 1 before the loss.
        train_loader: Yields ``(texts, labels)`` batches for optimisation.
        test_loader: Yields ``(texts, labels)`` batches for per-epoch scoring.
        optimizer: Optimizer over the model's parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        epochs: Number of passes over ``train_loader``.
        device: Device the label tensors are moved to.
        criterion: Loss applied to ``(outputs, labels)``.
    """
    best_acc = 0

    for epoch in range(epochs):
        # evaluate() switches the model to eval mode and does not switch it
        # back, so train mode must be re-enabled at the start of every epoch;
        # otherwise dropout is disabled from the second epoch onwards.
        model.train()
        total_loss = 0

        # Training loop
        for (texts, labels) in tqdm(train_loader):
            labels = labels.to(torch.float32).to(device)
            optimizer.zero_grad()
            outputs = model(texts).squeeze(1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # max(..., 1) avoids a ZeroDivisionError on an empty loader.
        avg_loss = total_loss / max(len(train_loader), 1)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")

        # evaluate the model on the evaluation set after each epoch
        acc, f1 = evaluate(model, test_loader, device)
        print(f"Test Accuracy: {acc:.4f}, F1 Score: {f1:.4f}")

        # if current acc is greater than previous best acc, save a new best model
        if acc > best_acc:
            best_acc = acc
            print(f"New best model found with accuracy: {best_acc:.4f}, saving the model...")
            # Pickles the entire module (not a state_dict); loading it needs
            # the class definition and a full, non-weights-only unpickle.
            torch.save(model, "best_model.pth")

        # apply scheduler to adjust the learning rate
        scheduler.step()

    print("Training complete!")

In [None]:
# evaluate model
def evaluate(model, dataloader, device, threshold=0.5):
    """Score ``model`` on ``dataloader`` and return ``(accuracy, f1)``.

    The model is switched to eval mode and is left in eval mode on return, so
    callers that continue training must call ``model.train()`` again.

    Args:
        model: Module called as ``model(texts)``; its output is squeezed on
            dim 1 and compared against ``threshold``.
        dataloader: Yields ``(texts, labels)`` batches.
        device: Unused here; kept so existing call sites keep working.
        threshold: Outputs strictly greater than this are predicted as 1.

    Returns:
        Tuple ``(accuracy, f1)`` computed with scikit-learn over all batches.
    """
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for (texts, labels) in tqdm(dataloader):
            probs = model(texts).squeeze(1)
            preds = (probs > threshold).int()

            all_preds.extend(preds.cpu().numpy())
            # Labels are only collected for the metrics, so they are not moved
            # to the model's device first.
            all_labels.extend(labels.cpu().numpy())

    # scikit-learn metrics take (y_true, y_pred), in that order.
    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")

    return accuracy, f1

In [None]:
# Build the detector with its default settings and move its weights to `device`.
model = SemStySexistDetector()
model.to(device)

SemStySexistDetector(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elem

In [None]:
# Adam over all model parameters with learning rate 0.001; the BERT weights have
# requires_grad=False in SemStySexistDetector, so they receive no gradients.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Scale the learning rate by 0.1 every 20 scheduler steps; train() calls
# scheduler.step() once per epoch.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

In [None]:
# Number of training epochs passed to train().
epochs = 50

In [None]:
# Run training; train() evaluates on test_loader after every epoch and saves
# the most accurate model to best_model.pth.
train(model, train_loader, test_loader, optimizer, scheduler, epochs, device)

100%|██████████| 166/166 [00:19<00:00,  8.64it/s]


Epoch 1/50, Loss: 0.6464


100%|██████████| 42/42 [00:04<00:00,  8.91it/s]


Accuracy: 0.5852
F1 Score: 0.0803
Test Accuracy: 0.5852, F1 Score: 0.0803
New best model found with accuracy: 0.5852, saving the model...


100%|██████████| 166/166 [00:19<00:00,  8.72it/s]


Epoch 2/50, Loss: 0.6270


100%|██████████| 42/42 [00:04<00:00,  8.90it/s]


Accuracy: 0.6018
F1 Score: 0.1538
Test Accuracy: 0.6018, F1 Score: 0.1538
New best model found with accuracy: 0.6018, saving the model...


100%|██████████| 166/166 [00:19<00:00,  8.72it/s]


Epoch 3/50, Loss: 0.5915


100%|██████████| 42/42 [00:04<00:00,  8.82it/s]


Accuracy: 0.6878
F1 Score: 0.6270
Test Accuracy: 0.6878, F1 Score: 0.6270
New best model found with accuracy: 0.6878, saving the model...


100%|██████████| 166/166 [00:19<00:00,  8.67it/s]


Epoch 4/50, Loss: 0.5791


100%|██████████| 42/42 [00:04<00:00,  8.78it/s]


Accuracy: 0.6802
F1 Score: 0.5691
Test Accuracy: 0.6802, F1 Score: 0.5691


100%|██████████| 166/166 [00:19<00:00,  8.64it/s]


Epoch 5/50, Loss: 0.5820


100%|██████████| 42/42 [00:04<00:00,  8.82it/s]


Accuracy: 0.5882
F1 Score: 0.0930
Test Accuracy: 0.5882, F1 Score: 0.0930


100%|██████████| 166/166 [00:19<00:00,  8.69it/s]


Epoch 6/50, Loss: 0.5581


100%|██████████| 42/42 [00:04<00:00,  8.83it/s]


Accuracy: 0.7270
F1 Score: 0.6998
Test Accuracy: 0.7270, F1 Score: 0.6998
New best model found with accuracy: 0.7270, saving the model...


100%|██████████| 166/166 [00:19<00:00,  8.68it/s]


Epoch 7/50, Loss: 0.5653


100%|██████████| 42/42 [00:04<00:00,  8.86it/s]


Accuracy: 0.7014
F1 Score: 0.5751
Test Accuracy: 0.7014, F1 Score: 0.5751


100%|██████████| 166/166 [00:19<00:00,  8.60it/s]


Epoch 8/50, Loss: 0.5420


100%|██████████| 42/42 [00:04<00:00,  8.79it/s]


Accuracy: 0.7210
F1 Score: 0.6816
Test Accuracy: 0.7210, F1 Score: 0.6816


100%|██████████| 166/166 [00:19<00:00,  8.62it/s]


Epoch 9/50, Loss: 0.5451


100%|██████████| 42/42 [00:04<00:00,  8.79it/s]


Accuracy: 0.7164
F1 Score: 0.6824
Test Accuracy: 0.7164, F1 Score: 0.6824


100%|██████████| 166/166 [00:19<00:00,  8.63it/s]


Epoch 10/50, Loss: 0.5220


100%|██████████| 42/42 [00:04<00:00,  8.85it/s]


Accuracy: 0.6848
F1 Score: 0.5173
Test Accuracy: 0.6848, F1 Score: 0.5173


100%|██████████| 166/166 [00:19<00:00,  8.66it/s]


Epoch 11/50, Loss: 0.5346


100%|██████████| 42/42 [00:04<00:00,  8.83it/s]


Accuracy: 0.6305
F1 Score: 0.2773
Test Accuracy: 0.6305, F1 Score: 0.2773


100%|██████████| 166/166 [00:19<00:00,  8.67it/s]


Epoch 12/50, Loss: 0.5282


100%|██████████| 42/42 [00:04<00:00,  8.81it/s]


Accuracy: 0.6305
F1 Score: 0.2815
Test Accuracy: 0.6305, F1 Score: 0.2815


100%|██████████| 166/166 [00:19<00:00,  8.62it/s]


Epoch 13/50, Loss: 0.5165


100%|██████████| 42/42 [00:04<00:00,  8.80it/s]


Accuracy: 0.6697
F1 Score: 0.4484
Test Accuracy: 0.6697, F1 Score: 0.4484


100%|██████████| 166/166 [00:19<00:00,  8.65it/s]


Epoch 14/50, Loss: 0.5104


100%|██████████| 42/42 [00:04<00:00,  8.84it/s]


Accuracy: 0.7572
F1 Score: 0.7330
Test Accuracy: 0.7572, F1 Score: 0.7330
New best model found with accuracy: 0.7572, saving the model...


100%|██████████| 166/166 [00:19<00:00,  8.68it/s]


Epoch 15/50, Loss: 0.5384


100%|██████████| 42/42 [00:04<00:00,  8.81it/s]


Accuracy: 0.6576
F1 Score: 0.7017
Test Accuracy: 0.6576, F1 Score: 0.7017


100%|██████████| 166/166 [00:19<00:00,  8.67it/s]


Epoch 16/50, Loss: 0.5108


100%|██████████| 42/42 [00:04<00:00,  8.84it/s]


Accuracy: 0.7119
F1 Score: 0.7228
Test Accuracy: 0.7119, F1 Score: 0.7228


100%|██████████| 166/166 [00:19<00:00,  8.69it/s]


Epoch 17/50, Loss: 0.5108


100%|██████████| 42/42 [00:04<00:00,  8.85it/s]


Accuracy: 0.7541
F1 Score: 0.6998
Test Accuracy: 0.7541, F1 Score: 0.6998


100%|██████████| 166/166 [00:19<00:00,  8.69it/s]


Epoch 18/50, Loss: 0.5058


100%|██████████| 42/42 [00:04<00:00,  8.86it/s]


Accuracy: 0.6637
F1 Score: 0.4116
Test Accuracy: 0.6637, F1 Score: 0.4116


100%|██████████| 166/166 [00:19<00:00,  8.68it/s]


Epoch 19/50, Loss: 0.5038


100%|██████████| 42/42 [00:04<00:00,  8.82it/s]


Accuracy: 0.7436
F1 Score: 0.6964
Test Accuracy: 0.7436, F1 Score: 0.6964


100%|██████████| 166/166 [00:19<00:00,  8.65it/s]


Epoch 20/50, Loss: 0.4905


100%|██████████| 42/42 [00:04<00:00,  8.77it/s]


Accuracy: 0.7195
F1 Score: 0.5974
Test Accuracy: 0.7195, F1 Score: 0.5974


100%|██████████| 166/166 [00:19<00:00,  8.65it/s]


Epoch 21/50, Loss: 0.5023


100%|██████████| 42/42 [00:04<00:00,  8.83it/s]


Accuracy: 0.7300
F1 Score: 0.6455
Test Accuracy: 0.7300, F1 Score: 0.6455


100%|██████████| 166/166 [00:19<00:00,  8.67it/s]


Epoch 22/50, Loss: 0.4923


100%|██████████| 42/42 [00:04<00:00,  8.85it/s]


Accuracy: 0.7451
F1 Score: 0.6781
Test Accuracy: 0.7451, F1 Score: 0.6781


100%|██████████| 166/166 [00:19<00:00,  8.67it/s]


Epoch 23/50, Loss: 0.5121


100%|██████████| 42/42 [00:04<00:00,  8.86it/s]


Accuracy: 0.7481
F1 Score: 0.7174
Test Accuracy: 0.7481, F1 Score: 0.7174


100%|██████████| 166/166 [00:19<00:00,  8.63it/s]


Epoch 24/50, Loss: 0.4903


100%|██████████| 42/42 [00:04<00:00,  8.82it/s]


Accuracy: 0.7391
F1 Score: 0.6730
Test Accuracy: 0.7391, F1 Score: 0.6730


100%|██████████| 166/166 [00:19<00:00,  8.67it/s]


Epoch 25/50, Loss: 0.4747


100%|██████████| 42/42 [00:04<00:00,  8.76it/s]


Accuracy: 0.7044
F1 Score: 0.5377
Test Accuracy: 0.7044, F1 Score: 0.5377


100%|██████████| 166/166 [00:19<00:00,  8.67it/s]


Epoch 26/50, Loss: 0.4890


100%|██████████| 42/42 [00:04<00:00,  8.82it/s]


Accuracy: 0.7451
F1 Score: 0.6515
Test Accuracy: 0.7451, F1 Score: 0.6515


100%|██████████| 166/166 [00:19<00:00,  8.66it/s]


Epoch 27/50, Loss: 0.4954


100%|██████████| 42/42 [00:04<00:00,  8.83it/s]


Accuracy: 0.7345
F1 Score: 0.7259
Test Accuracy: 0.7345, F1 Score: 0.7259


100%|██████████| 166/166 [00:19<00:00,  8.65it/s]


Epoch 28/50, Loss: 0.4725


100%|██████████| 42/42 [00:04<00:00,  8.84it/s]


Accuracy: 0.6938
F1 Score: 0.4988
Test Accuracy: 0.6938, F1 Score: 0.4988


100%|██████████| 166/166 [00:19<00:00,  8.69it/s]


Epoch 29/50, Loss: 0.4690


100%|██████████| 42/42 [00:04<00:00,  8.85it/s]


Accuracy: 0.7225
F1 Score: 0.5893
Test Accuracy: 0.7225, F1 Score: 0.5893


100%|██████████| 166/166 [00:19<00:00,  8.69it/s]


Epoch 30/50, Loss: 0.4706


100%|██████████| 42/42 [00:04<00:00,  8.84it/s]


Accuracy: 0.7285
F1 Score: 0.6311
Test Accuracy: 0.7285, F1 Score: 0.6311


100%|██████████| 166/166 [00:19<00:00,  8.65it/s]


Epoch 31/50, Loss: 0.4650


100%|██████████| 42/42 [00:04<00:00,  8.83it/s]


Accuracy: 0.7391
F1 Score: 0.6826
Test Accuracy: 0.7391, F1 Score: 0.6826


100%|██████████| 166/166 [00:19<00:00,  8.67it/s]


Epoch 32/50, Loss: 0.4746


100%|██████████| 42/42 [00:04<00:00,  8.83it/s]


Accuracy: 0.6863
F1 Score: 0.4851
Test Accuracy: 0.6863, F1 Score: 0.4851


100%|██████████| 166/166 [00:19<00:00,  8.66it/s]


Epoch 33/50, Loss: 0.4694


100%|██████████| 42/42 [00:04<00:00,  8.83it/s]


Accuracy: 0.7149
F1 Score: 0.5809
Test Accuracy: 0.7149, F1 Score: 0.5809


100%|██████████| 166/166 [00:19<00:00,  8.67it/s]


Epoch 34/50, Loss: 0.4722


100%|██████████| 42/42 [00:04<00:00,  8.83it/s]


Accuracy: 0.7436
F1 Score: 0.6502
Test Accuracy: 0.7436, F1 Score: 0.6502


100%|██████████| 166/166 [00:19<00:00,  8.69it/s]


Epoch 35/50, Loss: 0.4766


100%|██████████| 42/42 [00:04<00:00,  8.85it/s]


Accuracy: 0.7225
F1 Score: 0.7229
Test Accuracy: 0.7225, F1 Score: 0.7229


100%|██████████| 166/166 [00:19<00:00,  8.67it/s]


Epoch 36/50, Loss: 0.4462


100%|██████████| 42/42 [00:04<00:00,  8.82it/s]


Accuracy: 0.7195
F1 Score: 0.6990
Test Accuracy: 0.7195, F1 Score: 0.6990


100%|██████████| 166/166 [00:19<00:00,  8.67it/s]


Epoch 37/50, Loss: 0.4662


100%|██████████| 42/42 [00:04<00:00,  8.82it/s]


Accuracy: 0.7602
F1 Score: 0.7125
Test Accuracy: 0.7602, F1 Score: 0.7125
New best model found with accuracy: 0.7602, saving the model...


100%|██████████| 166/166 [00:19<00:00,  8.63it/s]


Epoch 38/50, Loss: 0.4557


100%|██████████| 42/42 [00:04<00:00,  8.86it/s]


Accuracy: 0.7240
F1 Score: 0.7223
Test Accuracy: 0.7240, F1 Score: 0.7223


100%|██████████| 166/166 [00:19<00:00,  8.66it/s]


Epoch 39/50, Loss: 0.4587


100%|██████████| 42/42 [00:04<00:00,  8.83it/s]


Accuracy: 0.7240
F1 Score: 0.7189
Test Accuracy: 0.7240, F1 Score: 0.7189


100%|██████████| 166/166 [00:19<00:00,  8.64it/s]


Epoch 40/50, Loss: 0.4573


100%|██████████| 42/42 [00:04<00:00,  8.80it/s]


Accuracy: 0.6923
F1 Score: 0.4796
Test Accuracy: 0.6923, F1 Score: 0.4796


100%|██████████| 166/166 [00:19<00:00,  8.65it/s]


Epoch 41/50, Loss: 0.4646


100%|██████████| 42/42 [00:04<00:00,  8.82it/s]


Accuracy: 0.7406
F1 Score: 0.6815
Test Accuracy: 0.7406, F1 Score: 0.6815


100%|██████████| 166/166 [00:19<00:00,  8.66it/s]


Epoch 42/50, Loss: 0.4565


100%|██████████| 42/42 [00:04<00:00,  8.84it/s]


Accuracy: 0.7406
F1 Score: 0.6850
Test Accuracy: 0.7406, F1 Score: 0.6850


100%|██████████| 166/166 [00:19<00:00,  8.68it/s]


Epoch 43/50, Loss: 0.4629


100%|██████████| 42/42 [00:04<00:00,  8.81it/s]


Accuracy: 0.7481
F1 Score: 0.7055
Test Accuracy: 0.7481, F1 Score: 0.7055


100%|██████████| 166/166 [00:19<00:00,  8.67it/s]


Epoch 44/50, Loss: 0.4642


100%|██████████| 42/42 [00:04<00:00,  8.81it/s]


Accuracy: 0.7421
F1 Score: 0.7145
Test Accuracy: 0.7421, F1 Score: 0.7145


100%|██████████| 166/166 [00:19<00:00,  8.66it/s]


Epoch 45/50, Loss: 0.4547


100%|██████████| 42/42 [00:04<00:00,  8.83it/s]


Accuracy: 0.7240
F1 Score: 0.6147
Test Accuracy: 0.7240, F1 Score: 0.6147


100%|██████████| 166/166 [00:19<00:00,  8.65it/s]


Epoch 46/50, Loss: 0.4406


100%|██████████| 42/42 [00:04<00:00,  8.83it/s]


Accuracy: 0.7270
F1 Score: 0.7018
Test Accuracy: 0.7270, F1 Score: 0.7018


100%|██████████| 166/166 [00:19<00:00,  8.69it/s]


Epoch 47/50, Loss: 0.4390


100%|██████████| 42/42 [00:04<00:00,  8.87it/s]


Accuracy: 0.7195
F1 Score: 0.6125
Test Accuracy: 0.7195, F1 Score: 0.6125


100%|██████████| 166/166 [00:19<00:00,  8.67it/s]


Epoch 48/50, Loss: 0.4288


100%|██████████| 42/42 [00:04<00:00,  8.82it/s]


Accuracy: 0.7526
F1 Score: 0.6784
Test Accuracy: 0.7526, F1 Score: 0.6784


100%|██████████| 166/166 [00:19<00:00,  8.50it/s]


Epoch 49/50, Loss: 0.4563


100%|██████████| 42/42 [00:04<00:00,  8.77it/s]


Accuracy: 0.7481
F1 Score: 0.7065
Test Accuracy: 0.7481, F1 Score: 0.7065


100%|██████████| 166/166 [00:19<00:00,  8.64it/s]


Epoch 50/50, Loss: 0.4483


100%|██████████| 42/42 [00:04<00:00,  8.84it/s]

Accuracy: 0.6833
F1 Score: 0.4531
Test Accuracy: 0.6833, F1 Score: 0.4531
Training complete!





In [None]:
# train() pickles the whole module, so the full object (not just tensors) has to
# be unpickled: PyTorch releases that default to weights_only=True reject it.
# Unpickling can execute arbitrary code — only load checkpoints written by this
# notebook. map_location lets a GPU-saved checkpoint load on a CPU-only machine.
best_sem_sty_model = torch.load('best_model.pth', map_location=device, weights_only=False).to(device)

  best_sem_sty_model = torch.load('best_model.pth').to(device)


In [None]:
# Re-score the reloaded checkpoint on the test loader; returns (accuracy, f1).
evaluate(best_sem_sty_model, test_loader, device)

100%|██████████| 42/42 [00:04<00:00,  9.17it/s]

Accuracy: 0.7602
F1 Score: 0.7125





(0.7601809954751131, 0.7124773960216998)