In [None]:
!pip install vaderSentiment



In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
from sklearn.metrics import f1_score, accuracy_score
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, BertTokenizer
import spacy
import nltk
import re
import string
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize, word_tokenize


In [None]:
# Fetch the NLTK resources needed by word_tokenize / sent_tokenize used later on.
# Both the older resource names ('punkt', 'averaged_perceptron_tagger') and the
# newer ones ('punkt_tab', 'averaged_perceptron_tagger_eng') are requested —
# presumably so the cell works across NLTK versions.
# NOTE(review): no NLTK POS tagging appears in this notebook, so the two tagger
# downloads look unnecessary — confirm before removing.
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [None]:
# Single shared VADER analyzer; handcrafted_features() reuses it for every tweet
analyzer = SentimentIntensityAnalyzer()

In [None]:
# Use the first GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [None]:
# read the CSV datasets (the 'tweet' and 'value' columns are the ones used below)
train_df = pd.read_csv('train_en_dataset.csv')
test_df = pd.read_csv('test_en_dataset.csv')

In [None]:
class TweetDataset(Dataset):
    """Expose a tweet DataFrame as a torch ``Dataset`` of ``(tweet, label)`` pairs.

    The wrapped frame must provide a ``'tweet'`` column (text) and a ``'value'``
    column (label).
    """

    def __init__(self, data):
        # Keep a reference to the frame; rows are looked up on demand.
        self.data = data

    def __len__(self):
        """Number of rows in the wrapped frame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(tweet, label)`` for the positional index (or slice) ``idx``."""
        selected = self.data.iloc[idx]
        return selected['tweet'], selected['value']

In [None]:
# Wrap both splits so a DataLoader can batch (tweet, label) pairs
train_dataset = TweetDataset(train_df)
test_dataset = TweetDataset(test_df)

In [None]:
# Peek at the first five samples; the slice is forwarded to DataFrame.iloc,
# so the result is a (tweets, labels) pair of Series rather than five tuples
train_dataset[0:5]

(0    “mansplaining” is literally just how intellige...
 1    if you don’t want me but your friend do, dont ...
 2    @username @username @username @username isn't ...
 3    @username's account is temporarily unavailable...
 4    @username if it wasn't for the gender biases o...
 Name: tweet, dtype: object,
 0    1.0
 1    1.0
 2    1.0
 3    0.0
 4    1.0
 Name: value, dtype: float64)

In [None]:
# Class balance of the training labels (plain Python ints)
positive_samples = int((train_df['value'] == 1).sum())
negative_samples = int((train_df['value'] == 0).sum())

In [None]:
# (positives, negatives) in the training split
positive_samples,negative_samples

(1048, 1603)

In [None]:
# Batches of 16 samples; only the training split is shuffled
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [None]:
# English spaCy pipeline; handcrafted_features() relies on its sentence
# boundaries, POS tags, fine-grained tags and dependency labels.
# The 'en_core_web_sm' model package has to be installed beforehand.
nlp = spacy.load('en_core_web_sm')



In [None]:
# English HurtLex lexicon (tab-separated file); only its 'lemma' column is used,
# as the word list behind the lexicon features in handcrafted_features().
# NOTE(review): the lexicon presumably covers offensive terms in general, not only
# misogynistic ones — confirm whether a category filter was intended.
en_lex_df = pd.read_csv('hurtlex_EN.tsv', sep='\t')
en_lex = en_lex_df['lemma'].tolist()

In [None]:
_NUM_HANDCRAFTED_FEATURES = 22  # length of every per-tweet feature vector built below


def handcrafted_features(texts):
    """Compute the hand-crafted linguistic features for a batch of tweets.

    Args:
        texts: iterable of tweet strings. The tweets are expected to use the
            placeholders ``@username`` for mentions and ``#URL`` for links.

    Returns:
        A ``torch.float32`` tensor of shape ``(len(texts), 22)``. Column order:
        clauses per sentence, imperative sentence count, passive count,
        female-pronoun ratio, negation count, token count, average token length,
        sentence count, hashtag count, mention count, link count, VADER compound
        score, lexicon word count, lexicon word ratio, punctuation count,
        punctuation ratio, exclamation count, exclamation ratio, question-mark
        count, question-mark ratio, emoji count, emoji ratio.

    Relies on the module-level ``nlp`` (spaCy pipeline), ``analyzer`` (VADER)
    and ``en_lex`` (lexicon word list).
    """
    texts = list(texts)
    if not texts:
        # Keep the column dimension so the result can still be concatenated
        # with other (0, D) feature tensors.
        return torch.empty((0, _NUM_HANDCRAFTED_FEATURES), dtype=torch.float32)

    # Built once per call: set membership is O(1) per token, whereas the
    # module-level list would be scanned linearly for every token.
    lexicon = set(en_lex)
    clause_deps = {"csubj", "ccomp", "advcl", "acl", "relcl"}
    women_gendered_pronouns = {'she', 'her', 'hers'}

    def ratio(count, total):
        # Guarded division: an empty denominator yields 0 instead of raising.
        return count / total if total > 0 else 0

    features = []
    for text in texts:
        doc = nlp(text)
        sents = list(doc.sents)  # materialised once, reused below

        # clauses per sentence (spaCy sentence segmentation)
        clause_count = sum(1 for token in doc if token.dep_ in clause_deps)
        clause_per_sentence = ratio(clause_count, len(sents))

        # count of imperative sentences: sentence starts with a base-form verb
        imperative_count = sum(
            1 for sent in sents
            if len(sent) > 0 and sent[0].pos_ == "VERB" and sent[0].tag_ == "VB"
        )

        # count of passive voice usage
        passive_count = sum(
            1 for token in doc
            if token.dep_ == "nsubjpass"
            and any(child.dep_ == "auxpass" for child in token.head.children)
        )

        # ratio of women-related gendered pronouns to total pronouns
        pronouns = [token.text.lower() for token in doc if token.pos_ == "PRON"]
        gendered_count = sum(1 for pronoun in pronouns if pronoun in women_gendered_pronouns)
        gendered_pronoun_ratio = ratio(gendered_count, len(pronouns))

        # count of negations
        neg_count = sum(1 for token in doc if token.dep_ == "neg")

        # 1. token count in a tweet (NLTK tokenisation, used for all ratios below)
        tokens = nltk.word_tokenize(text)
        token_num_per_tweet = len(tokens)

        # 2. average number of characters per token, as a proxy for word complexity
        char_num_per_tweet = sum(len(token) for token in tokens)
        avg_char_num_per_token = ratio(char_num_per_tweet, token_num_per_tweet)

        # 3. sentence count in a tweet (NLTK segmentation)
        sentence_num = len(nltk.sent_tokenize(text))

        # 4. number of hashtags, excluding the '#URL' link placeholder
        hashtag_num = len(re.findall(r'#(?!URL\b)\w+', text))

        # 5. number of mentions
        mention_num = text.count('@username')

        # 6. number of links
        link_num = text.count('#URL')

        # 7. sentiment of the tweet: VADER compound score in [-1, 1]
        sentiment_compound = analyzer.polarity_scores(text)['compound']

        # 8. number of tokens found in the lexicon
        sexwords_count = sum(1 for token in tokens if token.lower() in lexicon)

        # 9. ratio of lexicon words in a tweet
        sexwords_ratio = ratio(sexwords_count, token_num_per_tweet)

        # 10. number of punctuation characters
        punctuation_count = sum(1 for char in text if char in string.punctuation)

        # 11. punctuation count relative to the number of tokens
        punctuation_ratio = ratio(punctuation_count, token_num_per_tweet)

        # 12. number of exclamation marks
        exclamation_count = text.count('!')

        # 13. ratio of exclamation marks
        exclamation_ratio = ratio(exclamation_count, token_num_per_tweet)

        # 14. number of question marks
        question_count = text.count('?')

        # 15. ratio of question marks
        question_ratio = ratio(question_count, token_num_per_tweet)

        # 16. number of ':name:' spans — presumably emojis were converted to
        # shortcodes upstream; TODO confirm against the preprocessing step
        emoji_count = len(re.findall(r':[^:]+?:', text))

        # 17. emoji ratio
        emoji_ratio = ratio(emoji_count, token_num_per_tweet)

        # One row per tweet; the order must stay in sync with the docstring
        features.append([
            clause_per_sentence,
            imperative_count,
            passive_count,
            gendered_pronoun_ratio,
            neg_count,
            token_num_per_tweet,
            avg_char_num_per_token,
            sentence_num,
            hashtag_num,
            mention_num,
            link_num,
            sentiment_compound,
            sexwords_count,
            sexwords_ratio,
            punctuation_count,
            punctuation_ratio,
            exclamation_count,
            exclamation_ratio,
            question_count,
            question_ratio,
            emoji_count,
            emoji_ratio
        ])
    # convert to tensor
    return torch.tensor(features, dtype=torch.float32)



In [None]:

class IntegratedSexistDetector(nn.Module):
    """Frozen BERT pooled output + hand-crafted features, fed to an MLP head.

    Args:
        padding: padding strategy forwarded to the BERT tokenizer.
        num_classes: number of output logits (1 for binary classification
            with ``BCEWithLogitsLoss``).
        handcrafted_feature_dim: width of the tensor returned by
            ``handcrafted_features``.

    ``forward`` takes a batch of raw strings and returns logits of shape
    ``(batch, num_classes)``; tokenisation and feature extraction happen inside.
    """

    def __init__(self, padding='max_length', num_classes=1, handcrafted_feature_dim=22):
        super().__init__()
        self.padding = padding
        self.berttokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        # Not used by forward() in this variant (no mean-pooled hidden state);
        # kept so the module's attributes stay unchanged.
        self.pooling = nn.AdaptiveAvgPool1d(1)

        combined_feature_dim = self.bert.config.hidden_size + handcrafted_feature_dim
        self.cls = nn.Sequential(
            nn.Linear(combined_feature_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.1),

            nn.Linear(512, 256),
            nn.LayerNorm(256),
            nn.ReLU(),
            nn.Dropout(0.1),

            nn.Linear(256, num_classes)
        )

        # BERT is a fixed feature extractor: only the head is trained
        for param in self.bert.parameters():
            param.requires_grad = False

    def train(self, mode=True):
        """Set training mode, keeping a fully frozen BERT in eval mode.

        While no BERT parameter requires gradients the encoder only extracts
        features, so its dropout is left off to keep those features
        deterministic. If BERT is unfrozen later it follows ``mode`` as usual.
        """
        super().train(mode)
        if not any(p.requires_grad for p in self.bert.parameters()):
            self.bert.eval()
        return self

    def tokenize(self, texts):
        """Tokenise ``texts`` and return ``(input_ids, attention_mask)`` on the model's device."""
        encoding = self.berttokenizer(
            texts,
            add_special_tokens=True,
            padding=self.padding,
            truncation=True,
            max_length=256,
            return_tensors="pt"
        )
        # Follow the encoder's device instead of a notebook-level global
        target_device = next(self.bert.parameters()).device
        input_ids = encoding['input_ids'].to(target_device)
        attention_mask = encoding['attention_mask'].to(target_device)
        return input_ids, attention_mask

    def forward(self, texts):
        """Return logits of shape ``(batch, num_classes)`` for a batch of raw strings."""
        input_ids, attention_mask = self.tokenize(texts)
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        handcrafted_feats = handcrafted_features(texts).to(pooled_output.device)
        # the main difference from the fully integrated model: mean hidden state is not concatenated
        combined_features = torch.cat([pooled_output, handcrafted_feats], dim=1)
        return self.cls(combined_features)

In [None]:
# Positive-class weight for BCEWithLogitsLoss: #negatives / #positives, so that
# errors on the rarer positive class count proportionally more in the loss
pos_weight = torch.tensor([negative_samples / positive_samples]).to(device)
pos_weight

tensor([1.5296], device='cuda:0')

In [None]:
# train function
def train(model, train_loader, test_loader, optimizer,
          scheduler,
          epochs, device, criterion=nn.BCEWithLogitsLoss(pos_weight=pos_weight)):
    """Train ``model`` and checkpoint the best epoch to ``best_model.pth``.

    Args:
        model: module mapping a batch of raw texts to logits of shape (batch, 1).
        train_loader: yields ``(texts, labels)`` training batches.
        test_loader: loader passed to ``evaluate`` after every epoch.
        optimizer: optimizer stepped once per batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        epochs: number of passes over ``train_loader``.
        device: device the labels are moved to.
        criterion: loss applied to ``(logits, labels)``. The default is built
            once, when this function is defined, from the ``pos_weight`` value
            that exists at that moment.

    NOTE(review): the checkpoint is selected on ``test_loader``, so the metrics
    later reported for the "best" model on the same data are optimistic — a
    separate validation split would avoid that.
    """
    best_acc = 0

    for epoch in range(epochs):
        # evaluate() puts the model in eval mode, so training mode has to be
        # re-enabled at the start of every epoch, not just once before the loop
        # (otherwise dropout is disabled from the second epoch onwards).
        model.train()
        total_loss = 0

        # training loop
        for (texts, labels) in tqdm(train_loader):
            labels = labels.to(torch.float32).to(device)
            optimizer.zero_grad()
            logits = model(texts)
            logits = logits.squeeze(1)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")

        # evaluate the model on the evaluation set after each epoch
        acc, f1 = evaluate(model, test_loader, device)
        print(f"Test Accuracy: {acc:.4f}, F1 Score: {f1:.4f}")

        # if current acc is greater than previous best acc, save a new best model
        if acc > best_acc:
            best_acc = acc
            print(f"New best model found with accuracy: {best_acc:.4f}, saving the model...")
            # The whole module is pickled (not just a state_dict); loading it
            # back requires the model class to be importable/defined.
            torch.save(model, "best_model.pth")

        # apply scheduler to adjust the learning rate
        scheduler.step()

    print("Training complete!")

In [None]:
# evaluate model
sigmoid = nn.Sigmoid()

def evaluate(model, dataloader, device, threshold=0.5):
    """Score ``model`` on ``dataloader`` and return ``(accuracy, f1)``.

    Args:
        model: module mapping a batch of raw texts to logits of shape (batch, 1).
        dataloader: yields ``(texts, labels)`` batches.
        device: kept for interface compatibility; labels are only compared on
            the CPU, so nothing is moved to this device here.
        threshold: probability above which a sample is predicted positive.

    The model is evaluated in eval mode and its previous training/eval mode is
    restored afterwards, so calling this between epochs does not silently
    disable dropout for the rest of training. Accuracy and F1 are also printed.
    """
    was_training = model.training
    model.eval()
    all_preds = []
    all_labels = []

    try:
        with torch.no_grad():
            for (texts, labels) in tqdm(dataloader):
                logits = model(texts)
                # sigmoid turns logits into probabilities before thresholding
                probs = sigmoid(logits).squeeze(1)
                preds = (probs > threshold).int()

                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
    finally:
        # hand the model back in the mode the caller had it in
        model.train(was_training)

    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")

    return accuracy, f1

In [None]:
# Build the detector with its default settings and move it to the selected device
model = IntegratedSexistDetector()
model.to(device)

IntegratedSexistDetector(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, 

In [None]:
# model.parameters() also includes the frozen BERT weights (requires_grad=False);
# they receive no gradients, so only the classifier head is actually updated
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Multiply the learning rate by 0.1 every 20 scheduler steps (train() steps it once per epoch)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

In [None]:
# Number of full passes over the training set
epochs = 50

In [None]:
# Run training; the epoch with the best test accuracy is saved to best_model.pth
train(model, train_loader, test_loader, optimizer, scheduler, epochs, device)

100%|██████████| 166/166 [00:43<00:00,  3.86it/s]


Epoch 1/50, Loss: 0.8605


100%|██████████| 42/42 [00:10<00:00,  4.12it/s]


Accuracy: 0.5762
F1 Score: 0.3388
Test Accuracy: 0.5762, F1 Score: 0.3388
New best model found with accuracy: 0.5762, saving the model...


100%|██████████| 166/166 [00:39<00:00,  4.18it/s]


Epoch 2/50, Loss: 0.8412


100%|██████████| 42/42 [00:09<00:00,  4.25it/s]


Accuracy: 0.4284
F1 Score: 0.5989
Test Accuracy: 0.4284, F1 Score: 0.5989


100%|██████████| 166/166 [00:40<00:00,  4.14it/s]


Epoch 3/50, Loss: 0.8175


100%|██████████| 42/42 [00:09<00:00,  4.22it/s]


Accuracy: 0.5792
F1 Score: 0.6543
Test Accuracy: 0.5792, F1 Score: 0.6543
New best model found with accuracy: 0.5792, saving the model...


100%|██████████| 166/166 [00:40<00:00,  4.12it/s]


Epoch 4/50, Loss: 0.7814


100%|██████████| 42/42 [00:09<00:00,  4.23it/s]


Accuracy: 0.6410
F1 Score: 0.5182
Test Accuracy: 0.6410, F1 Score: 0.5182
New best model found with accuracy: 0.6410, saving the model...


100%|██████████| 166/166 [00:40<00:00,  4.12it/s]


Epoch 5/50, Loss: 0.7604


100%|██████████| 42/42 [00:09<00:00,  4.25it/s]


Accuracy: 0.6652
F1 Score: 0.6764
Test Accuracy: 0.6652, F1 Score: 0.6764
New best model found with accuracy: 0.6652, saving the model...


100%|██████████| 166/166 [00:40<00:00,  4.13it/s]


Epoch 6/50, Loss: 0.7383


100%|██████████| 42/42 [00:10<00:00,  4.19it/s]


Accuracy: 0.6078
F1 Score: 0.2353
Test Accuracy: 0.6078, F1 Score: 0.2353


100%|██████████| 166/166 [00:39<00:00,  4.16it/s]


Epoch 7/50, Loss: 0.7233


100%|██████████| 42/42 [00:09<00:00,  4.23it/s]


Accuracy: 0.6908
F1 Score: 0.6004
Test Accuracy: 0.6908, F1 Score: 0.6004
New best model found with accuracy: 0.6908, saving the model...


100%|██████████| 166/166 [00:40<00:00,  4.13it/s]


Epoch 8/50, Loss: 0.6816


100%|██████████| 42/42 [00:09<00:00,  4.24it/s]


Accuracy: 0.5777
F1 Score: 0.6610
Test Accuracy: 0.5777, F1 Score: 0.6610


100%|██████████| 166/166 [00:39<00:00,  4.17it/s]


Epoch 9/50, Loss: 0.6987


100%|██████████| 42/42 [00:09<00:00,  4.25it/s]


Accuracy: 0.7300
F1 Score: 0.6908
Test Accuracy: 0.7300, F1 Score: 0.6908
New best model found with accuracy: 0.7300, saving the model...


100%|██████████| 166/166 [00:39<00:00,  4.17it/s]


Epoch 10/50, Loss: 0.6923


100%|██████████| 42/42 [00:09<00:00,  4.25it/s]


Accuracy: 0.6817
F1 Score: 0.7106
Test Accuracy: 0.6817, F1 Score: 0.7106


100%|██████████| 166/166 [00:39<00:00,  4.17it/s]


Epoch 11/50, Loss: 0.6492


100%|██████████| 42/42 [00:09<00:00,  4.23it/s]


Accuracy: 0.7059
F1 Score: 0.6919
Test Accuracy: 0.7059, F1 Score: 0.6919


100%|██████████| 166/166 [00:39<00:00,  4.16it/s]


Epoch 12/50, Loss: 0.6497


100%|██████████| 42/42 [00:09<00:00,  4.22it/s]


Accuracy: 0.7315
F1 Score: 0.6654
Test Accuracy: 0.7315, F1 Score: 0.6654
New best model found with accuracy: 0.7315, saving the model...


100%|██████████| 166/166 [00:40<00:00,  4.14it/s]


Epoch 13/50, Loss: 0.6424


100%|██████████| 42/42 [00:09<00:00,  4.23it/s]


Accuracy: 0.7360
F1 Score: 0.6615
Test Accuracy: 0.7360, F1 Score: 0.6615
New best model found with accuracy: 0.7360, saving the model...


100%|██████████| 166/166 [00:40<00:00,  4.15it/s]


Epoch 14/50, Loss: 0.6381


100%|██████████| 42/42 [00:10<00:00,  4.20it/s]


Accuracy: 0.6772
F1 Score: 0.7036
Test Accuracy: 0.6772, F1 Score: 0.7036


100%|██████████| 166/166 [00:39<00:00,  4.16it/s]


Epoch 15/50, Loss: 0.6467


100%|██████████| 42/42 [00:09<00:00,  4.23it/s]


Accuracy: 0.6712
F1 Score: 0.4523
Test Accuracy: 0.6712, F1 Score: 0.4523


100%|██████████| 166/166 [00:39<00:00,  4.17it/s]


Epoch 16/50, Loss: 0.6357


100%|██████████| 42/42 [00:09<00:00,  4.27it/s]


Accuracy: 0.7391
F1 Score: 0.7205
Test Accuracy: 0.7391, F1 Score: 0.7205
New best model found with accuracy: 0.7391, saving the model...


100%|██████████| 166/166 [00:39<00:00,  4.17it/s]


Epoch 17/50, Loss: 0.6143


100%|██████████| 42/42 [00:09<00:00,  4.25it/s]


Accuracy: 0.7119
F1 Score: 0.5765
Test Accuracy: 0.7119, F1 Score: 0.5765


100%|██████████| 166/166 [00:39<00:00,  4.18it/s]


Epoch 18/50, Loss: 0.6208


100%|██████████| 42/42 [00:09<00:00,  4.25it/s]


Accuracy: 0.7225
F1 Score: 0.7270
Test Accuracy: 0.7225, F1 Score: 0.7270


100%|██████████| 166/166 [00:40<00:00,  4.14it/s]


Epoch 19/50, Loss: 0.6169


100%|██████████| 42/42 [00:09<00:00,  4.24it/s]


Accuracy: 0.7240
F1 Score: 0.6195
Test Accuracy: 0.7240, F1 Score: 0.6195


100%|██████████| 166/166 [00:40<00:00,  4.15it/s]


Epoch 20/50, Loss: 0.6365


100%|██████████| 42/42 [00:09<00:00,  4.27it/s]


Accuracy: 0.7195
F1 Score: 0.7297
Test Accuracy: 0.7195, F1 Score: 0.7297


100%|██████████| 166/166 [00:39<00:00,  4.15it/s]


Epoch 21/50, Loss: 0.5804


100%|██████████| 42/42 [00:09<00:00,  4.25it/s]


Accuracy: 0.7451
F1 Score: 0.7207
Test Accuracy: 0.7451, F1 Score: 0.7207
New best model found with accuracy: 0.7451, saving the model...


100%|██████████| 166/166 [00:39<00:00,  4.16it/s]


Epoch 22/50, Loss: 0.5693


100%|██████████| 42/42 [00:09<00:00,  4.22it/s]


Accuracy: 0.7466
F1 Score: 0.7021
Test Accuracy: 0.7466, F1 Score: 0.7021
New best model found with accuracy: 0.7466, saving the model...


100%|██████████| 166/166 [00:40<00:00,  4.15it/s]


Epoch 23/50, Loss: 0.5702


100%|██████████| 42/42 [00:09<00:00,  4.25it/s]


Accuracy: 0.7511
F1 Score: 0.7255
Test Accuracy: 0.7511, F1 Score: 0.7255
New best model found with accuracy: 0.7511, saving the model...


100%|██████████| 166/166 [00:39<00:00,  4.17it/s]


Epoch 24/50, Loss: 0.5645


100%|██████████| 42/42 [00:09<00:00,  4.23it/s]


Accuracy: 0.7421
F1 Score: 0.6941
Test Accuracy: 0.7421, F1 Score: 0.6941


100%|██████████| 166/166 [00:39<00:00,  4.15it/s]


Epoch 25/50, Loss: 0.5704


100%|██████████| 42/42 [00:10<00:00,  4.18it/s]


Accuracy: 0.7436
F1 Score: 0.6964
Test Accuracy: 0.7436, F1 Score: 0.6964


100%|██████████| 166/166 [00:39<00:00,  4.15it/s]


Epoch 26/50, Loss: 0.5657


100%|██████████| 42/42 [00:09<00:00,  4.20it/s]


Accuracy: 0.7481
F1 Score: 0.7165
Test Accuracy: 0.7481, F1 Score: 0.7165


100%|██████████| 166/166 [00:39<00:00,  4.16it/s]


Epoch 27/50, Loss: 0.5638


100%|██████████| 42/42 [00:10<00:00,  4.09it/s]


Accuracy: 0.7541
F1 Score: 0.7297
Test Accuracy: 0.7541, F1 Score: 0.7297
New best model found with accuracy: 0.7541, saving the model...


100%|██████████| 166/166 [00:39<00:00,  4.16it/s]


Epoch 28/50, Loss: 0.5605


100%|██████████| 42/42 [00:09<00:00,  4.24it/s]


Accuracy: 0.7481
F1 Score: 0.7267
Test Accuracy: 0.7481, F1 Score: 0.7267


100%|██████████| 166/166 [00:39<00:00,  4.17it/s]


Epoch 29/50, Loss: 0.5647


100%|██████████| 42/42 [00:09<00:00,  4.23it/s]


Accuracy: 0.7436
F1 Score: 0.7258
Test Accuracy: 0.7436, F1 Score: 0.7258


100%|██████████| 166/166 [00:39<00:00,  4.15it/s]


Epoch 30/50, Loss: 0.5624


100%|██████████| 42/42 [00:09<00:00,  4.25it/s]


Accuracy: 0.7511
F1 Score: 0.7140
Test Accuracy: 0.7511, F1 Score: 0.7140


100%|██████████| 166/166 [00:40<00:00,  4.15it/s]


Epoch 31/50, Loss: 0.5574


100%|██████████| 42/42 [00:09<00:00,  4.26it/s]


Accuracy: 0.7496
F1 Score: 0.7158
Test Accuracy: 0.7496, F1 Score: 0.7158


100%|██████████| 166/166 [00:39<00:00,  4.16it/s]


Epoch 32/50, Loss: 0.5569


100%|██████████| 42/42 [00:09<00:00,  4.23it/s]


Accuracy: 0.7481
F1 Score: 0.7240
Test Accuracy: 0.7481, F1 Score: 0.7240


100%|██████████| 166/166 [00:39<00:00,  4.16it/s]


Epoch 33/50, Loss: 0.5552


100%|██████████| 42/42 [00:09<00:00,  4.24it/s]


Accuracy: 0.7496
F1 Score: 0.7088
Test Accuracy: 0.7496, F1 Score: 0.7088


100%|██████████| 166/166 [00:40<00:00,  4.14it/s]


Epoch 34/50, Loss: 0.5551


100%|██████████| 42/42 [00:10<00:00,  4.19it/s]


Accuracy: 0.7572
F1 Score: 0.7339
Test Accuracy: 0.7572, F1 Score: 0.7339
New best model found with accuracy: 0.7572, saving the model...


100%|██████████| 166/166 [00:40<00:00,  4.12it/s]


Epoch 35/50, Loss: 0.5528


100%|██████████| 42/42 [00:10<00:00,  4.19it/s]


Accuracy: 0.7360
F1 Score: 0.7209
Test Accuracy: 0.7360, F1 Score: 0.7209


100%|██████████| 166/166 [00:40<00:00,  4.14it/s]


Epoch 36/50, Loss: 0.5579


100%|██████████| 42/42 [00:09<00:00,  4.21it/s]


Accuracy: 0.7496
F1 Score: 0.7323
Test Accuracy: 0.7496, F1 Score: 0.7323


100%|██████████| 166/166 [00:40<00:00,  4.14it/s]


Epoch 37/50, Loss: 0.5535


100%|██████████| 42/42 [00:09<00:00,  4.23it/s]


Accuracy: 0.7511
F1 Score: 0.7208
Test Accuracy: 0.7511, F1 Score: 0.7208


100%|██████████| 166/166 [00:40<00:00,  4.12it/s]


Epoch 38/50, Loss: 0.5518


100%|██████████| 42/42 [00:10<00:00,  4.20it/s]


Accuracy: 0.7466
F1 Score: 0.6923
Test Accuracy: 0.7466, F1 Score: 0.6923


100%|██████████| 166/166 [00:40<00:00,  4.14it/s]


Epoch 39/50, Loss: 0.5489


100%|██████████| 42/42 [00:09<00:00,  4.22it/s]


Accuracy: 0.7255
F1 Score: 0.7165
Test Accuracy: 0.7255, F1 Score: 0.7165


100%|██████████| 166/166 [00:40<00:00,  4.12it/s]


Epoch 40/50, Loss: 0.5524


100%|██████████| 42/42 [00:09<00:00,  4.20it/s]


Accuracy: 0.7496
F1 Score: 0.7270
Test Accuracy: 0.7496, F1 Score: 0.7270


100%|██████████| 166/166 [00:40<00:00,  4.11it/s]


Epoch 41/50, Loss: 0.5442


100%|██████████| 42/42 [00:09<00:00,  4.26it/s]


Accuracy: 0.7557
F1 Score: 0.7216
Test Accuracy: 0.7557, F1 Score: 0.7216


100%|██████████| 166/166 [00:40<00:00,  4.14it/s]


Epoch 42/50, Loss: 0.5433


100%|██████████| 42/42 [00:09<00:00,  4.23it/s]


Accuracy: 0.7511
F1 Score: 0.7227
Test Accuracy: 0.7511, F1 Score: 0.7227


100%|██████████| 166/166 [00:40<00:00,  4.14it/s]


Epoch 43/50, Loss: 0.5433


100%|██████████| 42/42 [00:09<00:00,  4.23it/s]


Accuracy: 0.7541
F1 Score: 0.7223
Test Accuracy: 0.7541, F1 Score: 0.7223


100%|██████████| 166/166 [00:39<00:00,  4.16it/s]


Epoch 44/50, Loss: 0.5429


100%|██████████| 42/42 [00:10<00:00,  4.19it/s]


Accuracy: 0.7511
F1 Score: 0.7199
Test Accuracy: 0.7511, F1 Score: 0.7199


100%|██████████| 166/166 [00:40<00:00,  4.14it/s]


Epoch 45/50, Loss: 0.5428


100%|██████████| 42/42 [00:09<00:00,  4.24it/s]


Accuracy: 0.7496
F1 Score: 0.7196
Test Accuracy: 0.7496, F1 Score: 0.7196


100%|██████████| 166/166 [00:40<00:00,  4.15it/s]


Epoch 46/50, Loss: 0.5425


100%|██████████| 42/42 [00:09<00:00,  4.21it/s]


Accuracy: 0.7511
F1 Score: 0.7218
Test Accuracy: 0.7511, F1 Score: 0.7218


100%|██████████| 166/166 [00:40<00:00,  4.14it/s]


Epoch 47/50, Loss: 0.5422


100%|██████████| 42/42 [00:09<00:00,  4.21it/s]


Accuracy: 0.7557
F1 Score: 0.7226
Test Accuracy: 0.7557, F1 Score: 0.7226


100%|██████████| 166/166 [00:40<00:00,  4.15it/s]


Epoch 48/50, Loss: 0.5431


100%|██████████| 42/42 [00:09<00:00,  4.23it/s]


Accuracy: 0.7557
F1 Score: 0.7226
Test Accuracy: 0.7557, F1 Score: 0.7226


100%|██████████| 166/166 [00:40<00:00,  4.13it/s]


Epoch 49/50, Loss: 0.5427


100%|██████████| 42/42 [00:10<00:00,  4.19it/s]


Accuracy: 0.7557
F1 Score: 0.7226
Test Accuracy: 0.7557, F1 Score: 0.7226


100%|██████████| 166/166 [00:40<00:00,  4.14it/s]


Epoch 50/50, Loss: 0.5419


100%|██████████| 42/42 [00:09<00:00,  4.22it/s]

Accuracy: 0.7526
F1 Score: 0.7201
Test Accuracy: 0.7526, F1 Score: 0.7201
Training complete!





In [None]:
# best_model.pth holds a fully pickled nn.Module (written with torch.save(model, ...)),
# so it has to be loaded with weights_only=False. Unpickling can execute arbitrary
# code: only load checkpoint files you created yourself.
best_integrated_model = torch.load('best_model.pth', map_location=device, weights_only=False).to(device)

  best_integrated_model = torch.load('best_model.pth').to(device)


In [None]:
# Re-score the reloaded best checkpoint on the test set; returns (accuracy, f1)
evaluate(best_integrated_model, test_loader, device)

100%|██████████| 42/42 [00:09<00:00,  4.22it/s]

Accuracy: 0.7572
F1 Score: 0.7339





(0.7571644042232277, 0.7338842975206612)