In [None]:
!pip install vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
from sklearn.metrics import f1_score, accuracy_score
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, BertTokenizer
import spacy
import nltk
import re
import string
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize, word_tokenize
from sklearn.model_selection import train_test_split

In [None]:
# Fetch the NLTK data needed by nltk.word_tokenize / nltk.sent_tokenize further down.
# Presumably both the newer ('punkt_tab', '*_eng') and the legacy resource names are
# requested so the notebook works across NLTK versions — TODO confirm.
# NOTE(review): no POS-tagging call is visible in this notebook, so the two tagger
# downloads may be unnecessary — verify before removing.
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [None]:
analyzer = SentimentIntensityAnalyzer()

In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [None]:
dataset_gender_label = pd.read_csv('gender_consensus_labels.csv')

In [None]:
# Hold out 20% of the rows for testing; random_state is fixed so the split is reproducible.
# NOTE(review): the split is not stratified on the label columns — confirm this is
# acceptable given the class imbalance reported for the training labels below.
train_df, test_df = train_test_split(dataset_gender_label, test_size=0.2, random_state=42)

In [None]:
class TweetDataset_train(Dataset):
    """Training dataset that yields ``(tweet, label)`` pairs from a DataFrame.

    The wrapped frame must provide a ``'tweet'`` column and a
    ``'male_consensus_label'`` column; the latter is used as the target.
    """

    def __init__(self, data):
        # Keep a reference to the frame; rows are looked up by position on demand.
        self.data = data

    def __len__(self):
        """Return the number of rows in the wrapped frame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(tweet, label)`` for the positional index (or slice) ``idx``."""
        selected = self.data.iloc[idx]
        return selected['tweet'], selected['male_consensus_label']

In [None]:
class TweetDataset_test(Dataset):
    """Evaluation dataset that yields ``(tweet, label)`` pairs from a DataFrame.

    The wrapped frame must provide a ``'tweet'`` column and a
    ``'consensus_label'`` column; the latter is used as the target.

    NOTE(review): the training dataset reads ``'male_consensus_label'`` while this
    one reads ``'consensus_label'`` — confirm the differing label columns are the
    intended experimental setup.
    """

    def __init__(self, data):
        # Keep a reference to the frame; rows are looked up by position on demand.
        self.data = data

    def __len__(self):
        """Return the number of rows in the wrapped frame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(tweet, label)`` for the positional index (or slice) ``idx``."""
        selected = self.data.iloc[idx]
        return selected['tweet'], selected['consensus_label']

In [None]:
train_dataset = TweetDataset_train(train_df)
test_dataset = TweetDataset_test(test_df)

In [None]:
train_dataset[0:5]

(1459    “mansplaining” is literally just how intellige...
 535     if you don’t want me but your friend do, dont ...
 1714    @username @username @username @username isn't ...
 2484    @username's account is temporarily unavailable...
 1097    @username if it wasn't for the gender biases o...
 Name: tweet, dtype: object,
 1459    1
 535     1
 1714    1
 2484    0
 1097    1
 Name: male_consensus_label, dtype: int64)

In [None]:
# Class counts of the training labels, kept as plain Python ints.
positive_samples = int(train_df['male_consensus_label'].eq(1).sum())
negative_samples = int(train_df['male_consensus_label'].eq(0).sum())

In [None]:
positive_samples,negative_samples

(1047, 1604)

In [None]:
# Batches of 16 examples. Only the training loader is shuffled; the test loader
# iterates the test dataset in its stored order.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [None]:
nlp = spacy.load('en_core_web_sm')

In [None]:
# Lexicon used for the sexist-word features (described by the author as an English
# women-hatred lexicon). The tab-separated file is read in full and only its 'lemma'
# column is kept, as a plain Python list; handcrafted_features() counts lower-cased
# tweet tokens that appear in this list.
en_lex_df = pd.read_csv('hurtlex_EN.tsv', sep='\t')
en_lex = en_lex_df['lemma'].tolist()

In [None]:
def _safe_ratio(count, total):
    """Return ``count / total``, or 0 when ``total`` is not positive."""
    return count / total if total > 0 else 0


def handcrafted_features(texts):
    """Compute 22 hand-crafted linguistic features for each tweet.

    Uses the module-level spaCy pipeline ``nlp``, the VADER ``analyzer``, NLTK
    tokenisation and the ``en_lex`` lexicon. Columns of the result, in order:

         0 clauses per sentence             11 VADER compound sentiment
         1 imperative sentence count        12 lexicon word count
         2 passive construction count       13 lexicon word ratio
         3 she/her/hers share of pronouns   14 punctuation count
         4 negation count                   15 punctuation ratio
         5 NLTK token count                 16 exclamation mark count
         6 average characters per token     17 exclamation mark ratio
         7 NLTK sentence count              18 question mark count
         8 hashtag count (excluding #URL)   19 question mark ratio
         9 '@username' mention count        20 ':...:' emoji-code count
        10 '#URL' link count                21 emoji-code ratio

    All ratios in columns 13-21 are relative to the NLTK token count.

    Args:
        texts: iterable of tweet strings (e.g. one batch from the DataLoader).

    Returns:
        ``torch.float32`` tensor of shape ``(len(texts), 22)``. An empty input
        yields a ``(0, 22)`` tensor so it can still be concatenated along dim 1.
    """
    num_features = 22  # must equal the length of the per-text list built below

    texts = list(texts)
    if not texts:
        # torch.tensor([]) would be 1-D with shape (0,); keep the feature axis instead.
        return torch.empty((0, num_features), dtype=torch.float32)

    # Built once per call: set membership is O(1), whereas testing against the
    # lexicon list would scan it for every token of every tweet.
    lexicon = set(en_lex)
    women_gendered_pronouns = {'she', 'her', 'hers'}
    clause_deps = {"csubj", "ccomp", "advcl", "acl", "relcl"}

    features = []
    for text in texts:
        doc = nlp(text)
        sents = list(doc.sents)

        # clauses per sentence (clausal dependency labels / spaCy sentence count)
        clause_count = sum(1 for token in doc if token.dep_ in clause_deps)
        clause_per_sentence = _safe_ratio(clause_count, len(sents))

        # imperative sentences: sentence opens with a base-form verb
        imperative_count = sum(
            1 for sent in sents
            if len(sent) > 0 and sent[0].pos_ == "VERB" and sent[0].tag_ == "VB"
        )

        # passive voice: passive subject whose head also carries a passive auxiliary
        passive_count = sum(
            1 for token in doc
            if token.dep_ == "nsubjpass"
            and any(child.dep_ == "auxpass" for child in token.head.children)
        )

        # ratio of women-related gendered pronouns to all pronouns
        pronouns = [token.text.lower() for token in doc if token.pos_ in {"PRON"}]
        gendered_count = sum(1 for pronoun in pronouns if pronoun in women_gendered_pronouns)
        gendered_pronoun_ratio = _safe_ratio(gendered_count, len(pronouns))

        # count of negations
        neg_count = sum(1 for token in doc if token.dep_ == "neg")

        # 1. token count in a tweet
        tokens = nltk.word_tokenize(text)
        token_num_per_tweet = len(tokens)

        # 2. average number of characters per token, a proxy for word complexity
        char_num_per_tweet = sum(len(token) for token in tokens)
        avg_char_num_per_token = _safe_ratio(char_num_per_tweet, token_num_per_tweet)

        # 3. sentence count in a tweet (NLTK segmentation, independent of spaCy's)
        sentence_num = len(nltk.sent_tokenize(text))

        # 4. number of hashtags; the '#URL' placeholder is not a hashtag
        hashtag_num = len(re.findall(r'#(?!URL\b)\w+', text))

        # 5. number of mentions
        mention_num = text.count('@username')

        # 6. number of links
        link_num = text.count('#URL')

        # 7. VADER compound sentiment score, from -1 to 1
        sentiment_compound = analyzer.polarity_scores(text)['compound']

        # 8. number of tokens found in the lexicon
        sexwords_count = sum(1 for token in tokens if token.lower() in lexicon)

        # 9. ratio of lexicon words in a tweet
        sexwords_ratio = _safe_ratio(sexwords_count, token_num_per_tweet)

        # 10. number of punctuation characters
        punctuation_count = sum(1 for char in text if char in string.punctuation)

        # 11. ratio of punctuation characters to the number of tokens
        punctuation_ratio = _safe_ratio(punctuation_count, token_num_per_tweet)

        # 12. number of exclamation marks
        exclamation_count = text.count('!')

        # 13. ratio of exclamation marks
        exclamation_ratio = _safe_ratio(exclamation_count, token_num_per_tweet)

        # 14. number of question marks
        question_count = text.count('?')

        # 15. ratio of question marks
        question_ratio = _safe_ratio(question_count, token_num_per_tweet)

        # 16. number of emoji codes: shortest spans enclosed by two colons
        emoji_count = len(re.findall(r':[^:]+?:', text))

        # 17. emoji ratio
        emoji_ratio = _safe_ratio(emoji_count, token_num_per_tweet)

        # one fixed-order row per text; the order defines the column meaning
        features.append([
            clause_per_sentence,
            imperative_count,
            passive_count,
            gendered_pronoun_ratio,
            neg_count,
            token_num_per_tweet,
            avg_char_num_per_token,
            sentence_num,
            hashtag_num,
            mention_num,
            link_num,
            sentiment_compound,
            sexwords_count,
            sexwords_ratio,
            punctuation_count,
            punctuation_ratio,
            exclamation_count,
            exclamation_ratio,
            question_count,
            question_ratio,
            emoji_count,
            emoji_ratio
        ])

    # convert to tensor
    return torch.tensor(features, dtype=torch.float32)

In [None]:
class IntegratedSexistDetector(nn.Module):
    """Frozen BERT encoder combined with hand-crafted features and an MLP head.

    For each text the classifier input is the concatenation of
    BERT's ``pooler_output``, the mean of the real word-piece hidden states
    (padding and the [CLS]/[SEP] markers excluded) and the vector returned by
    ``handcrafted_features``.

    Args:
        padding: padding strategy forwarded to the tokenizer.
        num_classes: width of the output layer (1 = a single logit).
        handcrafted_feature_dim: number of columns produced by
            ``handcrafted_features``.
    """

    def __init__(self, padding='max_length', num_classes=1, handcrafted_feature_dim=22):
        super(IntegratedSexistDetector, self).__init__()
        self.padding = padding
        self.berttokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        # Unused by forward(), which pools with the attention mask instead; retained
        # so the module's attribute set stays unchanged for existing users.
        self.pooling = nn.AdaptiveAvgPool1d(1)

        # pooler output + mean token state (both hidden_size wide) + hand-crafted features
        combined_feature_dim = self.bert.config.hidden_size + self.bert.config.hidden_size + handcrafted_feature_dim
        self.cls = nn.Sequential(
            nn.Linear(combined_feature_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.1),

            nn.Linear(512, 256),
            nn.LayerNorm(256),
            nn.ReLU(),
            nn.Dropout(0.1),

            nn.Linear(256, num_classes)
        )

        # set the bert parameters as non-trainable
        for param in self.bert.parameters():
            param.requires_grad = False

    @staticmethod
    def _masked_mean_pool(last_hidden_state, attention_mask):
        """Average hidden states over real word-piece tokens only.

        Positions with ``attention_mask == 0`` (padding), the first position
        ([CLS]) and the last attended position ([SEP]) are excluded. Assumes the
        sequences are right-padded, i.e. the attended tokens form a prefix.

        Args:
            last_hidden_state: float tensor of shape (batch, seq_len, hidden).
            attention_mask: 0/1 tensor of shape (batch, seq_len).

        Returns:
            Float tensor of shape (batch, hidden); a row with no real tokens
            (e.g. an empty text) is all zeros.
        """
        token_mask = attention_mask.clone()  # do not modify the caller's mask
        token_mask[:, 0] = 0  # drop [CLS]
        # index of the last attended token per row, i.e. the [SEP] marker
        last_real = (attention_mask.sum(dim=1) - 1).clamp(min=0)
        rows = torch.arange(token_mask.size(0), device=token_mask.device)
        token_mask[rows, last_real] = 0

        weights = token_mask.unsqueeze(-1).to(last_hidden_state.dtype)
        summed = (last_hidden_state * weights).sum(dim=1)
        # clamp avoids 0/0 when a row contains only the two special tokens
        counts = weights.sum(dim=1).clamp(min=1.0)
        return summed / counts

    def tokenize(self, texts):
        """Tokenize ``texts`` and return ``(input_ids, attention_mask)``.

        Sequences are truncated to 256 tokens and padded according to
        ``self.padding``. Both tensors are placed on the device that holds this
        module's parameters, so the model works wherever it has been moved.
        """
        encoding = self.berttokenizer(
            texts,
            add_special_tokens=True,
            padding=self.padding,
            truncation=True,
            max_length=256,
            return_tensors="pt"
        )
        module_device = next(self.parameters()).device
        input_ids = encoding['input_ids'].to(module_device)
        attention_mask = encoding['attention_mask'].to(module_device)
        return input_ids, attention_mask

    def forward(self, texts):
        """Return raw logits of shape (batch, num_classes) for a batch of texts."""
        input_ids, attention_mask = self.tokenize(texts)
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_token = outputs.pooler_output
        # A plain average over all positions would be dominated by padding when
        # padding='max_length'; pool over the attended word pieces only.
        mean_hidden_state = self._masked_mean_pool(outputs.last_hidden_state, attention_mask)
        handcrafted_feats = handcrafted_features(texts).to(attention_mask.device)
        combined_features = torch.cat([cls_token, mean_hidden_state, handcrafted_feats], dim=1)
        features = self.cls(combined_features)
        return features

In [None]:
# Positive-class weight passed to BCEWithLogitsLoss in train(): the ratio of negative
# to positive training labels, so the rarer positive examples weigh more in the loss.
# It is moved to `device`, the same device the model is placed on.
pos_weight = torch.tensor([negative_samples / positive_samples]).to(device)
pos_weight

tensor([1.5320], device='cuda:0')

In [None]:
# train function
def train(model, train_loader, test_loader, optimizer,
          scheduler,
          epochs, device, criterion=nn.BCEWithLogitsLoss(pos_weight=pos_weight)):
    """Train ``model`` and keep the checkpoint with the best evaluation accuracy.

    Each epoch runs one pass over ``train_loader``, evaluates on ``test_loader``
    via ``evaluate``, saves the whole model to ``best_model.pth`` whenever the
    accuracy improves, and then steps the learning-rate scheduler.

    Args:
        model: module called as ``model(texts)`` that returns logits of shape (batch, 1).
        train_loader: yields ``(texts, labels)`` batches for optimisation.
        test_loader: yields ``(texts, labels)`` batches for per-epoch evaluation.
        optimizer: optimizer over the model's parameters.
        scheduler: learning-rate scheduler, stepped once per epoch.
        epochs: number of passes over ``train_loader``.
        device: device the label tensors are moved to.
        criterion: loss applied to ``(logits, float labels)``. The default is
            built once, when this function is defined, from the module-level
            ``pos_weight``.

    NOTE(review): checkpoint selection uses ``test_loader``; if that is the final
    test set, the reported best accuracy is optimistically biased — consider a
    separate validation split.
    """
    best_acc = 0

    for epoch in range(epochs):
        # evaluate() switches the model to eval mode, so training mode (dropout)
        # must be re-enabled at the start of every epoch, not just once up front.
        model.train()
        total_loss = 0

        # training loop
        for (texts, labels) in tqdm(train_loader):
            labels = labels.to(torch.float32).to(device)
            optimizer.zero_grad()
            logits = model(texts)
            logits = logits.squeeze(1)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError
        avg_loss = total_loss / max(len(train_loader), 1)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")

        # evaluate the model on the evaluation set after each epoch
        acc, f1 = evaluate(model, test_loader, device)
        print(f"Test Accuracy: {acc:.4f}, F1 Score: {f1:.4f}")

        # if current acc is greater than previous best acc, save a new best model
        if acc > best_acc:
            best_acc = acc
            print(f"New best model found with accuracy: {best_acc:.4f}, saving the model...")
            # Pickles the entire module (not just a state_dict); loading it later
            # requires the model class to be defined/importable.
            torch.save(model, "best_model.pth")

        # apply scheduler to adjust the learning rate
        scheduler.step()

    print("Training complete!")

In [None]:
# evaluate model
sigmoid = nn.Sigmoid()

def evaluate(model, dataloader, device, threshold=0.5):
    """Score ``model`` on ``dataloader`` and report accuracy and F1.

    The model is put into eval mode (and left there), its logits are turned into
    probabilities with a sigmoid, and a probability above ``threshold`` counts
    as a positive prediction. Both metrics are printed and returned.

    Args:
        model: module called as ``model(texts)`` that returns logits of shape (batch, 1).
        dataloader: yields ``(texts, labels)`` batches.
        device: device the label tensors are moved to.
        threshold: probability cut-off for predicting the positive class.

    Returns:
        Tuple ``(accuracy, f1)``.
    """
    model.eval()
    predictions, targets = [], []

    with torch.no_grad():
        for batch_texts, batch_labels in tqdm(dataloader):
            batch_labels = batch_labels.to(device)
            # raw logits -> probabilities, then drop the singleton class axis
            probabilities = sigmoid(model(batch_texts)).squeeze(1)
            batch_preds = (probabilities > threshold).int()

            predictions.extend(batch_preds.cpu().numpy())
            targets.extend(batch_labels.cpu().numpy())

    accuracy = accuracy_score(targets, predictions)
    f1 = f1_score(targets, predictions)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")

    return accuracy, f1

In [None]:
model = IntegratedSexistDetector()
model.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

IntegratedSexistDetector(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, 

In [None]:
# Adam over all model parameters. The BERT weights have requires_grad=False (set in
# IntegratedSexistDetector.__init__), so only the classifier head is actually trained.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Multiply the learning rate by 0.1 after every 20 scheduler steps; train() steps the
# scheduler once per epoch.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

In [None]:
epochs = 50

In [None]:
train(model, train_loader, test_loader, optimizer, scheduler, epochs, device)

100%|██████████| 166/166 [00:45<00:00,  3.61it/s]


Epoch 1/50, Loss: 0.8013


100%|██████████| 42/42 [00:10<00:00,  3.92it/s]


Accuracy: 0.7255
F1 Score: 0.7065
Test Accuracy: 0.7255, F1 Score: 0.7065
New best model found with accuracy: 0.7255, saving the model...


100%|██████████| 166/166 [00:41<00:00,  3.99it/s]


Epoch 2/50, Loss: 0.6589


100%|██████████| 42/42 [00:10<00:00,  4.07it/s]


Accuracy: 0.7466
F1 Score: 0.7316
Test Accuracy: 0.7466, F1 Score: 0.7316
New best model found with accuracy: 0.7466, saving the model...


100%|██████████| 166/166 [00:41<00:00,  4.01it/s]


Epoch 3/50, Loss: 0.6408


100%|██████████| 42/42 [00:10<00:00,  4.09it/s]


Accuracy: 0.7315
F1 Score: 0.5991
Test Accuracy: 0.7315, F1 Score: 0.5991


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 4/50, Loss: 0.5743


100%|██████████| 42/42 [00:10<00:00,  4.08it/s]


Accuracy: 0.7873
F1 Score: 0.7638
Test Accuracy: 0.7873, F1 Score: 0.7638
New best model found with accuracy: 0.7873, saving the model...


100%|██████████| 166/166 [00:41<00:00,  3.99it/s]


Epoch 5/50, Loss: 0.5937


100%|██████████| 42/42 [00:10<00:00,  4.08it/s]


Accuracy: 0.7496
F1 Score: 0.7522
Test Accuracy: 0.7496, F1 Score: 0.7522


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 6/50, Loss: 0.5550


100%|██████████| 42/42 [00:10<00:00,  4.10it/s]


Accuracy: 0.7722
F1 Score: 0.7561
Test Accuracy: 0.7722, F1 Score: 0.7561


100%|██████████| 166/166 [00:41<00:00,  4.01it/s]


Epoch 7/50, Loss: 0.5584


100%|██████████| 42/42 [00:10<00:00,  4.11it/s]


Accuracy: 0.7888
F1 Score: 0.7500
Test Accuracy: 0.7888, F1 Score: 0.7500
New best model found with accuracy: 0.7888, saving the model...


100%|██████████| 166/166 [00:41<00:00,  4.01it/s]


Epoch 8/50, Loss: 0.5416


100%|██████████| 42/42 [00:10<00:00,  4.09it/s]


Accuracy: 0.7753
F1 Score: 0.7118
Test Accuracy: 0.7753, F1 Score: 0.7118


100%|██████████| 166/166 [00:41<00:00,  3.98it/s]


Epoch 9/50, Loss: 0.5252


100%|██████████| 42/42 [00:10<00:00,  4.04it/s]


Accuracy: 0.7526
F1 Score: 0.7508
Test Accuracy: 0.7526, F1 Score: 0.7508


100%|██████████| 166/166 [00:41<00:00,  3.97it/s]


Epoch 10/50, Loss: 0.5113


100%|██████████| 42/42 [00:10<00:00,  4.08it/s]


Accuracy: 0.7572
F1 Score: 0.7046
Test Accuracy: 0.7572, F1 Score: 0.7046


100%|██████████| 166/166 [00:41<00:00,  4.02it/s]


Epoch 11/50, Loss: 0.5097


100%|██████████| 42/42 [00:10<00:00,  4.08it/s]


Accuracy: 0.7662
F1 Score: 0.7386
Test Accuracy: 0.7662, F1 Score: 0.7386


100%|██████████| 166/166 [00:41<00:00,  4.01it/s]


Epoch 12/50, Loss: 0.5122


100%|██████████| 42/42 [00:10<00:00,  4.09it/s]


Accuracy: 0.7481
F1 Score: 0.6924
Test Accuracy: 0.7481, F1 Score: 0.6924


100%|██████████| 166/166 [00:41<00:00,  4.02it/s]


Epoch 13/50, Loss: 0.4424


100%|██████████| 42/42 [00:10<00:00,  4.09it/s]


Accuracy: 0.7602
F1 Score: 0.7186
Test Accuracy: 0.7602, F1 Score: 0.7186


100%|██████████| 166/166 [00:41<00:00,  4.01it/s]


Epoch 14/50, Loss: 0.4269


100%|██████████| 42/42 [00:10<00:00,  4.09it/s]


Accuracy: 0.7647
F1 Score: 0.7164
Test Accuracy: 0.7647, F1 Score: 0.7164


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 15/50, Loss: 0.4180


100%|██████████| 42/42 [00:10<00:00,  4.06it/s]


Accuracy: 0.7632
F1 Score: 0.7140
Test Accuracy: 0.7632, F1 Score: 0.7140


100%|██████████| 166/166 [00:41<00:00,  4.02it/s]


Epoch 16/50, Loss: 0.4163


100%|██████████| 42/42 [00:10<00:00,  4.11it/s]


Accuracy: 0.7662
F1 Score: 0.7504
Test Accuracy: 0.7662, F1 Score: 0.7504


100%|██████████| 166/166 [00:41<00:00,  4.01it/s]


Epoch 17/50, Loss: 0.4102


100%|██████████| 42/42 [00:10<00:00,  4.08it/s]


Accuracy: 0.7753
F1 Score: 0.7391
Test Accuracy: 0.7753, F1 Score: 0.7391


100%|██████████| 166/166 [00:41<00:00,  4.01it/s]


Epoch 18/50, Loss: 0.4057


100%|██████████| 42/42 [00:10<00:00,  4.07it/s]


Accuracy: 0.7722
F1 Score: 0.7410
Test Accuracy: 0.7722, F1 Score: 0.7410


100%|██████████| 166/166 [00:41<00:00,  4.01it/s]


Epoch 19/50, Loss: 0.4004


100%|██████████| 42/42 [00:10<00:00,  4.07it/s]


Accuracy: 0.7738
F1 Score: 0.7378
Test Accuracy: 0.7738, F1 Score: 0.7378


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 20/50, Loss: 0.3939


100%|██████████| 42/42 [00:10<00:00,  4.10it/s]


Accuracy: 0.7738
F1 Score: 0.7414
Test Accuracy: 0.7738, F1 Score: 0.7414


100%|██████████| 166/166 [00:41<00:00,  3.97it/s]


Epoch 21/50, Loss: 0.3882


100%|██████████| 42/42 [00:10<00:00,  4.06it/s]


Accuracy: 0.7692
F1 Score: 0.7330
Test Accuracy: 0.7692, F1 Score: 0.7330


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 22/50, Loss: 0.3847


100%|██████████| 42/42 [00:10<00:00,  4.10it/s]


Accuracy: 0.7722
F1 Score: 0.7279
Test Accuracy: 0.7722, F1 Score: 0.7279


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 23/50, Loss: 0.3807


100%|██████████| 42/42 [00:10<00:00,  4.05it/s]


Accuracy: 0.7707
F1 Score: 0.7415
Test Accuracy: 0.7707, F1 Score: 0.7415


100%|██████████| 166/166 [00:41<00:00,  3.98it/s]


Epoch 24/50, Loss: 0.3740


100%|██████████| 42/42 [00:10<00:00,  4.07it/s]


Accuracy: 0.7768
F1 Score: 0.7404
Test Accuracy: 0.7768, F1 Score: 0.7404


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 25/50, Loss: 0.3605


100%|██████████| 42/42 [00:10<00:00,  4.08it/s]


Accuracy: 0.7798
F1 Score: 0.7465
Test Accuracy: 0.7798, F1 Score: 0.7465


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 26/50, Loss: 0.3599


100%|██████████| 42/42 [00:10<00:00,  4.07it/s]


Accuracy: 0.7783
F1 Score: 0.7443
Test Accuracy: 0.7783, F1 Score: 0.7443


100%|██████████| 166/166 [00:41<00:00,  3.99it/s]


Epoch 27/50, Loss: 0.3588


100%|██████████| 42/42 [00:10<00:00,  4.08it/s]


Accuracy: 0.7798
F1 Score: 0.7465
Test Accuracy: 0.7798, F1 Score: 0.7465


100%|██████████| 166/166 [00:41<00:00,  4.01it/s]


Epoch 28/50, Loss: 0.3586


100%|██████████| 42/42 [00:10<00:00,  4.06it/s]


Accuracy: 0.7753
F1 Score: 0.7444
Test Accuracy: 0.7753, F1 Score: 0.7444


100%|██████████| 166/166 [00:41<00:00,  4.01it/s]


Epoch 29/50, Loss: 0.3582


100%|██████████| 42/42 [00:10<00:00,  4.08it/s]


Accuracy: 0.7738
F1 Score: 0.7378
Test Accuracy: 0.7738, F1 Score: 0.7378


100%|██████████| 166/166 [00:41<00:00,  3.99it/s]


Epoch 30/50, Loss: 0.3579


100%|██████████| 42/42 [00:10<00:00,  4.09it/s]


Accuracy: 0.7783
F1 Score: 0.7452
Test Accuracy: 0.7783, F1 Score: 0.7452


100%|██████████| 166/166 [00:41<00:00,  4.01it/s]


Epoch 31/50, Loss: 0.3566


100%|██████████| 42/42 [00:10<00:00,  4.08it/s]


Accuracy: 0.7738
F1 Score: 0.7414
Test Accuracy: 0.7738, F1 Score: 0.7414


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 32/50, Loss: 0.3556


100%|██████████| 42/42 [00:10<00:00,  4.09it/s]


Accuracy: 0.7768
F1 Score: 0.7431
Test Accuracy: 0.7768, F1 Score: 0.7431


100%|██████████| 166/166 [00:41<00:00,  4.01it/s]


Epoch 33/50, Loss: 0.3551


100%|██████████| 42/42 [00:10<00:00,  4.09it/s]


Accuracy: 0.7722
F1 Score: 0.7356
Test Accuracy: 0.7722, F1 Score: 0.7356


100%|██████████| 166/166 [00:41<00:00,  3.95it/s]


Epoch 34/50, Loss: 0.3556


100%|██████████| 42/42 [00:10<00:00,  4.06it/s]


Accuracy: 0.7738
F1 Score: 0.7414
Test Accuracy: 0.7738, F1 Score: 0.7414


100%|██████████| 166/166 [00:41<00:00,  4.01it/s]


Epoch 35/50, Loss: 0.3540


100%|██████████| 42/42 [00:10<00:00,  4.07it/s]


Accuracy: 0.7722
F1 Score: 0.7356
Test Accuracy: 0.7722, F1 Score: 0.7356


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 36/50, Loss: 0.3544


100%|██████████| 42/42 [00:10<00:00,  4.10it/s]


Accuracy: 0.7753
F1 Score: 0.7400
Test Accuracy: 0.7753, F1 Score: 0.7400


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 37/50, Loss: 0.3519


100%|██████████| 42/42 [00:10<00:00,  4.08it/s]


Accuracy: 0.7753
F1 Score: 0.7400
Test Accuracy: 0.7753, F1 Score: 0.7400


100%|██████████| 166/166 [00:41<00:00,  3.97it/s]


Epoch 38/50, Loss: 0.3523


100%|██████████| 42/42 [00:10<00:00,  4.06it/s]


Accuracy: 0.7753
F1 Score: 0.7400
Test Accuracy: 0.7753, F1 Score: 0.7400


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 39/50, Loss: 0.3522


100%|██████████| 42/42 [00:10<00:00,  4.06it/s]


Accuracy: 0.7753
F1 Score: 0.7400
Test Accuracy: 0.7753, F1 Score: 0.7400


100%|██████████| 166/166 [00:41<00:00,  4.01it/s]


Epoch 40/50, Loss: 0.3526


100%|██████████| 42/42 [00:10<00:00,  4.09it/s]


Accuracy: 0.7768
F1 Score: 0.7422
Test Accuracy: 0.7768, F1 Score: 0.7422


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 41/50, Loss: 0.3519


100%|██████████| 42/42 [00:10<00:00,  4.03it/s]


Accuracy: 0.7768
F1 Score: 0.7422
Test Accuracy: 0.7768, F1 Score: 0.7422


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 42/50, Loss: 0.3515


100%|██████████| 42/42 [00:10<00:00,  4.06it/s]


Accuracy: 0.7768
F1 Score: 0.7422
Test Accuracy: 0.7768, F1 Score: 0.7422


100%|██████████| 166/166 [00:41<00:00,  3.99it/s]


Epoch 43/50, Loss: 0.3520


100%|██████████| 42/42 [00:10<00:00,  4.04it/s]


Accuracy: 0.7768
F1 Score: 0.7422
Test Accuracy: 0.7768, F1 Score: 0.7422


100%|██████████| 166/166 [00:41<00:00,  3.99it/s]


Epoch 44/50, Loss: 0.3517


100%|██████████| 42/42 [00:10<00:00,  4.06it/s]


Accuracy: 0.7783
F1 Score: 0.7443
Test Accuracy: 0.7783, F1 Score: 0.7443


100%|██████████| 166/166 [00:41<00:00,  4.01it/s]


Epoch 45/50, Loss: 0.3522


100%|██████████| 42/42 [00:10<00:00,  4.04it/s]


Accuracy: 0.7783
F1 Score: 0.7443
Test Accuracy: 0.7783, F1 Score: 0.7443


100%|██████████| 166/166 [00:41<00:00,  3.98it/s]


Epoch 46/50, Loss: 0.3522


100%|██████████| 42/42 [00:10<00:00,  4.08it/s]


Accuracy: 0.7768
F1 Score: 0.7422
Test Accuracy: 0.7768, F1 Score: 0.7422


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 47/50, Loss: 0.3517


100%|██████████| 42/42 [00:10<00:00,  4.05it/s]


Accuracy: 0.7783
F1 Score: 0.7443
Test Accuracy: 0.7783, F1 Score: 0.7443


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 48/50, Loss: 0.3513


100%|██████████| 42/42 [00:10<00:00,  4.06it/s]


Accuracy: 0.7768
F1 Score: 0.7422
Test Accuracy: 0.7768, F1 Score: 0.7422


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 49/50, Loss: 0.3518


100%|██████████| 42/42 [00:10<00:00,  4.10it/s]


Accuracy: 0.7768
F1 Score: 0.7422
Test Accuracy: 0.7768, F1 Score: 0.7422


100%|██████████| 166/166 [00:41<00:00,  4.00it/s]


Epoch 50/50, Loss: 0.3512


100%|██████████| 42/42 [00:10<00:00,  4.01it/s]

Accuracy: 0.7768
F1 Score: 0.7422
Test Accuracy: 0.7768, F1 Score: 0.7422
Training complete!





In [None]:
# best_model.pth holds a fully pickled nn.Module (written by torch.save(model, ...) in
# train), so it has to be loaded with weights_only=False; relying on the default is
# deprecated and newer PyTorch releases default to weights_only=True, which rejects
# arbitrary pickled classes. Unpickling can execute code — only load checkpoints that
# this notebook produced. map_location lets a GPU-saved checkpoint load on any device.
best_integrated_model = torch.load('best_model.pth', map_location=device, weights_only=False).to(device)

  best_integrated_model = torch.load('best_model.pth').to(device)


In [None]:
evaluate(best_integrated_model, test_loader, device)

100%|██████████| 42/42 [00:10<00:00,  4.07it/s]

Accuracy: 0.7888
F1 Score: 0.7500





(0.7888386123680241, 0.75)