In [None]:
# Mount Google Drive at /content/drive; the dataset paths used later in this
# notebook are located under that mount point. Requires the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import time
from transformers import BertModel, BertTokenizer
import torch
from torch.optim import AdamW
from transformers import BertForSequenceClassification
from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score
from torch.nn import CrossEntropyLoss

In [None]:
# Read the train / dev / test TSV files (tab-separated, no header row, so the
# columns are addressed by integer position later on).
train_df, val_df, test_df = [
    pd.read_csv(tsv_path, sep='\t', header=None)
    for tsv_path in (
        '/content/drive/My Drive/task3_data/twitter-2016train-A.tsv',
        '/content/drive/My Drive/task3_data/twitter-2016dev-A.tsv',
        '/content/drive/My Drive/task3_data/twitter-2016test-A.tsv',
    )
]

In [None]:
# Generating labels for the texts
def produce_label(raw_data, one_hot=False):
    """Map the sentiment strings in column 1 of `raw_data` to integer classes.

    The encoding is 'negative' -> 0, 'neutral' -> 1, 'positive' -> 2.

    Args:
        raw_data: Object indexable with ``raw_data[1]`` that yields the
            sentiment strings (a DataFrame read with ``header=None`` in this
            notebook).
        one_hot: When True, return a float array of shape (n, 3) with a single
            1.0 per row instead of the integer class ids.

    Returns:
        A NumPy array of integer class ids of length n, or the (n, 3) one-hot
        matrix when `one_hot` is True.

    Raises:
        ValueError: If any entry is not one of the three known labels. Dropping
            such rows silently would make the labels shorter than the texts
            they belong to and misalign every following pair.
    """
    label_to_id = {'negative': 0, 'neutral': 1, 'positive': 2}
    raw_labels = list(raw_data[1])

    unknown = sorted({repr(item) for item in raw_labels if item not in label_to_id})
    if unknown:
        raise ValueError(
            "Unrecognised sentiment label(s): " + ", ".join(unknown)
            + "; expected one of " + ", ".join(sorted(label_to_id))
        )

    # dtype=int keeps the array integer-typed even when there are no rows,
    # which the one-hot indexing below relies on.
    label = np.array([label_to_id[item] for item in raw_labels], dtype=int)
    if one_hot:
        # Row i of the identity matrix is the one-hot vector for class i.
        label = np.eye(len(label_to_id))[label]
    return label

In [None]:
# Tokenize input and generate input ids
_TOKENIZER_CACHE = {}


def _get_tokenizer(name='bert-base-uncased'):
    """Return the tokenizer for `name`, loading it only on first use."""
    if name not in _TOKENIZER_CACHE:
        _TOKENIZER_CACHE[name] = BertTokenizer.from_pretrained(name)
    return _TOKENIZER_CACHE[name]


def produce_input(text_data, max_length=64):
    """Tokenize a batch of texts and return their padded input ids.

    Args:
        text_data: The text(s) handed to the tokenizer (a list of strings in
            this notebook).
        max_length: Every sequence is padded or truncated to this many tokens.

    Returns:
        The ``input_ids`` tensor produced by the tokenizer. Only the ids are
        returned; the tokenizer's attention mask is not part of the result.
    """
    # produce_input is called once per batch, so the tokenizer is cached
    # instead of being re-created from the pretrained files on every call.
    tokenizer = _get_tokenizer()
    encoded_batch = tokenizer(text_data, return_tensors='pt', padding='max_length', truncation=True, max_length=max_length)
    return encoded_batch['input_ids']

In [None]:
# Produce batch data for computing
def produce_batch_data(raw_data, batch_size=16, max_length=64, one_hot=False):
    """Lazily yield ``(texts, labels)`` pairs, one consecutive slice at a time.

    Args:
        raw_data: DataFrame whose column 1 is consumed by `produce_label` and
            whose column 2 supplies the texts.
        batch_size: Number of rows per slice; the final slice may be shorter.
        max_length: Accepted for signature compatibility; not used here.
        one_hot: Forwarded to `produce_label`.

    Yields:
        A tuple of (list of column-2 values, label array) for each slice, in
        row order.
    """
    total_rows = len(raw_data)
    for start in range(0, total_rows, batch_size):
        # Positional slicing stops at the last row, so no explicit clamp is needed.
        chunk = raw_data.iloc[start:start + batch_size]
        chunk_labels = produce_label(chunk, one_hot)
        chunk_texts = list(chunk[2])
        yield chunk_texts, chunk_labels

In [None]:
# Confirm that batch data can be successfully generated by printing the first batch
first_batch = next(produce_batch_data(train_df), None)
if first_batch is not None:
    print(first_batch)

(["dear @Microsoft the newOoffice for Mac is great and all, but no Lync update? C'mon.", "@Microsoft how about you make a system that doesn't eat my friggin discs. This is the 2nd time this has happened and I am so sick of it!", "I may be ignorant on this issue but... should we celebrate @Microsoft's parental leave changes? Doesn't the gender divide suggest... (1/2)", 'Thanks to @microsoft, I just may be switching over to @apple.', 'If I make a game as a #windows10 Universal App. Will #xboxone owners be able to download and play it in November? @majornelson @Microsoft', 'Microsoft, I may not prefer your gaming branch of business. But, you do make a damn fine operating system. #Windows10 @Microsoft', '@MikeWolf1980 @Microsoft I will be downgrading and let #Windows10 be out for almost the 1st yr b4 trying it again. #Windows10fail', '@Microsoft 2nd computer with same error!!! #Windows10fail Guess we will shelve this until SP1! http://t.co/QCcHlKuy8Q', 'Just ordered my 1st ever tablet; @Mi

In [None]:
# Check CUDA availability and pick the device that models and tensors are moved to
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    print("Using CPU")

Using GPU: Tesla T4


In [None]:
# Function to convert the regression prediction back to a categorical prediction
def convert_to_categorical(predictions, threshold=0.5):
    """Bucket regression scores into the three sentiment classes.

    Scores below ``1 - threshold`` become 0 (negative), scores above
    ``1 + threshold`` become 2 (positive), everything else (including the
    two boundary values) becomes 1 (neutral).

    Args:
        predictions: A scalar, a sequence, or an array of scores. A scalar or
            0-d array (e.g. a squeezed single-example batch) is treated as one
            prediction instead of failing on iteration.
        threshold: Half-width of the neutral band centred on 1.

    Returns:
        A list of ints (0, 1 or 2), one per score, in input order.
    """
    lower_bound = 1 - threshold
    upper_bound = 1 + threshold
    # reshape(-1) turns scalars / 0-d arrays into a length-1 vector and leaves
    # ordinary 1-d input unchanged.
    flat_scores = np.asarray(predictions).reshape(-1)

    categories = []
    for pred in flat_scores:
        if pred < lower_bound:
            categories.append(0)  # Negative
        elif pred > upper_bound:
            categories.append(2)  # Positive
        else:
            categories.append(1)  # Neutral
    return categories

In [None]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from torch.nn import MSELoss
from transformers import BertForSequenceClassification, AdamW
from tqdm.auto import tqdm
import torch
import numpy as np

def initialize_model():
    """Create a fresh single-output BERT sequence model on the selected device.

    Loads 'bert-base-uncased' with ``num_labels=1`` (one output value per
    example) and moves it to the module-level `device`.

    Returns:
        The model after ``.to(device)``.
    """
    regressor = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=1)
    regressor = regressor.to(device)
    return regressor

def _run_epoch(model, data_df, device, loss_fn, batch_size, max_length=64,
               optimizer=None, desc='Batches', leave=True):
    """Run one pass over `data_df`; update weights only when `optimizer` is given.

    Args:
        model: Sequence model returning an object with a ``logits`` attribute
            of shape (batch, 1).
        data_df: DataFrame consumed by `produce_batch_data`.
        device: Device the input ids and labels are moved to.
        loss_fn: Callable computing the loss from (scores, targets).
        batch_size: Rows per batch.
        max_length: Token length used by `produce_input`.
        optimizer: If not None, gradients are computed and a step is taken for
            every batch; if None the pass runs without gradient tracking.
        desc: Label of the tqdm progress bar.
        leave: Whether the tqdm bar stays visible after the pass.

    Returns:
        Tuple ``(mean_loss, labels, predictions)`` where `mean_loss` is the
        per-example mean of the loss, `labels` the true labels and
        `predictions` the class ids from `convert_to_categorical`.
    """
    training = optimizer is not None
    # Assumes model.config.pad_token_id matches the tokenizer's padding id
    # (0 for bert-base-uncased) -- confirm if the checkpoint is changed.
    pad_token_id = getattr(getattr(model, 'config', None), 'pad_token_id', None)

    loss_sum, num_examples = 0.0, 0
    predictions, labels = [], []
    batch_iterator = produce_batch_data(data_df, batch_size=batch_size, max_length=max_length, one_hot=False)

    with torch.set_grad_enabled(training):
        for batch_texts, batch_labels in tqdm(batch_iterator, desc=desc, leave=leave):
            input_ids = produce_input(batch_texts, max_length=max_length).to(device)
            targets = torch.tensor(batch_labels, dtype=torch.float).to(device)

            # Inputs are padded to max_length; without a mask the model
            # attends to the padding positions as if they were real tokens.
            attention_mask = None
            if pad_token_id is not None:
                attention_mask = (input_ids != pad_token_id).long()

            if training:
                model.zero_grad()

            outputs = model(input_ids, attention_mask=attention_mask)
            # squeeze(-1) drops only the output dimension, so a batch holding
            # a single example stays 1-d and still lines up with `targets`.
            scores = outputs.logits.squeeze(-1)
            loss = loss_fn(scores, targets)

            if training:
                loss.backward()
                optimizer.step()

            # Weight each batch by its size so a short final batch does not
            # distort the reported average.
            batch_len = targets.shape[0]
            loss_sum += loss.item() * batch_len
            num_examples += batch_len

            predictions.extend(convert_to_categorical(scores.detach().cpu().numpy()))
            labels.extend(targets.cpu().numpy())

    mean_loss = loss_sum / num_examples if num_examples else float('nan')
    return mean_loss, labels, predictions


def train_and_validate(model, train_df, val_df, device, num_epochs=10, batch_size=16):
    """Fine-tune `model` as a regressor and report metrics after every epoch.

    Each epoch trains on `train_df` with MSE loss and AdamW (lr=2e-5), then
    evaluates on `val_df`. Loss, accuracy and macro precision / recall / F1
    are printed for both splits.

    Args:
        model: Model to train; modified in place.
        train_df: Training DataFrame.
        val_df: Validation DataFrame.
        device: Device the batches are moved to.
        num_epochs: Number of passes over the training data.
        batch_size: Rows per batch.

    Returns:
        The trained model (the same object that was passed in).
    """
    optimizer = AdamW(model.parameters(), lr=2e-5)
    loss_fn = MSELoss()  # Using MSE Loss

    for epoch in tqdm(range(num_epochs), desc='Epochs'):
        model.train()
        avg_loss, train_labels, train_predictions = _run_epoch(
            model, train_df, device, loss_fn, batch_size, max_length=64, optimizer=optimizer)

        train_accuracy = accuracy_score(train_labels, train_predictions)
        train_precision, train_recall, train_f1, _ = precision_recall_fscore_support(train_labels, train_predictions, average='macro', zero_division=0)

        print(f"Training Epoch {epoch} - Loss: {avg_loss:.4f}, Accuracy: {train_accuracy:.4f}, Precision: {train_precision:.4f}, Recall: {train_recall:.4f}, F1-Score: {train_f1:.4f}")

        model.eval()
        avg_val_loss, val_labels, val_predictions = _run_epoch(
            model, val_df, device, loss_fn, batch_size, max_length=64)

        accuracy = accuracy_score(val_labels, val_predictions)
        precision, recall, f1, _ = precision_recall_fscore_support(val_labels, val_predictions, average='macro', zero_division=0)

        print(f"Validation Epoch {epoch} - Loss: {avg_val_loss:.4f}, Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")

    return model

def test_model(model, test_data, batch_size=16, max_length=64):
    """Evaluate `model` on `test_data` without updating it.

    Uses the module-level `device` for the batches.

    Args:
        model: Trained model to evaluate.
        test_data: Test DataFrame.
        batch_size: Rows per batch.
        max_length: Token length used by `produce_input`.

    Returns:
        Tuple ``(avg_test_loss, accuracy, precision, recall, f1)`` where the
        loss is the per-example MSE and precision / recall / F1 are
        macro-averaged.
    """
    model.eval()
    avg_test_loss, test_labels, test_predictions = _run_epoch(
        model, test_data, device, MSELoss(), batch_size, max_length=max_length,
        desc='Testing', leave=False)

    test_accuracy = accuracy_score(test_labels, test_predictions)
    test_precision, test_recall, test_f1, _ = precision_recall_fscore_support(test_labels, test_predictions, average='macro', zero_division=0)

    return avg_test_loss, test_accuracy, test_precision, test_recall, test_f1





In [None]:
# Script to Run Multiple Times with Detailed Output
num_runs = 5
test_results_all_runs = []

for run in range(num_runs):
    print(f"Run {run + 1}/{num_runs}")
    # Seed every run with its index: runs still differ from one another
    # (so the spread below is meaningful), but re-executing the notebook
    # starts each run from the same random state.
    torch.manual_seed(run)
    np.random.seed(run)

    model = initialize_model()  # Initialize a new model for each run
    trained_model = train_and_validate(model, train_df, val_df, device, num_epochs=10, batch_size=16)
    avg_test_loss, test_accuracy, test_precision, test_recall, test_f1 = test_model(trained_model, test_df, batch_size=16, max_length=64)
    test_results_all_runs.append((avg_test_loss, test_accuracy, test_precision, test_recall, test_f1))

    # Print results for the current run
    print(f"Results for Run {run + 1}:")
    print(f"  Test Loss: {avg_test_loss:.4f}")
    print(f"  Test Accuracy: {test_accuracy:.4f}")
    print(f"  Test Precision: {test_precision:.4f}")
    print(f"  Test Recall: {test_recall:.4f}")
    print(f"  Test F1-Score: {test_f1:.4f}\n")

# Convert list to numpy array for easier calculation:
# one row per run, columns ordered (loss, accuracy, precision, recall, f1)
test_results_all_runs = np.array(test_results_all_runs)

# Calculate mean and (population) standard deviation across runs
mean_test_results = np.mean(test_results_all_runs, axis=0)
std_test_results = np.std(test_results_all_runs, axis=0)

# Print the averaged results
print("Averaged Test Results Across Runs:")
print(f"  Mean Test Loss: {mean_test_results[0]:.4f} (±{std_test_results[0]:.4f})")
print(f"  Mean Test Accuracy: {mean_test_results[1]:.4f} (±{std_test_results[1]:.4f})")
print(f"  Mean Test Precision: {mean_test_results[2]:.4f} (±{std_test_results[2]:.4f})")
print(f"  Mean Test Recall: {mean_test_results[3]:.4f} (±{std_test_results[3]:.4f})")
print(f"  Mean Test F1-Score: {mean_test_results[4]:.4f} (±{std_test_results[4]:.4f})")

Run 1/5


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Batches: 0it [00:00, ?it/s]

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


Training Epoch 0 - Loss: 0.4783, Accuracy: 0.5082, Precision: 0.5260, Recall: 0.4274, F1-Score: 0.4076


Batches: 0it [00:00, ?it/s]

Validation Epoch 0 - Loss: 0.4071, Accuracy: 0.5753, Precision: 0.6476, Recall: 0.4833, F1-Score: 0.4467


Batches: 0it [00:00, ?it/s]

Training Epoch 1 - Loss: 0.3085, Accuracy: 0.6525, Precision: 0.6546, Recall: 0.5804, F1-Score: 0.5954


Batches: 0it [00:00, ?it/s]

Validation Epoch 1 - Loss: 0.4279, Accuracy: 0.5931, Precision: 0.6058, Recall: 0.5216, F1-Score: 0.5175


Batches: 0it [00:00, ?it/s]

Training Epoch 2 - Loss: 0.2193, Accuracy: 0.7304, Precision: 0.7172, Recall: 0.6840, F1-Score: 0.6971


Batches: 0it [00:00, ?it/s]

Validation Epoch 2 - Loss: 0.4466, Accuracy: 0.5972, Precision: 0.6044, Recall: 0.5447, F1-Score: 0.5510


Batches: 0it [00:00, ?it/s]

Training Epoch 3 - Loss: 0.1439, Accuracy: 0.8243, Precision: 0.8127, Recall: 0.8039, F1-Score: 0.8081


Batches: 0it [00:00, ?it/s]

Validation Epoch 3 - Loss: 0.4081, Accuracy: 0.6058, Precision: 0.6058, Recall: 0.5608, F1-Score: 0.5707


Batches: 0it [00:00, ?it/s]

Training Epoch 4 - Loss: 0.1109, Accuracy: 0.8695, Precision: 0.8605, Recall: 0.8533, F1-Score: 0.8567


Batches: 0it [00:00, ?it/s]

Validation Epoch 4 - Loss: 0.4017, Accuracy: 0.6134, Precision: 0.6195, Recall: 0.5727, F1-Score: 0.5843


Batches: 0it [00:00, ?it/s]

Training Epoch 5 - Loss: 0.0793, Accuracy: 0.9184, Precision: 0.9131, Recall: 0.9095, F1-Score: 0.9111


Batches: 0it [00:00, ?it/s]

Validation Epoch 5 - Loss: 0.3851, Accuracy: 0.6053, Precision: 0.6092, Recall: 0.5747, F1-Score: 0.5850


Batches: 0it [00:00, ?it/s]

Training Epoch 6 - Loss: 0.0558, Accuracy: 0.9477, Precision: 0.9432, Recall: 0.9404, F1-Score: 0.9418


Batches: 0it [00:00, ?it/s]

Validation Epoch 6 - Loss: 0.4271, Accuracy: 0.5972, Precision: 0.6030, Recall: 0.5451, F1-Score: 0.5540


Batches: 0it [00:00, ?it/s]

Training Epoch 7 - Loss: 0.0444, Accuracy: 0.9611, Precision: 0.9610, Recall: 0.9566, F1-Score: 0.9587


Batches: 0it [00:00, ?it/s]

Validation Epoch 7 - Loss: 0.4652, Accuracy: 0.6083, Precision: 0.6102, Recall: 0.5555, F1-Score: 0.5615


Batches: 0it [00:00, ?it/s]

Training Epoch 8 - Loss: 0.0340, Accuracy: 0.9715, Precision: 0.9681, Recall: 0.9658, F1-Score: 0.9669


Batches: 0it [00:00, ?it/s]

Validation Epoch 8 - Loss: 0.4406, Accuracy: 0.6078, Precision: 0.6084, Recall: 0.5555, F1-Score: 0.5636


Batches: 0it [00:00, ?it/s]

Training Epoch 9 - Loss: 0.0238, Accuracy: 0.9806, Precision: 0.9799, Recall: 0.9763, F1-Score: 0.9781


Batches: 0it [00:00, ?it/s]

Validation Epoch 9 - Loss: 0.4843, Accuracy: 0.5956, Precision: 0.6049, Recall: 0.5372, F1-Score: 0.5404


Testing: 0it [00:00, ?it/s]

Results for Run 1:
  Test Loss: 0.4773
  Test Accuracy: 0.5473
  Test Precision: 0.6108
  Test Recall: 0.5379
  Test F1-Score: 0.5170

Run 2/5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Batches: 0it [00:00, ?it/s]

Training Epoch 0 - Loss: 0.5411, Accuracy: 0.4543, Precision: 0.4983, Recall: 0.3802, F1-Score: 0.3361


Batches: 0it [00:00, ?it/s]

Validation Epoch 0 - Loss: 0.4020, Accuracy: 0.5748, Precision: 0.3819, Recall: 0.4749, F1-Score: 0.4233


Batches: 0it [00:00, ?it/s]

Training Epoch 1 - Loss: 0.3395, Accuracy: 0.6220, Precision: 0.6225, Recall: 0.5348, F1-Score: 0.5416


Batches: 0it [00:00, ?it/s]

Validation Epoch 1 - Loss: 0.4403, Accuracy: 0.5829, Precision: 0.5846, Recall: 0.5260, F1-Score: 0.5271


Batches: 0it [00:00, ?it/s]

Training Epoch 2 - Loss: 0.2230, Accuracy: 0.7336, Precision: 0.7195, Recall: 0.6830, F1-Score: 0.6967


Batches: 0it [00:00, ?it/s]

Validation Epoch 2 - Loss: 0.3984, Accuracy: 0.6160, Precision: 0.6123, Recall: 0.5745, F1-Score: 0.5838


Batches: 0it [00:00, ?it/s]

Training Epoch 3 - Loss: 0.1522, Accuracy: 0.8113, Precision: 0.7971, Recall: 0.7866, F1-Score: 0.7915


Batches: 0it [00:00, ?it/s]

Validation Epoch 3 - Loss: 0.3669, Accuracy: 0.6170, Precision: 0.6270, Recall: 0.5952, F1-Score: 0.6036


Batches: 0it [00:00, ?it/s]

Training Epoch 4 - Loss: 0.1240, Accuracy: 0.8463, Precision: 0.8323, Recall: 0.8301, F1-Score: 0.8310


Batches: 0it [00:00, ?it/s]

Validation Epoch 4 - Loss: 0.3835, Accuracy: 0.6048, Precision: 0.6068, Recall: 0.5644, F1-Score: 0.5746


Batches: 0it [00:00, ?it/s]

Training Epoch 5 - Loss: 0.0861, Accuracy: 0.9076, Precision: 0.9041, Recall: 0.9037, F1-Score: 0.9037


Batches: 0it [00:00, ?it/s]

Validation Epoch 5 - Loss: 0.4138, Accuracy: 0.6058, Precision: 0.6209, Recall: 0.5494, F1-Score: 0.5577


Batches: 0it [00:00, ?it/s]

Training Epoch 6 - Loss: 0.0577, Accuracy: 0.9451, Precision: 0.9416, Recall: 0.9402, F1-Score: 0.9408


Batches: 0it [00:00, ?it/s]

Validation Epoch 6 - Loss: 0.4393, Accuracy: 0.6114, Precision: 0.6222, Recall: 0.5579, F1-Score: 0.5670


Batches: 0it [00:00, ?it/s]

Training Epoch 7 - Loss: 0.0422, Accuracy: 0.9627, Precision: 0.9587, Recall: 0.9565, F1-Score: 0.9576


Batches: 0it [00:00, ?it/s]

Validation Epoch 7 - Loss: 0.4267, Accuracy: 0.6109, Precision: 0.6284, Recall: 0.5591, F1-Score: 0.5699


Batches: 0it [00:00, ?it/s]

Training Epoch 8 - Loss: 0.0315, Accuracy: 0.9738, Precision: 0.9715, Recall: 0.9665, F1-Score: 0.9689


Batches: 0it [00:00, ?it/s]

Validation Epoch 8 - Loss: 0.4475, Accuracy: 0.6068, Precision: 0.6209, Recall: 0.5520, F1-Score: 0.5608


Batches: 0it [00:00, ?it/s]

Training Epoch 9 - Loss: 0.0284, Accuracy: 0.9724, Precision: 0.9713, Recall: 0.9668, F1-Score: 0.9690


Batches: 0it [00:00, ?it/s]

Validation Epoch 9 - Loss: 0.4403, Accuracy: 0.6119, Precision: 0.6088, Recall: 0.5675, F1-Score: 0.5769


Testing: 0it [00:00, ?it/s]

Results for Run 2:
  Test Loss: 0.4264
  Test Accuracy: 0.5928
  Test Precision: 0.6177
  Test Recall: 0.5892
  Test F1-Score: 0.5778

Run 3/5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Batches: 0it [00:00, ?it/s]

Training Epoch 0 - Loss: 0.5879, Accuracy: 0.4296, Precision: 0.4124, Recall: 0.3672, F1-Score: 0.3394


Batches: 0it [00:00, ?it/s]

Validation Epoch 0 - Loss: 0.7015, Accuracy: 0.4217, Precision: 0.1406, Recall: 0.3333, F1-Score: 0.1977


Batches: 0it [00:00, ?it/s]

Training Epoch 1 - Loss: 0.5752, Accuracy: 0.3872, Precision: 0.2845, Recall: 0.3323, F1-Score: 0.2730


Batches: 0it [00:00, ?it/s]

Validation Epoch 1 - Loss: 0.7205, Accuracy: 0.4217, Precision: 0.1406, Recall: 0.3333, F1-Score: 0.1977


Batches: 0it [00:00, ?it/s]

Training Epoch 2 - Loss: 0.5707, Accuracy: 0.3949, Precision: 0.2914, Recall: 0.3396, F1-Score: 0.2779


Batches: 0it [00:00, ?it/s]

Validation Epoch 2 - Loss: 0.7040, Accuracy: 0.4217, Precision: 0.1406, Recall: 0.3333, F1-Score: 0.1977


Batches: 0it [00:00, ?it/s]

Training Epoch 3 - Loss: 0.5739, Accuracy: 0.3880, Precision: 0.2856, Recall: 0.3348, F1-Score: 0.2723


Batches: 0it [00:00, ?it/s]

Validation Epoch 3 - Loss: 0.7062, Accuracy: 0.4217, Precision: 0.1406, Recall: 0.3333, F1-Score: 0.1977


Batches: 0it [00:00, ?it/s]

Training Epoch 4 - Loss: 0.5691, Accuracy: 0.3945, Precision: 0.2914, Recall: 0.3390, F1-Score: 0.2779


Batches: 0it [00:00, ?it/s]

Validation Epoch 4 - Loss: 0.6405, Accuracy: 0.4568, Precision: 0.3328, Recall: 0.3876, F1-Score: 0.3311


Batches: 0it [00:00, ?it/s]

Training Epoch 5 - Loss: 0.5643, Accuracy: 0.4110, Precision: 0.3375, Recall: 0.3502, F1-Score: 0.2924


Batches: 0it [00:00, ?it/s]

Validation Epoch 5 - Loss: 0.6314, Accuracy: 0.4858, Precision: 0.3263, Recall: 0.4048, F1-Score: 0.3599


Batches: 0it [00:00, ?it/s]

Training Epoch 6 - Loss: 0.5640, Accuracy: 0.4323, Precision: 0.4331, Recall: 0.3731, F1-Score: 0.3149


Batches: 0it [00:00, ?it/s]

Validation Epoch 6 - Loss: 0.6796, Accuracy: 0.4217, Precision: 0.1406, Recall: 0.3333, F1-Score: 0.1977


Batches: 0it [00:00, ?it/s]

Training Epoch 7 - Loss: 0.5482, Accuracy: 0.4412, Precision: 0.3286, Recall: 0.3748, F1-Score: 0.3134


Batches: 0it [00:00, ?it/s]

Validation Epoch 7 - Loss: 0.6897, Accuracy: 0.4217, Precision: 0.1406, Recall: 0.3333, F1-Score: 0.1977


Batches: 0it [00:00, ?it/s]

Training Epoch 8 - Loss: 0.5490, Accuracy: 0.4419, Precision: 0.3320, Recall: 0.3762, F1-Score: 0.3135


Batches: 0it [00:00, ?it/s]

Validation Epoch 8 - Loss: 0.6752, Accuracy: 0.4217, Precision: 0.1406, Recall: 0.3333, F1-Score: 0.1977


Batches: 0it [00:00, ?it/s]

Training Epoch 9 - Loss: 0.5545, Accuracy: 0.4371, Precision: 0.3245, Recall: 0.3723, F1-Score: 0.3099


Batches: 0it [00:00, ?it/s]

Validation Epoch 9 - Loss: 0.6700, Accuracy: 0.4334, Precision: 0.2873, Recall: 0.3588, F1-Score: 0.3190


Testing: 0it [00:00, ?it/s]

Results for Run 3:
  Test Loss: 0.5831
  Test Accuracy: 0.4472
  Test Precision: 0.2974
  Test Recall: 0.3557
  Test F1-Score: 0.3216

Run 4/5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Batches: 0it [00:00, ?it/s]

Training Epoch 0 - Loss: 0.5017, Accuracy: 0.4531, Precision: 0.5114, Recall: 0.3852, F1-Score: 0.3440


Batches: 0it [00:00, ?it/s]

Validation Epoch 0 - Loss: 0.5096, Accuracy: 0.5366, Precision: 0.3464, Recall: 0.4376, F1-Score: 0.3779


Batches: 0it [00:00, ?it/s]

Training Epoch 1 - Loss: 0.3194, Accuracy: 0.6403, Precision: 0.6439, Recall: 0.5590, F1-Score: 0.5711


Batches: 0it [00:00, ?it/s]

Validation Epoch 1 - Loss: 0.4602, Accuracy: 0.5768, Precision: 0.5908, Recall: 0.5064, F1-Score: 0.5003


Batches: 0it [00:00, ?it/s]

Training Epoch 2 - Loss: 0.2139, Accuracy: 0.7437, Precision: 0.7407, Recall: 0.6924, F1-Score: 0.7098


Batches: 0it [00:00, ?it/s]

Validation Epoch 2 - Loss: 0.4805, Accuracy: 0.5905, Precision: 0.6126, Recall: 0.5260, F1-Score: 0.5271


Batches: 0it [00:00, ?it/s]

Training Epoch 3 - Loss: 0.1572, Accuracy: 0.8040, Precision: 0.7902, Recall: 0.7662, F1-Score: 0.7768


Batches: 0it [00:00, ?it/s]

Validation Epoch 3 - Loss: 0.3843, Accuracy: 0.6083, Precision: 0.6204, Recall: 0.5653, F1-Score: 0.5770


Batches: 0it [00:00, ?it/s]

Training Epoch 4 - Loss: 0.1232, Accuracy: 0.8507, Precision: 0.8430, Recall: 0.8336, F1-Score: 0.8380


Batches: 0it [00:00, ?it/s]

Validation Epoch 4 - Loss: 0.3894, Accuracy: 0.6017, Precision: 0.6001, Recall: 0.5679, F1-Score: 0.5777


Batches: 0it [00:00, ?it/s]

Training Epoch 5 - Loss: 0.0895, Accuracy: 0.9047, Precision: 0.9019, Recall: 0.8955, F1-Score: 0.8985


Batches: 0it [00:00, ?it/s]

Validation Epoch 5 - Loss: 0.4248, Accuracy: 0.6083, Precision: 0.6085, Recall: 0.5614, F1-Score: 0.5703


Batches: 0it [00:00, ?it/s]

Training Epoch 6 - Loss: 0.0662, Accuracy: 0.9318, Precision: 0.9292, Recall: 0.9249, F1-Score: 0.9270


Batches: 0it [00:00, ?it/s]

Validation Epoch 6 - Loss: 0.4518, Accuracy: 0.5977, Precision: 0.6068, Recall: 0.5365, F1-Score: 0.5396


Batches: 0it [00:00, ?it/s]

Training Epoch 7 - Loss: 0.0471, Accuracy: 0.9589, Precision: 0.9578, Recall: 0.9567, F1-Score: 0.9572


Batches: 0it [00:00, ?it/s]

Validation Epoch 7 - Loss: 0.4523, Accuracy: 0.6048, Precision: 0.6002, Recall: 0.5591, F1-Score: 0.5649


Batches: 0it [00:00, ?it/s]

Training Epoch 8 - Loss: 0.0366, Accuracy: 0.9669, Precision: 0.9635, Recall: 0.9624, F1-Score: 0.9629


Batches: 0it [00:00, ?it/s]

Validation Epoch 8 - Loss: 0.4535, Accuracy: 0.6022, Precision: 0.6033, Recall: 0.5545, F1-Score: 0.5618


Batches: 0it [00:00, ?it/s]

Training Epoch 9 - Loss: 0.0317, Accuracy: 0.9732, Precision: 0.9716, Recall: 0.9710, F1-Score: 0.9713


Batches: 0it [00:00, ?it/s]

Validation Epoch 9 - Loss: 0.4113, Accuracy: 0.6043, Precision: 0.6129, Recall: 0.5775, F1-Score: 0.5878


Testing: 0it [00:00, ?it/s]

Results for Run 4:
  Test Loss: 0.3539
  Test Accuracy: 0.6368
  Test Precision: 0.6339
  Test Recall: 0.6173
  Test F1-Score: 0.6181

Run 5/5


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Batches: 0it [00:00, ?it/s]

Training Epoch 0 - Loss: 0.5489, Accuracy: 0.4451, Precision: 0.4681, Recall: 0.3793, F1-Score: 0.3438


Batches: 0it [00:00, ?it/s]

Validation Epoch 0 - Loss: 0.4207, Accuracy: 0.5488, Precision: 0.3664, Recall: 0.4540, F1-Score: 0.4052


Batches: 0it [00:00, ?it/s]