In [5]:
import os
import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset, Subset
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from torch.nn import CrossEntropyLoss
from belief_maps import filenames, combined_belief_map

In [None]:
#!pip install torch transformers scikit-learn matplotlib seaborn

In [14]:
# labels texts based on a belief map (0 for 'Negative', 1 for 'Positive'),
# and returns two lists: texts containing the file content and 
# labels for classification. 
def read_text_files(directory, filenames, belief_map):
    texts = []
    labels = []
    for filename in filenames:
        file_path = os.path.join(directory, filename)
        if os.path.exists(file_path):
            with open(file_path, 'r', encoding='utf-8') as file:
                text = file.read()
                texts.append(text)
                labels.append(0 if belief_map[filename] == 'Negative' else 1)
        else:
            print(f"File {filename} not found in directory {directory}.")
    return texts, labels

# Take data from working directory in folder "All_Texts" and categorise by label
directory = os.path.join(os.getcwd(), "All_Texts")

texts, labels = read_text_files(directory, filenames, combined_belief_map)
labels = [1 if combined_belief_map[filename] == 'Positive' else 0 for filename in filenames]

# Split into training and test sets
train_texts, temp_texts, train_labels, temp_labels = train_test_split(texts, labels, test_size=0.3, random_state=42)
val_texts, test_texts, val_labels, test_labels = train_test_split(temp_texts, temp_labels, test_size=0.5, random_state=42)

# The BERT tokenizer and model are initialized with
# pre-trained weights from the 'bert-base-uncased' model, configured 
# for binary classification.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
# using BERT’s tokenizer, applying padding and truncation to ensure uniform input length, 
# and converting the outputs to PyTorch tensors with a maximum sequence length of 512 token

train_inputs = tokenizer(train_texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
val_inputs = tokenizer(val_texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
test_inputs = tokenizer(test_texts, padding=True, truncation=True, return_tensors="pt", max_length=512)

# Convert labels to tensors
train_labels = torch.tensor(train_labels)
val_labels = torch.tensor(val_labels)
test_labels = torch.tensor(test_labels)

# Create datasets
train_dataset = TensorDataset(train_inputs['input_ids'], train_inputs['attention_mask'], train_labels)
val_dataset = TensorDataset(val_inputs['input_ids'], val_inputs['attention_mask'], val_labels)
test_dataset = TensorDataset(test_inputs['input_ids'], test_inputs['attention_mask'], test_labels)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=8, sampler=RandomSampler(train_dataset))
validation_loader = DataLoader(val_dataset, batch_size=8, sampler=SequentialSampler(val_dataset))
test_loader = DataLoader(test_dataset, batch_size=8, sampler=SequentialSampler(test_dataset))

In [1]:
# Set up the AdamW optimizer with a learning rate of 2e-5 and epsilon of 1e-8 for numerical stability. 
optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)

# Check for GPU availability and move the model to the appropriate device (GPU if available, otherwise CPU)
# for efficient training.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Evaluate the model’s performance on a given data loader. The model is set to evaluation mode, 
# and predictions are made without gradient calculations (torch.no_grad()). For each batch, input 
# data and labels are moved to the appropriate device, and predictions are generated. Accuracy,
# precision, recall, and F1 score are calculated based on the predictions and returned as evaluation metrics.
def evaluate(model, data_loader):
    model.eval()
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            labels = batch[2].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            preds = torch.argmax(logits, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)

    return accuracy, precision, recall, f1

NameError: name 'AdamW' is not defined

In [20]:
criterion = CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=1e-6, eps=1e-8, weight_decay=0.01)

def run_training_and_evaluation():
    """Initialize the CrossEntropyLoss criterion and configure the AdamW optimizer with
    a lower learning rate and weight decay for regularization. The run_training_and_evaluation 
    function performs 3-fold cross-validation on the training dataset, splitting it into different training
    and validation subsets for each fold. Within each fold, a training loop runs for a specified number of 
    epochs, optimizing the model using backpropagation. After training, the model is evaluated on the 
    validation set using accuracy, precision, recall, and F1-score. Finally, the model’s performance is 
    evaluated on the test set, returning the test accuracy."""
    kf = KFold(n_splits=3, shuffle=True, random_state=42)
    for fold, (train_index, val_index) in enumerate(kf.split(train_dataset)):
        print(f"Fold {fold+1}")
        train_subset = Subset(train_dataset, train_index)
        val_subset = Subset(train_dataset, val_index)
        train_loader = DataLoader(train_subset, batch_size=4, sampler=RandomSampler(train_subset))
        validation_loader = DataLoader(val_subset, batch_size=4, sampler=SequentialSampler(val_subset))
        
        # Training loop
        for epoch in range(3):  # Adjust the number of epochs as needed
            model.train()
            total_loss = 0
            for batch in train_loader:
                batch = tuple(t.to(device) for t in batch)
                inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[2]}
                optimizer.zero_grad()
                outputs = model(**inputs)
                loss = criterion(outputs.logits, batch[2])
                total_loss += loss.item()
                loss.backward()
                optimizer.step()
            avg_train_loss = total_loss / len(train_loader)
            print(f"Epoch {epoch+1}, Loss: {avg_train_loss}")
        
        # Validation loop
        accuracy, precision, recall, f1 = evaluate(model, validation_loader)
        print(f"Fold {fold+1} - Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

    # Final evaluation on the test set
    test_accuracy, test_precision, test_recall, test_f1 = evaluate(model, test_loader)
    return test_accuracy



In [22]:
num_runs = 10
total_accuracy = 0

# Run the model 10 times to ensure reliability of results. For each run,
# reinitialize the BERT model and optimizer to avoid any carryover effects 
# from previous runs. The accuracy from each run is accumulated, and the average
# test set accuracy is calculated and printed at the end
# for run in range(num_runs):
    print(f"Run {run + 1}/{num_runs}")
    # Reinitialize the model and optimizer for each run
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2).to(device)
    optimizer = AdamW(model.parameters(), lr=1e-6, eps=1e-8, weight_decay=0.01)
    
    # Run the training and evaluation
    accuracy = run_training_and_evaluation()
    total_accuracy += accuracy

average_accuracy = total_accuracy / num_runs
print(f"Average Test Set Accuracy over {num_runs} runs: {average_accuracy:.4f}")

Run 1/10


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1
Epoch 1, Loss: 0.6681465581059456
Epoch 2, Loss: 0.6434632353484631
Epoch 3, Loss: 0.6305680051445961


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 1 - Accuracy: 0.6000, Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Fold 2
Epoch 1, Loss: 0.6327815987169743
Epoch 2, Loss: 0.6591181084513664
Epoch 3, Loss: 0.6957699060440063
Fold 2 - Accuracy: 0.6000, Precision: 1.0000, Recall: 0.2500, F1 Score: 0.4000
Fold 3
Epoch 1, Loss: 0.6953234821557999
Epoch 2, Loss: 0.6642161980271339
Epoch 3, Loss: 0.6597122699022293
Fold 3 - Accuracy: 0.7857, Precision: 1.0000, Recall: 0.4000, F1 Score: 0.5714
Run 2/10


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1
Epoch 1, Loss: 0.6113217882812023
Epoch 2, Loss: 0.6669505089521408
Epoch 3, Loss: 0.652454137802124


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 1 - Accuracy: 0.6000, Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Fold 2
Epoch 1, Loss: 0.6493916660547256
Epoch 2, Loss: 0.635316863656044
Epoch 3, Loss: 0.6614585742354393
Fold 2 - Accuracy: 0.6667, Precision: 1.0000, Recall: 0.3750, F1 Score: 0.5455
Fold 3
Epoch 1, Loss: 0.6573622673749924
Epoch 2, Loss: 0.6564330011606216
Epoch 3, Loss: 0.6308974102139473
Fold 3 - Accuracy: 0.9286, Precision: 1.0000, Recall: 0.8000, F1 Score: 0.8889
Run 3/10


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1
Epoch 1, Loss: 0.7111102491617203
Epoch 2, Loss: 0.6819249987602234
Epoch 3, Loss: 0.7054719999432564
Fold 1 - Accuracy: 0.8000, Precision: 0.6667, Recall: 1.0000, F1 Score: 0.8000
Fold 2
Epoch 1, Loss: 0.7041812688112259
Epoch 2, Loss: 0.6506929025053978
Epoch 3, Loss: 0.6812635734677315
Fold 2 - Accuracy: 0.6000, Precision: 0.7500, Recall: 0.3750, F1 Score: 0.5000
Fold 3
Epoch 1, Loss: 0.6702312082052231
Epoch 2, Loss: 0.6655416637659073
Epoch 3, Loss: 0.6413455978035927
Fold 3 - Accuracy: 0.9286, Precision: 0.8333, Recall: 1.0000, F1 Score: 0.9091
Run 4/10


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1
Epoch 1, Loss: 0.6626507788896561
Epoch 2, Loss: 0.6432201750576496
Epoch 3, Loss: 0.6476310528814793


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 1 - Accuracy: 0.6000, Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Fold 2
Epoch 1, Loss: 0.6540769636631012
Epoch 2, Loss: 0.6269956417381763
Epoch 3, Loss: 0.6602695360779762


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 2 - Accuracy: 0.4667, Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Fold 3
Epoch 1, Loss: 0.6821461990475655
Epoch 2, Loss: 0.6584043502807617
Epoch 3, Loss: 0.6617418080568314


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 3 - Accuracy: 0.6429, Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Run 5/10


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1
Epoch 1, Loss: 0.6997877061367035
Epoch 2, Loss: 0.6583363115787506
Epoch 3, Loss: 0.6817423775792122
Fold 1 - Accuracy: 0.2667, Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Fold 2
Epoch 1, Loss: 0.6844698712229729
Epoch 2, Loss: 0.6694984063506126
Epoch 3, Loss: 0.6595005020499229
Fold 2 - Accuracy: 0.5333, Precision: 0.6667, Recall: 0.2500, F1 Score: 0.3636
Fold 3
Epoch 1, Loss: 0.7330388724803925
Epoch 2, Loss: 0.675837229937315
Epoch 3, Loss: 0.6818124651908875


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Fold 3 - Accuracy: 0.6429, Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Run 6/10


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1
Epoch 1, Loss: 0.6976071372628212
Epoch 2, Loss: 0.6760995164513588
Epoch 3, Loss: 0.6233232654631138
Fold 1 - Accuracy: 0.6667, Precision: 1.0000, Recall: 0.1667, F1 Score: 0.2857
Fold 2
Epoch 1, Loss: 0.5968341380357742
Epoch 2, Loss: 0.596122495830059
Epoch 3, Loss: 0.6384907811880112
Fold 2 - Accuracy: 0.6667, Precision: 1.0000, Recall: 0.3750, F1 Score: 0.5455
Fold 3
Epoch 1, Loss: 0.6381660848855972
Epoch 2, Loss: 0.637650802731514
Epoch 3, Loss: 0.6263613551855087
Fold 3 - Accuracy: 1.0000, Precision: 1.0000, Recall: 1.0000, F1 Score: 1.0000
Run 7/10


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1
Epoch 1, Loss: 0.7032582312822342
Epoch 2, Loss: 0.6973480135202408
Epoch 3, Loss: 0.6675153970718384
Fold 1 - Accuracy: 0.6667, Precision: 1.0000, Recall: 0.1667, F1 Score: 0.2857
Fold 2
Epoch 1, Loss: 0.6692741960287094
Epoch 2, Loss: 0.6600122973322868
Epoch 3, Loss: 0.6536164283752441
Fold 2 - Accuracy: 0.6667, Precision: 1.0000, Recall: 0.3750, F1 Score: 0.5455
Fold 3
Epoch 1, Loss: 0.6823406293988228
Epoch 2, Loss: 0.6567135378718376
Epoch 3, Loss: 0.6429727226495743
Fold 3 - Accuracy: 0.9286, Precision: 1.0000, Recall: 0.8000, F1 Score: 0.8889
Run 8/10


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1
Epoch 1, Loss: 0.8053115531802177
Epoch 2, Loss: 0.7891221195459366
Epoch 3, Loss: 0.7416359111666679
Fold 1 - Accuracy: 0.4000, Precision: 0.4000, Recall: 1.0000, F1 Score: 0.5714
Fold 2
Epoch 1, Loss: 0.7609340697526932
Epoch 2, Loss: 0.7262724339962006
Epoch 3, Loss: 0.7034813463687897
Fold 2 - Accuracy: 0.6667, Precision: 0.6667, Recall: 0.7500, F1 Score: 0.7059
Fold 3
Epoch 1, Loss: 0.667810283601284
Epoch 2, Loss: 0.7215845584869385
Epoch 3, Loss: 0.7030531913042068
Fold 3 - Accuracy: 0.8571, Precision: 1.0000, Recall: 0.6000, F1 Score: 0.7500
Run 9/10


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1
Epoch 1, Loss: 0.6870690137147903
Epoch 2, Loss: 0.678669884800911
Epoch 3, Loss: 0.6610021218657494
Fold 1 - Accuracy: 0.4000, Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
Fold 2
Epoch 1, Loss: 0.6654654517769814
Epoch 2, Loss: 0.6255526468157768
Epoch 3, Loss: 0.6758321821689606
Fold 2 - Accuracy: 0.6000, Precision: 0.7500, Recall: 0.3750, F1 Score: 0.5000
Fold 3
Epoch 1, Loss: 0.663232296705246
Epoch 2, Loss: 0.6685125008225441
Epoch 3, Loss: 0.6684006303548813
Fold 3 - Accuracy: 0.7857, Precision: 1.0000, Recall: 0.4000, F1 Score: 0.5714
Run 10/10


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1
Epoch 1, Loss: 0.8052205964922905
Epoch 2, Loss: 0.7692593336105347
Epoch 3, Loss: 0.7198833376169205
Fold 1 - Accuracy: 0.7333, Precision: 0.7500, Recall: 0.5000, F1 Score: 0.6000
Fold 2
Epoch 1, Loss: 0.6868038401007652
Epoch 2, Loss: 0.6505073495209217
Epoch 3, Loss: 0.6204651184380054
Fold 2 - Accuracy: 0.8000, Precision: 1.0000, Recall: 0.6250, F1 Score: 0.7692
Fold 3
Epoch 1, Loss: 0.6586970537900925
Epoch 2, Loss: 0.6828380152583122
Epoch 3, Loss: 0.6323940679430962
Fold 3 - Accuracy: 0.7143, Precision: 0.6667, Recall: 0.4000, F1 Score: 0.5000
Average Test Set Accuracy over 10 runs: 0.6400
