In [3]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
import seaborn as sns
import matplotlib.pyplot as plt

# Load the dataset.
# 'ANSI' is only registered as a codec alias on Windows builds of Python;
# on other platforms the lookup fails with LookupError. Fall back to cp1252
# there (assumes the file was saved on a Western-locale Windows machine --
# TODO confirm the actual code page of GPT.csv).
try:
    data = pd.read_csv("GPT.csv", encoding='ANSI')
except LookupError:
    data = pd.read_csv("GPT.csv", encoding='cp1252')

# 'Category' holds a comma-separated list of labels per row. Parse every row
# into a set of stripped tokens so that
#   * whitespace after a comma does not create a distinct label,
#   * a missing (NaN) category yields no labels instead of raising, and
#   * membership is an exact match, not the substring match of
#     ``str.contains`` (which would mark one label as present whenever its
#     name happens to be contained in another).
category_tokens = [
    {part.strip() for part in str(raw).split(',') if part.strip()}
    for raw in data['Category'].fillna('')
]

# Set of every distinct label name seen anywhere in the 'Category' column.
security_class = set().union(*category_tokens)
df = data.copy()

# One 0/1 indicator column per label; sorted() keeps the column order
# stable between runs (set iteration order is not).
for s in sorted(security_class):
    df[s] = [int(s in tokens) for tokens in category_tokens]

# Number of decimal places used when printing precision / recall / F1.
Round = 3

# Label columns of ``df``; the loop below trains one classifier per entry.
fields = [
    'Notice',
    'Breach',
    'DataProcessing',
    'Security',
    'Complaint/Request',
    'UserParticipation',
]

# Hyperparameters shared by every per-label fine-tuning run.
epochs, batch_size, learning_rate = 5, 8, 2e-5

# Banner printed once before the per-label results.
print('Bert:')

def _collect_predictions(model, loader):
    """Run ``model`` over ``loader`` in eval mode without tracking gradients.

    Each batch must be an ``(input_ids, attention_mask, labels)`` triple, as
    produced by the ``TensorDataset`` objects built in the loop below.

    Returns:
        A ``(true_labels, predicted_labels)`` pair of plain Python lists;
        each prediction is the arg-max over the model's output logits.
    """
    model.eval()
    true_labels, predicted_labels = [], []
    with torch.no_grad():
        for input_ids, attention_mask, labels in loader:
            logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
            true_labels.extend(labels.tolist())
            predicted_labels.extend(logits.argmax(dim=1).tolist())
    return true_labels, predicted_labels


# The tokenizer does not depend on the label being trained: load it once
# instead of once per loop iteration.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Train and evaluate one independent binary classifier per label column.
# (``fold`` is a label name taken from ``fields``, not a cross-validation fold.)
for fold in fields:
    print(fold)
    train_data, test_data = train_test_split(
        df, test_size=0.2, random_state=42, stratify=df[fold])

    # Every label column is a 0/1 indicator, so the head needs exactly two
    # outputs regardless of how many categories exist in the dataset.
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

    # Tokenize the text data
    train_encodings = tokenizer(list(train_data['Article Text']), truncation=True, padding=True)
    test_encodings = tokenizer(list(test_data['Article Text']), truncation=True, padding=True)

    # Create PyTorch DataLoader for training and testing
    train_dataset = TensorDataset(torch.tensor(train_encodings['input_ids']),
                                  torch.tensor(train_encodings['attention_mask']),
                                  torch.tensor(train_data[fold].values, dtype=torch.long))
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    test_dataset = TensorDataset(torch.tensor(test_encodings['input_ids']),
                                 torch.tensor(test_encodings['attention_mask']),
                                 torch.tensor(test_data[fold].values, dtype=torch.long))
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Set up optimizer and loss function
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    loss_fn = torch.nn.CrossEntropyLoss()

    # Training loop, keeping the weights of the best-scoring epoch.
    # NOTE(review): the checkpoint is selected on the same split that is
    # reported as the final test result, so the reported metrics are
    # optimistic; a separate validation split would avoid this.
    best_accuracy = -1.0  # below any real accuracy, so epoch 1 is always captured
    best_model_state = None
    for epoch in range(epochs):
        model.train()
        for input_ids, attention_mask, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = loss_fn(outputs.logits, labels)
            loss.backward()
            optimizer.step()

        # Evaluate the model after each epoch
        true_labels, predicted_labels = _collect_predictions(model, test_loader)
        total_correct = sum(t == p for t, p in zip(true_labels, predicted_labels))
        accuracy = total_correct / len(true_labels)
        print(f'Epoch [{epoch + 1}/{epochs}] - Test Accuracy: {accuracy:.2f}')

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            # state_dict() returns references to the live parameters, which
            # later optimizer steps overwrite in place; clone every tensor
            # so the snapshot really is the state of this epoch.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }

    # Load the best model state for testing (absent only when epochs == 0)
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # Evaluate the best model on the test set
    true_labels, predicted_labels = _collect_predictions(model, test_loader)

    total_correct = sum(t == p for t, p in zip(true_labels, predicted_labels))
    accuracy = total_correct / len(true_labels) * 100
    # zero_division=0 yields the same scores scikit-learn already falls back
    # to when a class is never predicted, without emitting a warning per call.
    precision = precision_score(true_labels, predicted_labels, average='weighted', zero_division=0) * 100
    recall = recall_score(true_labels, predicted_labels, average='weighted', zero_division=0) * 100
    f1 = f1_score(true_labels, predicted_labels, average='weighted', zero_division=0) * 100

    print(f'Final Test Accuracy: {accuracy:.2f}')
    print(f'Precision: {precision:.{Round}f}')
    print(f'Recall: {recall:.{Round}f}')
    print(f'F1 Score: {f1:.{Round}f}')

    # Calculate and print the confusion matrix; fixing the label order keeps
    # it 2x2 (rows = true 0/1, columns = predicted 0/1) even if one class is
    # absent from both the targets and the predictions.
    cm = confusion_matrix(true_labels, predicted_labels, labels=[0, 1])
    print(cm)
    

Bert:
Notice


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

Epoch [1/5] - Test Accuracy: 0.71
Epoch [2/5] - Test Accuracy: 0.71
Epoch [3/5] - Test Accuracy: 0.71
Epoch [4/5] - Test Accuracy: 0.71
Epoch [5/5] - Test Accuracy: 0.71


  _warn_prf(average, modifier, msg_start, len(result))


Final Test Accuracy: 71.43
Precision: 51.020
Recall: 71.429
F1 Score: 59.524
[[10  0]
 [ 4  0]]
Breach


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

Epoch [1/5] - Test Accuracy: 0.93
Epoch [2/5] - Test Accuracy: 0.93
Epoch [3/5] - Test Accuracy: 0.93
Epoch [4/5] - Test Accuracy: 0.93
Epoch [5/5] - Test Accuracy: 0.93


  _warn_prf(average, modifier, msg_start, len(result))


Final Test Accuracy: 92.86
Precision: 86.224
Recall: 92.857
F1 Score: 89.418
[[13  0]
 [ 1  0]]
DataProcessing


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

Epoch [1/5] - Test Accuracy: 0.79
Epoch [2/5] - Test Accuracy: 0.79
Epoch [3/5] - Test Accuracy: 0.79
Epoch [4/5] - Test Accuracy: 0.79
Epoch [5/5] - Test Accuracy: 0.79


  _warn_prf(average, modifier, msg_start, len(result))


Final Test Accuracy: 78.57
Precision: 61.735
Recall: 78.571
F1 Score: 69.143
[[11  0]
 [ 3  0]]
Security


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

Epoch [1/5] - Test Accuracy: 0.93
Epoch [2/5] - Test Accuracy: 0.93
Epoch [3/5] - Test Accuracy: 0.93
Epoch [4/5] - Test Accuracy: 0.93
Epoch [5/5] - Test Accuracy: 0.93


  _warn_prf(average, modifier, msg_start, len(result))


Final Test Accuracy: 92.86
Precision: 86.224
Recall: 92.857
F1 Score: 89.418
[[13  0]
 [ 1  0]]
Complaint/Request


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

Epoch [1/5] - Test Accuracy: 0.93
Epoch [2/5] - Test Accuracy: 0.93
Epoch [3/5] - Test Accuracy: 0.93
Epoch [4/5] - Test Accuracy: 0.93
Epoch [5/5] - Test Accuracy: 0.93


  _warn_prf(average, modifier, msg_start, len(result))


Final Test Accuracy: 92.86
Precision: 86.224
Recall: 92.857
F1 Score: 89.418
[[13  0]
 [ 1  0]]
UserParticipation


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

Epoch [1/5] - Test Accuracy: 0.71
Epoch [2/5] - Test Accuracy: 0.71
Epoch [3/5] - Test Accuracy: 0.64
Epoch [4/5] - Test Accuracy: 0.79
Epoch [5/5] - Test Accuracy: 0.86
Final Test Accuracy: 85.71
Precision: 85.714
Recall: 85.714
F1 Score: 85.714
[[9 1]
 [1 3]]
