#### Neural Network Data Training/Testing for Shield Use

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler
import torch.nn as nn
import torch
import pandas as pd
import numpy as np
import time

def format_time(seconds):
    """Format a duration given in seconds as a human-readable string.

    Args:
        seconds: Duration in seconds (int or float). Expected to be non-negative.

    Returns:
        A string containing only the units that are needed, e.g.
        "12.34 seconds", "7 minutes, 56.09 seconds" or
        "1 hour, 0 minutes, 3.00 seconds". The hour and minute units are
        singular when their value is exactly 1; seconds are always shown
        with two decimal places.
    """
    # Split off the fractional seconds first, then break whole minutes into hours/minutes.
    whole_minutes, secs = divmod(seconds, 60)
    hours, minutes = divmod(int(whole_minutes), 60)

    minute_part = f"{minutes} minute{'s' if minutes != 1 else ''}"
    second_part = f"{secs:.2f} seconds"

    if hours > 0:
        # Pluralise hours the same way as minutes (previously "1 hours" was produced).
        hour_part = f"{hours} hour{'s' if hours != 1 else ''}"
        return f"{hour_part}, {minute_part}, {second_part}"
    if minutes > 0:
        return f"{minute_part}, {second_part}"
    return second_part

In [79]:
# Load the recorded shield experiences from disk.
df = pd.read_csv("Shield_Experience_31.03.2025_13.41.csv")
print(f"Number of experiences: {len(df)}")

# Partition the experiences into consecutive blocks of 20k episodes. The final 20k episodes are
# left out, because the shield is not trained again once training has finished.
episode_col = df['Episode']
Batch_1_1_to_20000 = df[episode_col.between(1, 20000)]
Batch_2_20001_to_40000 = df[episode_col.between(20001, 40000)]
Batch_3_40001_to_60000 = df[episode_col.between(40001, 60000)]
Batch_4_60001_to_80000 = df[episode_col.between(60001, 80000)]
Batch_5_80001_to_100000 = df[episode_col.between(80001, 100000)]

batches = [
    Batch_1_1_to_20000,
    Batch_2_20001_to_40000,
    Batch_3_40001_to_60000,
    Batch_4_60001_to_80000,
    Batch_5_80001_to_100000,
]

# Report the size and the safe/unsafe balance of every batch, accumulating overall totals.
saff = 0
unsaff = 0
for i in batches:
    batch_total = len(i)
    print("Batch total: ", batch_total)

    # Count the experiences labelled unsafe (0) and safe (1) in this batch.
    zeros = len(i[i['Safe'] == 0])
    ones = len(i[i['Safe'] == 1])

    print("Unsafe: ", zeros, f" - {zeros/batch_total*100:.2f}%")
    print("Safe: ", ones, f" - {ones/batch_total*100:.2f}%", end="\n\n")

    unsaff += zeros
    saff += ones

grand_total = saff + unsaff
print(f"Safe total: {saff}, {saff/grand_total*100:.2f}%   Unsafe total: {unsaff}, {unsaff/grand_total*100:.2f}%")

Number of experiences: 2951771
Batch total:  604987
Unsafe:  19441  - 3.21%
Safe:  585546  - 96.79%

Batch total:  549861
Unsafe:  13559  - 2.47%
Safe:  536302  - 97.53%

Batch total:  457828
Unsafe:  3769  - 0.82%
Safe:  454059  - 99.18%

Batch total:  453604
Unsafe:  3788  - 0.84%
Safe:  449816  - 99.16%

Batch total:  450414
Unsafe:  4301  - 0.95%
Safe:  446113  - 99.05%

Safe total: 2471836, 98.22%   Unsafe total: 44858, 1.78%


###### Create Training/Testing Data

In [80]:
# A single dataset is assembled from all five episode batches. Repeating the five-step procedure
# and averaging accuracy and recall (as was done with Naive Bayes) would be too time consuming
# given how many hyperparameter combinations are tested in a grid search.

# Take the last 20,000 experiences of every batch.
last_20ks = [b.tail(20000) for b in batches]

# Concatenate these to create the dataset used to analyse network performance.
batch = pd.concat(last_20ks, ignore_index=True)
print("Dataset size: ", len(batch))

# Separate the network inputs from the targets.
# Columns at positions 0, 1 and 11 are dropped (noted in the original as index, episode, safe).
non_feature_columns = df.columns[[0, 1, 11]]
X = batch.drop(non_feature_columns, axis=1)
Y = batch["Safe"]

# Hold out 20% of the experiences as a test set.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=6)

# Convert each split (via its NumPy values) to a float32 tensor.
X_train = torch.tensor(X_train.values, dtype=torch.float32)
X_test = torch.tensor(X_test.values, dtype=torch.float32)
# Targets are reshaped into a single column so they line up with the network's (N, 1) output.
Y_train = torch.tensor(Y_train.values, dtype=torch.float32).view(-1, 1)
Y_test = torch.tensor(Y_test.values, dtype=torch.float32).view(-1, 1)

Dataset size:  100000


##### Neural Network and training

In [81]:
class BinaryClassifier(nn.Module):
    """Neural network acting as a binary classifier for shield safety.

    Architecture: 9 inputs -> Linear(64) -> ReLU -> [Dropout]
                  -> Linear(32) -> ReLU -> [Dropout] -> Linear(1) -> Sigmoid.

    Args:
        dropout_probability: Probability passed to both dropout layers. Any
            falsy value (False, 0, None) disables dropout entirely.

    The forward pass returns one value in [0, 1] per input row: the sigmoid of
    the output layer, interpreted as the probability of the experience being safe.
    """
    def __init__(self, dropout_probability=0.3):
        # nn.Module must be initialised before any attribute is assigned on the instance.
        super().__init__()
        self.dropout = dropout_probability  # Kept as an attribute so callers can inspect the setting.

        self.hidden_layer_1 = nn.Linear(9, 64)  # First hidden layer
        # When dropout is disabled an identity layer is registered instead, so forward()
        # needs no branching. Neither layer type holds parameters.
        self.dropout_layer_1 = nn.Dropout(p=self.dropout) if self.dropout else nn.Identity()
        self.hidden_layer_2 = nn.Linear(64, 32)  # Second hidden layer
        self.dropout_layer_2 = nn.Dropout(p=self.dropout) if self.dropout else nn.Identity()
        self.output_layer = nn.Linear(32, 1)  # Output layer
        self.sigmoid = nn.Sigmoid()  # Sigmoid function
        self.relu = nn.ReLU()  # ReLU function

    def forward(self, x):
        """Feed `x` (rows of 9 features) through the network and return the sigmoid output."""
        x = self.dropout_layer_1(self.relu(self.hidden_layer_1(x)))
        x = self.dropout_layer_2(self.relu(self.hidden_layer_2(x)))
        # Sigmoid function to output the probability of the experience being safe.
        return self.sigmoid(self.output_layer(x))

In [82]:
# Sweep over dropout settings: train one network per setting, then scan the decision threshold
# on the held-out test set, reporting accuracy and recall of the unsafe class (label 0).

SEED = 6  # Fixed seed so every dropout setting starts from the same weights and sampling order.
dropouts = [False, 0.1, 0.2, 0.3] # Dropout probabilities tested (also no dropout when False)
thresholds = [0.05, 0.1, 0.2, 0.3, 0.4, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53, 0.54, 0.55, 0.6, 0.7, 0.8, 0.9, 0.95, 0.96, 0.97, 0.975, 0.985] # Safe probability thresholds tested.
epochs = 50

# --- Everything below is independent of the dropout setting, so it is built once. ---

# Combines the input features and labels into a dataset object compatible with DataLoader.
dataset = TensorDataset(X_train, Y_train)

# Weighted random sampling gives more weight to the class with fewer samples (unsafe).
train_labels = Y_train.view(-1).long()
# minlength=2 keeps the weight table indexable by both labels even if one class is absent.
class_counts = np.bincount(train_labels.numpy(), minlength=2) # Number of instances of each class.
class_weights = 1.0 / torch.tensor(class_counts, dtype=torch.float) # Inverse-frequency class weights.
sample_weights = class_weights[train_labels] # Weight for each individual sample.

dataloader = DataLoader(
    dataset,
    batch_size=32,
    sampler=WeightedRandomSampler(weights=sample_weights, num_samples=len(Y_train), replacement=True),
)

# scikit-learn metrics are given explicit 1-D NumPy arrays rather than (N, 1) tensors.
y_true = Y_test.view(-1).numpy()

for d in dropouts:
    print("Dropout:", d)

    # Re-seed per configuration: weight initialisation, sampling and dropout all draw from
    # torch's global generator, so this makes runs reproducible and configurations comparable.
    torch.manual_seed(SEED)

    # Initialise model, loss, and optimiser.
    model = BinaryClassifier(d)
    loss_ = nn.BCELoss() # Binary Cross-Entropy Loss
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    t1 = time.perf_counter()
    model.train()
    for epoch in range(epochs):
        for inputs, labels in dataloader:
            outputs = model(inputs)
            loss = loss_(outputs, labels)
            optimizer.zero_grad() # Zero gradients.
            loss.backward() # Backpropagate through model.
            optimizer.step() # Optimise model.
        if (epoch + 1) % 10 == 0:
            # Note: this is the loss of the final mini-batch of the epoch, not an epoch average.
            print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')
    t2 = time.perf_counter()
    print(f"Trained in {format_time(t2 - t1)}", end="\n\n")

    # The predicted probabilities do not depend on the threshold, so the test set is
    # passed through the network once and only the thresholding is repeated.
    model.eval()
    with torch.no_grad():
        outputs = model(X_test)

    for th in thresholds:
        Y_pred = (outputs >= th).float() # 1 (safe) when the predicted probability reaches the threshold.
        y_pred = Y_pred.view(-1).numpy()

        accuracy = accuracy_score(y_true, y_pred)
        recall = recall_score(y_true, y_pred, pos_label=0) # Recall of the unsafe class.
        print("Threshold: ", th)
        print(f"Acc: {accuracy*100:.2f}%   Recall: {recall*100:.2f}% = {(accuracy+recall)*100:.2f}")

    print("***************************************************************************************************************", end="\n\n")

Dropout: False
Epoch 10, Loss: 0.0373
Epoch 20, Loss: 0.0014
Epoch 30, Loss: 0.0025
Epoch 40, Loss: 0.0129
Epoch 50, Loss: 0.0058
Trained in 7 minutes, 56.09 seconds

Threshold:  0.05
Acc: 99.23%   Recall: 92.40% = 191.63
Threshold:  0.1
Acc: 99.23%   Recall: 96.00% = 195.23
Threshold:  0.2
Acc: 99.15%   Recall: 98.40% = 197.55
Threshold:  0.3
Acc: 99.03%   Recall: 98.40% = 197.43
Threshold:  0.4
Acc: 98.93%   Recall: 98.40% = 197.33
Threshold:  0.46
Acc: 98.91%   Recall: 98.80% = 197.70
Threshold:  0.47
Acc: 98.90%   Recall: 98.80% = 197.70
Threshold:  0.48
Acc: 98.89%   Recall: 98.80% = 197.69
Threshold:  0.49
Acc: 98.89%   Recall: 98.80% = 197.69
Threshold:  0.5
Acc: 98.88%   Recall: 98.80% = 197.68
Threshold:  0.51
Acc: 98.85%   Recall: 98.80% = 197.65
Threshold:  0.52
Acc: 98.83%   Recall: 98.80% = 197.63
Threshold:  0.53
Acc: 98.83%   Recall: 98.80% = 197.63
Threshold:  0.54
Acc: 98.81%   Recall: 98.80% = 197.61
Threshold:  0.55
Acc: 98.78%   Recall: 98.80% = 197.58
Threshold:  0