### Naive Bayes Classifier - Data Testing

In [8]:
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.utils import resample
import pandas as pd

def format_time(seconds):
    """Formats time in seconds into hours/minutes/seconds and returns a string of the resulting time."""
    minutes = int(seconds // 60)
    hours = int(minutes // 60)
    minutes = minutes % 60
    seconds = seconds % 60
    if hours > 0:
        return f"{hours} hours, {minutes} minute{'s' if minutes != 1 else ''}, {seconds:.2f} seconds"
    if minutes > 0:
        return f"{minutes} minute{'s' if minutes != 1 else ''}, {seconds:.2f} seconds"
    return f"{seconds:.2f} seconds"

In [26]:
# Import the data.
df = pd.read_csv("Shield_Experience_31.03.2025_13.41.csv")
print(f"Number of experiences: {len(df)}")

Number of experiences: 2951771


In [23]:
# Split data into batches by sets of 20k. Last 20k episodes excluded, as will not train shield again at the end of training.
Batch_1_1_to_20000 = df[df['Episode'].between(1, 20000 )]
Batch_2_20001_to_40000 = df[df['Episode'].between(20001, 40000)]
Batch_3_40001_to_60000 = df[df['Episode'].between(40001, 60000)]
Batch_4_60001_to_80000 = df[df['Episode'].between(60001, 80000)]
Batch_5_80001_to_100000 = df[df['Episode'].between(80001, 100000)]

batches = [Batch_1_1_to_20000, Batch_2_20001_to_40000, Batch_3_40001_to_60000, Batch_4_60001_to_80000, Batch_5_80001_to_100000]

saff = 0
unsaff = 0

for i in batches: # Check batch sizes.
    print("Batch total: ", len(i))
    
    zeros = len(i[i['Safe'] == 0])
    print("Unsafe: ", zeros,f" - {zeros/len(i)*100:.2f}%")
    ones = len(i[i['Safe'] == 1])
    saff += ones
    unsaff += zeros
    print("Safe: ", ones, f" - {ones/len(i)*100:.2f}%", end="\n\n" )
    # Safe and unsafe for each batch.

print(f"Safe total: {saff}, {saff/(saff+unsaff)*100:.2f}%   Unsafe total: {unsaff}, {unsaff/(saff+unsaff)*100:.2f}%")

Batch total:  604987
Unsafe:  19441  - 3.21%
Safe:  585546  - 96.79%

Batch total:  549861
Unsafe:  13559  - 2.47%
Safe:  536302  - 97.53%

Batch total:  457828
Unsafe:  3769  - 0.82%
Safe:  454059  - 99.18%

Batch total:  453604
Unsafe:  3788  - 0.84%
Safe:  449816  - 99.16%

Batch total:  450414
Unsafe:  4301  - 0.95%
Safe:  446113  - 99.05%

Safe total: 2471836, 98.22%   Unsafe total: 44858, 1.78%


In [11]:
def balance_samples(df, mult=1):
    """Function to take in a dataframe of samples and balance the data by undersampling the safe states.
       Baseline make it a 1:1 Ratio.
       The mult paramater changes this. 2 means 2:1 - Safe:Unsafe
       Mult of 0 returns the original data"""
    # What it does: Randomly reduce the number of Class A samples to match Class B (e.g., 1,000 each). [Notes to delete]
    # Pros: Simple, reduces training time, balances classes perfectly.
    # Cons: Throws away a lot of data (26,000 Class A samples), potentially losing valuable patterns.
    if mult == 0:
        # Original data returned.
        return df
    
    unsafe_exps = df[df['Safe'] == 0]
    safe_exps = df[df['Safe'] == 1]

    # Undersample the safe experiences with no duplicates to match the number of unsafe ones present.
    undersampled_safe_exps = resample(safe_exps,replace=False, n_samples=int(len(unsafe_exps)*mult), random_state=6)
    
    # Combine the unsafe with undersampled safe.
    return pd.concat([unsafe_exps, undersampled_safe_exps])

In [12]:
# Different thresholds and safe/unsafe ratio modifiers to be tested in grid search.
ths = [0.2, 0.3, 0.4, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53, 0.54, 0.55, 0.6, 0.7, 0.95, 0.96, 0.97, 0.975, 0.985]
mults = [0, 0.5, 0.75, 0.85, 0.9, 0.95, 1, 1.25, 1.5, 2, 3, 4, 5]

#### Multiomial Naive Bayes

In [13]:
# Train the Multiomial Naive Bayes five times with the five batches.
for mult in mults:
    print(f"Mult: {mult} ***********************************************************")
    acc_total = 0
    rec_tot = 0
    acc_total_th = 0
    rec_tot_th = 0
    classifier = MultinomialNB() # Initiate classifier
    for b_num, batch in enumerate(batches):
        # Variable/Classification Split
        X = batch.drop(df.columns[[0, 1, 11]], axis=1) # Drop columns 0, 1 and 12 (index, episode, safe)
        Y = batch["Safe"]

        # Split into Train and test set
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=6)

        # Recombine X_train and Y_train for balancing
        X_train = X_train.copy() # To nullify Pandas view warning
        X_train["Safe"] = Y_train

        # Balance the data
        balanced_data = balance_samples(X_train, mult)

        # Split out data back into X_train and Y_train again
        Y_train = balanced_data["Safe"]
        X_train = balanced_data.drop("Safe", axis=1)

        # Train Classifier
        classifier.partial_fit(X_train, Y_train, classes=[0, 1])

        # Make predictions using classifier
        Y_pred = classifier.predict(X_test)
        accuracy = accuracy_score(Y_test, Y_pred)
        recall = recall_score(Y_test, Y_pred, pos_label=0)
        acc_total += accuracy
        rec_tot += recall

    print(f"Standard: Mult {mult}    Average accuracy: {acc_total/5*100:.2f}% Average recall: {rec_tot/5*100:.2f}% {(acc_total/5*100)+(rec_tot/5*100):.2f}")

print()
print(" ___________________ With Thresholds ___________________")
for mult in mults:
    print(f"Mult: {mult} ***********************************************************")
    for th in ths:
        acc_total = 0
        rec_tot = 0
        acc_total_th = 0
        rec_tot_th = 0
        classifier = MultinomialNB() # Initiate classifier
        for b_num, batch in enumerate(batches):
            # Variable/Classification Split
            X = batch.drop(df.columns[[0, 1, 11]], axis=1) # Drop columns 0, 1 and 12 (index, episode, safe)
            Y = batch["Safe"]

            # Split into Train and test set
            X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=6)

            # Recombine X_train and Y_train for balancing
            X_train = X_train.copy() # To nullify Pandas view warning
            X_train["Safe"] = Y_train

            # Balance the data
            balanced_data = balance_samples(X_train, mult)

            # Split out data back into X_train and Y_train again
            Y_train = balanced_data["Safe"]
            X_train = balanced_data.drop("Safe", axis=1)

            # Train Classifier
            classifier.partial_fit(X_train, Y_train, classes=[0, 1])

            # Make predictions using classifier
            Y_pred = classifier.predict(X_test)
            accuracy = accuracy_score(Y_test, Y_pred)
            recall = recall_score(Y_test, Y_pred, pos_label=0)
            acc_total += accuracy
            rec_tot += recall
            
            # Using a threshold for the safe class
            threshold = th
            Y_probs = classifier.predict_proba(X_test)
            Y_pred_custom = (Y_probs[:, 1] >= th).astype(int)
            accuracy = accuracy_score(Y_test, Y_pred_custom)
            recall = recall_score(Y_test, Y_pred_custom, pos_label=0)
            acc_total_th += accuracy
            rec_tot_th += recall
            
        print(f"Mult: {mult} Thresh: {th}  Acc: {acc_total_th/5*100:.2f}%   Recall: {rec_tot_th/5*100:.2f}% = {(acc_total_th/5*100)+(rec_tot_th/5*100):.2f}")

Mult: 0 ***********************************************************
Standard: Mult 0    Average accuracy: 98.32% Average recall: 0.00% 98.32
Mult: 0.5 ***********************************************************
Standard: Mult 0.5    Average accuracy: 1.68% Average recall: 100.00% 101.68
Mult: 0.75 ***********************************************************
Standard: Mult 0.75    Average accuracy: 1.75% Average recall: 100.00% 101.75
Mult: 0.85 ***********************************************************
Standard: Mult 0.85    Average accuracy: 9.12% Average recall: 99.78% 108.91
Mult: 0.9 ***********************************************************
Standard: Mult 0.9    Average accuracy: 29.18% Average recall: 96.67% 125.85
Mult: 0.95 ***********************************************************
Standard: Mult 0.95    Average accuracy: 42.00% Average recall: 87.42% 129.41
Mult: 1 ***********************************************************
Standard: Mult 1    Average accuracy: 50.86% Averag

Mult: 0.9 Thresh: 0.97  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 0.9 Thresh: 0.975  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 0.9 Thresh: 0.985  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 0.95 ***********************************************************
Mult: 0.95 Thresh: 0.2  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 0.95 Thresh: 0.3  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 0.95 Thresh: 0.4  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 0.95 Thresh: 0.46  Acc: 74.82%   Recall: 29.46% = 104.28
Mult: 0.95 Thresh: 0.47  Acc: 65.15%   Recall: 49.59% = 114.74
Mult: 0.95 Thresh: 0.48  Acc: 56.41%   Recall: 67.72% = 124.13
Mult: 0.95 Thresh: 0.49  Acc: 48.77%   Recall: 76.47% = 125.24
Mult: 0.95 Thresh: 0.5  Acc: 42.00%   Recall: 87.42% = 129.41
Mult: 0.95 Thresh: 0.51  Acc: 32.28%   Recall: 94.79% = 127.07
Mult: 0.95 Thresh: 0.52  Acc: 20.08%   Recall: 99.02% = 119.10
Mult: 0.95 Thresh: 0.53  Acc: 7.40%   Recall: 99.87% = 107.27
Mult: 0.95 Thresh: 0.54  Acc: 3.16%   Recall: 99.99% = 103.

Mult: 4 Thresh: 0.49  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 4 Thresh: 0.5  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 4 Thresh: 0.51  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 4 Thresh: 0.52  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 4 Thresh: 0.53  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 4 Thresh: 0.54  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 4 Thresh: 0.55  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 4 Thresh: 0.6  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 4 Thresh: 0.7  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 4 Thresh: 0.95  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 4 Thresh: 0.96  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 4 Thresh: 0.97  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 4 Thresh: 0.975  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 4 Thresh: 0.985  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 5 ***********************************************************
Mult: 5 Thresh: 0.2  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 5 Thresh: 0.3  Acc: 98.32%   Recall: 0.00% = 98.

#### With Unsafe Class as Threshold

In [14]:
# Train the Multiomial Naive Bayes five times with the five batches.
for mult in mults:
    print(f"Mult: {mult} ***********************************************************")
    acc_total = 0
    rec_tot = 0
    acc_total_th = 0
    rec_tot_th = 0
    classifier = MultinomialNB() # Initiate classifier
    for b_num, batch in enumerate(batches):
        # Variable/Classification Split
        X = batch.drop(df.columns[[0, 1, 11]], axis=1) # Drop columns 0, 1 and 12 (index, episode, safe)
        Y = batch["Safe"]

        # Split into Train and test set
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=6)

        # Recombine X_train and Y_train for balancing
        X_train = X_train.copy() # To nullify Pandas view warning
        X_train["Safe"] = Y_train

        # Balance the data
        balanced_data = balance_samples(X_train, mult)

        # Split out data back into X_train and Y_train again
        Y_train = balanced_data["Safe"]
        X_train = balanced_data.drop("Safe", axis=1)

        # Train Classifier
        classifier.partial_fit(X_train, Y_train, classes=[0, 1])

        # Make predictions using classifier
        Y_pred = classifier.predict(X_test)
        accuracy = accuracy_score(Y_test, Y_pred)
        recall = recall_score(Y_test, Y_pred, pos_label=0)
        acc_total += accuracy
        rec_tot += recall

    print(f"Mult {mult} Average accuracy: {acc_total/5*100:.2f}% Average recall: {rec_tot/5*100:.2f}% = {(acc_total/5*100)+(rec_tot/5*100):.2f}")

print()
print(" ___________________ With Thresholds ___________________")
for mult in mults:
    print(f"Mult: {mult} ***********************************************************")
    for th in ths:
        acc_total = 0
        rec_tot = 0
        acc_total_th = 0
        rec_tot_th = 0
        classifier = MultinomialNB() # Initiate classifier
        for b_num, batch in enumerate(batches):
            # Variable/Classification Split
            X = batch.drop(df.columns[[0, 1, 11]], axis=1) # Drop columns 0, 1 and 12 (index, episode, safe)
            Y = batch["Safe"]

            # Split into Train and test set
            X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=6)

            # Recombine X_train and Y_train for balancing
            X_train = X_train.copy() # To nullify Pandas view warning
            X_train["Safe"] = Y_train

            # Balance the data
            balanced_data = balance_samples(X_train, mult)

            # Split out data back into X_train and Y_train again
            Y_train = balanced_data["Safe"]
            X_train = balanced_data.drop("Safe", axis=1)

            # Train Classifier
            classifier.partial_fit(X_train, Y_train, classes=[0, 1])

            # Make predictions using classifier
            Y_pred = classifier.predict(X_test)
            accuracy = accuracy_score(Y_test, Y_pred)
            recall = recall_score(Y_test, Y_pred, pos_label=0)
            #print(f"Standard: Unsafe Recall: {recall:.2f} Accuracy: {accuracy*100:.2f}%")
            acc_total += accuracy
            rec_tot += recall
            
            # Using a threshold for the safe class
            threshold = th
            Y_probs = classifier.predict_proba(X_test)
            Y_pred_custom = (Y_probs[:, 0] >= th).astype(int)
            accuracy = accuracy_score(Y_test, Y_pred_custom)
            recall = recall_score(Y_test, Y_pred_custom, pos_label=0)
            acc_total_th += accuracy
            rec_tot_th += recall
                            
        print(f"Mult: {mult} Thresh: {th}  Acc: {acc_total_th/5*100:.2f}%   Recall: {rec_tot_th/5*100:.2f}% = {(acc_total_th/5*100)+(rec_tot_th/5*100):.2f}")

Mult: 0 ***********************************************************
Mult 0 Average accuracy: 98.32% Average recall: 0.00% = 98.32
Mult: 0.5 ***********************************************************
Mult 0.5 Average accuracy: 1.68% Average recall: 100.00% = 101.68
Mult: 0.75 ***********************************************************
Mult 0.75 Average accuracy: 1.75% Average recall: 100.00% = 101.75
Mult: 0.85 ***********************************************************
Mult 0.85 Average accuracy: 9.12% Average recall: 99.78% = 108.91
Mult: 0.9 ***********************************************************
Mult 0.9 Average accuracy: 29.18% Average recall: 96.67% = 125.85
Mult: 0.95 ***********************************************************
Mult 0.95 Average accuracy: 42.00% Average recall: 87.42% = 129.41
Mult: 1 ***********************************************************
Mult 1 Average accuracy: 50.86% Average recall: 74.96% = 125.83
Mult: 1.25 ******************************************

Mult: 0.95 Thresh: 0.2  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 0.95 Thresh: 0.3  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 0.95 Thresh: 0.4  Acc: 98.32%   Recall: 0.00% = 98.32
Mult: 0.95 Thresh: 0.46  Acc: 96.84%   Recall: 0.01% = 96.84
Mult: 0.95 Thresh: 0.47  Acc: 92.60%   Recall: 0.13% = 92.73
Mult: 0.95 Thresh: 0.48  Acc: 79.92%   Recall: 0.98% = 80.90
Mult: 0.95 Thresh: 0.49  Acc: 67.72%   Recall: 5.21% = 72.93
Mult: 0.95 Thresh: 0.5  Acc: 58.00%   Recall: 12.58% = 70.59
Mult: 0.95 Thresh: 0.51  Acc: 51.23%   Recall: 23.53% = 74.76
Mult: 0.95 Thresh: 0.52  Acc: 43.59%   Recall: 32.28% = 75.87
Mult: 0.95 Thresh: 0.53  Acc: 34.85%   Recall: 50.41% = 85.26
Mult: 0.95 Thresh: 0.54  Acc: 25.18%   Recall: 70.54% = 95.72
Mult: 0.95 Thresh: 0.55  Acc: 14.99%   Recall: 83.79% = 98.78
Mult: 0.95 Thresh: 0.6  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 0.95 Thresh: 0.7  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 0.95 Thresh: 0.95  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 0.95 Thresh:

Mult: 4 Thresh: 0.52  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 4 Thresh: 0.53  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 4 Thresh: 0.54  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 4 Thresh: 0.55  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 4 Thresh: 0.6  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 4 Thresh: 0.7  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 4 Thresh: 0.95  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 4 Thresh: 0.96  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 4 Thresh: 0.97  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 4 Thresh: 0.975  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 4 Thresh: 0.985  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 5 ***********************************************************
Mult: 5 Thresh: 0.2  Acc: 3.33%   Recall: 95.14% = 98.47
Mult: 5 Thresh: 0.3  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 5 Thresh: 0.4  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 5 Thresh: 0.46  Acc: 1.68%   Recall: 100.00% = 101.68
Mult: 5 Thresh: 0.47  Acc: 1.68%   Re

#### Gaussian Naive Bayes

In [18]:
# Train the Gaussian Naive Bayes five times with the five batches.
for mult in mults:
    print(f"Mult: {mult} ***********************************************************")
    acc_total = 0
    rec_tot = 0
    acc_total_th = 0
    rec_tot_th = 0
    classifier = GaussianNB() # Initiate classifier
    for b_num, batch in enumerate(batches):
        # Variable/Classification Split
        X = batch.drop(df.columns[[0, 1, 11]], axis=1) # Drop columns 0, 1 and 12 (index, episode, safe)
        Y = batch["Safe"]

        # Split into Train and test set
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=6)

        # Recombine X_train and Y_train for balancing
        X_train = X_train.copy() # To nullify Pandas view warning
        X_train["Safe"] = Y_train

        # Balance the data
        balanced_data = balance_samples(X_train, mult)

        # Split out data back into X_train and Y_train again
        Y_train = balanced_data["Safe"]
        X_train = balanced_data.drop("Safe", axis=1)

        # Train Classifier
        classifier.partial_fit(X_train, Y_train, classes=[0, 1])

        # Make predictions using classifier
        Y_pred = classifier.predict(X_test)
        accuracy = accuracy_score(Y_test, Y_pred)
        recall = recall_score(Y_test, Y_pred, pos_label=0)
        acc_total += accuracy
        rec_tot += recall

    print(f"Mult {mult}  Average accuracy: {acc_total/5*100:.2f}% Average recall: {rec_tot/5*100:.2f}%  = {(acc_total/5*100)+(rec_tot/5*100):.2f}")

print()
print(" ___________________ With Thresholds ___________________")
for mult in mults:
    print(f"Mult: {mult} ***********************************************************")
    for th in ths:
        acc_total = 0
        rec_tot = 0
        acc_total_th = 0
        rec_tot_th = 0
        classifier = GaussianNB() # Initiate classifier
        for b_num, batch in enumerate(batches):
            # Variable/Classification Split
            X = batch.drop(df.columns[[0, 1, 11]], axis=1) # Drop columns 0, 1 and 12 (index, episode, safe)
            Y = batch["Safe"]

            # Split into Train and test set
            X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=6)

            # Recombine X_train and Y_train for balancing
            X_train = X_train.copy() # To nullify Pandas view warning
            X_train["Safe"] = Y_train

            # Balance the data
            balanced_data = balance_samples(X_train, mult)

            # Split out data back into X_train and Y_train again
            Y_train = balanced_data["Safe"]
            X_train = balanced_data.drop("Safe", axis=1)

            # Train Classifier
            classifier.partial_fit(X_train, Y_train, classes=[0, 1])

            # Make predictions using classifier
            Y_pred = classifier.predict(X_test)
            accuracy = accuracy_score(Y_test, Y_pred)
            recall = recall_score(Y_test, Y_pred, pos_label=0)
            acc_total += accuracy
            rec_tot += recall
            
            # Using a threshold for the safe class
            threshold = th
            Y_probs = classifier.predict_proba(X_test)
            Y_pred_custom = (Y_probs[:, 1] >= th).astype(int)
            accuracy = accuracy_score(Y_test, Y_pred_custom)
            recall = recall_score(Y_test, Y_pred_custom, pos_label=0)
            acc_total_th += accuracy
            rec_tot_th += recall
            
        print(f"Mult: {mult} Thresh: {th}  Acc: {acc_total_th/5*100:.2f}% Average recall: {rec_tot_th/5*100:.2f}% = {(acc_total_th/5*100)+(rec_tot_th/5*100):.2f}")

Mult: 0 ***********************************************************
Mult 0  Average accuracy: 98.32% Average recall: 0.00%  = 98.32
Mult: 0.5 ***********************************************************
Mult 0.5  Average accuracy: 38.95% Average recall: 94.91%  = 133.87
Mult: 0.75 ***********************************************************
Mult 0.75  Average accuracy: 46.78% Average recall: 85.14%  = 131.92
Mult: 0.85 ***********************************************************
Mult 0.85  Average accuracy: 49.26% Average recall: 81.98%  = 131.24
Mult: 0.9 ***********************************************************
Mult 0.9  Average accuracy: 50.42% Average recall: 80.55%  = 130.97
Mult: 0.95 ***********************************************************
Mult 0.95  Average accuracy: 51.96% Average recall: 79.43%  = 131.40
Mult: 1 ***********************************************************
Mult 1  Average accuracy: 53.12% Average recall: 77.65%  = 130.77
Mult: 1.25 ***************************

Mult: 0.9 Thresh: 0.53  Acc: 48.04% Average recall: 83.01% = 131.04
Mult: 0.9 Thresh: 0.54  Acc: 47.19% Average recall: 84.65% = 131.84
Mult: 0.9 Thresh: 0.55  Acc: 46.24% Average recall: 85.32% = 131.56
Mult: 0.9 Thresh: 0.6  Acc: 42.13% Average recall: 90.65% = 132.78
Mult: 0.9 Thresh: 0.7  Acc: 35.08% Average recall: 98.18% = 133.26
Mult: 0.9 Thresh: 0.95  Acc: 1.68% Average recall: 100.00% = 101.68
Mult: 0.9 Thresh: 0.96  Acc: 1.68% Average recall: 100.00% = 101.68
Mult: 0.9 Thresh: 0.97  Acc: 1.68% Average recall: 100.00% = 101.68
Mult: 0.9 Thresh: 0.975  Acc: 1.68% Average recall: 100.00% = 101.68
Mult: 0.9 Thresh: 0.985  Acc: 1.68% Average recall: 100.00% = 101.68
Mult: 0.95 ***********************************************************
Mult: 0.95 Thresh: 0.2  Acc: 83.16% Average recall: 19.93% = 103.09
Mult: 0.95 Thresh: 0.3  Acc: 75.39% Average recall: 41.92% = 117.31
Mult: 0.95 Thresh: 0.4  Acc: 63.52% Average recall: 63.36% = 126.89
Mult: 0.95 Thresh: 0.46  Acc: 56.11% Average 

Mult: 3 Thresh: 0.49  Acc: 79.88% Average recall: 27.61% = 107.49
Mult: 3 Thresh: 0.5  Acc: 79.39% Average recall: 28.31% = 107.71
Mult: 3 Thresh: 0.51  Acc: 78.92% Average recall: 29.68% = 108.60
Mult: 3 Thresh: 0.52  Acc: 78.44% Average recall: 31.71% = 110.15
Mult: 3 Thresh: 0.53  Acc: 77.93% Average recall: 33.53% = 111.46
Mult: 3 Thresh: 0.54  Acc: 77.46% Average recall: 35.79% = 113.26
Mult: 3 Thresh: 0.55  Acc: 76.93% Average recall: 38.06% = 114.99
Mult: 3 Thresh: 0.6  Acc: 72.98% Average recall: 46.37% = 119.35
Mult: 3 Thresh: 0.7  Acc: 60.07% Average recall: 69.67% = 129.74
Mult: 3 Thresh: 0.95  Acc: 4.21% Average recall: 99.90% = 104.11
Mult: 3 Thresh: 0.96  Acc: 2.33% Average recall: 100.00% = 102.33
Mult: 3 Thresh: 0.97  Acc: 1.69% Average recall: 100.00% = 101.69
Mult: 3 Thresh: 0.975  Acc: 1.68% Average recall: 100.00% = 101.68
Mult: 3 Thresh: 0.985  Acc: 1.68% Average recall: 100.00% = 101.68
Mult: 4 ***********************************************************
Mult: 4 Th

#### With Unsafe Class as Threshold

In [16]:
# Train the Gaussian Naive Bayes five times with the five batches.
for mult in mults:
    print(f"Mult: {mult} ***********************************************************")
    acc_total = 0
    rec_tot = 0
    acc_total_th = 0
    rec_tot_th = 0
    classifier = GaussianNB() # Initiate classifier
    for b_num, batch in enumerate(batches):
        # Variable/Classification Split
        X = batch.drop(df.columns[[0, 1, 11]], axis=1) # Drop columns 0, 1 and 12 (index, episode, safe)
        Y = batch["Safe"]

        # Split into Train and test set
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=6)

        # Recombine X_train and Y_train for balancing
        X_train = X_train.copy() # To nullify Pandas view warning
        X_train["Safe"] = Y_train

        # Balance the data
        balanced_data = balance_samples(X_train, mult)

        # Split out data back into X_train and Y_train again
        Y_train = balanced_data["Safe"]
        X_train = balanced_data.drop("Safe", axis=1)

        # Train Classifier
        classifier.partial_fit(X_train, Y_train, classes=[0, 1])

        # Make predictions using classifier
        Y_pred = classifier.predict(X_test)
        accuracy = accuracy_score(Y_test, Y_pred)
        recall = recall_score(Y_test, Y_pred, pos_label=0)
        acc_total += accuracy
        rec_tot += recall

    print(f"Mult {mult} Average accuracy: {acc_total/5*100:.2f}% Average recall: {rec_tot/5*100:.2f}%  = {(acc_total/5*100)+(rec_tot/5*100):.2f}")

print()
print(" ___________________ With Thresholds ___________________")
for mult in mults:
    print(f"Mult: {mult} ***********************************************************")
    for th in ths:
        acc_total = 0
        rec_tot = 0
        acc_total_th = 0
        rec_tot_th = 0
        classifier = GaussianNB() # Initiate classifier
        for b_num, batch in enumerate(batches):
            # Variable/Classification Split
            X = batch.drop(df.columns[[0, 1, 11]], axis=1) # Drop columns 0, 1 and 12 (index, episode, safe)
            Y = batch["Safe"]

            # Split into Train and test set
            X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20, random_state=6)

            # Recombine X_train and Y_train for balancing
            X_train = X_train.copy() # To nullify Pandas view warning
            X_train["Safe"] = Y_train

            # Balance the data
            balanced_data = balance_samples(X_train, mult)

            # Split out data back into X_train and Y_train again
            Y_train = balanced_data["Safe"]
            X_train = balanced_data.drop("Safe", axis=1)

            # Train Classifier
            classifier.partial_fit(X_train, Y_train, classes=[0, 1])

            # Make predictions using classifier
            Y_pred = classifier.predict(X_test)
            accuracy = accuracy_score(Y_test, Y_pred)
            recall = recall_score(Y_test, Y_pred, pos_label=0)
            acc_total += accuracy
            rec_tot += recall
            
            # Using a threshold for the safe class
            threshold = th
            Y_probs = classifier.predict_proba(X_test)
            Y_pred_custom = (Y_probs[:, 0] >= th).astype(int)
            accuracy = accuracy_score(Y_test, Y_pred_custom)
            recall = recall_score(Y_test, Y_pred_custom, pos_label=0)
            acc_total_th += accuracy
            rec_tot_th += recall
            
        print(f"Mult: {mult} Thresh: {th}  Acc: {acc_total_th/5*100:.2f}% Average recall: {rec_tot_th/5*100:.2f}% =  = {(acc_total_th/5*100)+(rec_tot_th/5*100):.2f}")

Mult: 0 ***********************************************************
Mult 0 Average accuracy: 98.32% Average recall: 0.00%  = 98.32
Mult: 0.5 ***********************************************************
Mult 0.5 Average accuracy: 38.95% Average recall: 94.91%  = 133.87
Mult: 0.75 ***********************************************************
Mult 0.75 Average accuracy: 46.78% Average recall: 85.14%  = 131.92
Mult: 0.85 ***********************************************************
Mult 0.85 Average accuracy: 49.26% Average recall: 81.98%  = 131.24
Mult: 0.9 ***********************************************************
Mult 0.9 Average accuracy: 50.42% Average recall: 80.55%  = 130.97
Mult: 0.95 ***********************************************************
Mult 0.95 Average accuracy: 51.96% Average recall: 79.43%  = 131.40
Mult: 1 ***********************************************************
Mult 1 Average accuracy: 53.12% Average recall: 77.65%  = 130.77
Mult: 1.25 **********************************

Mult: 0.9 Thresh: 0.5  Acc: 49.58% Average recall: 19.45% =  = 69.03
Mult: 0.9 Thresh: 0.51  Acc: 48.41% Average recall: 20.43% =  = 68.84
Mult: 0.9 Thresh: 0.52  Acc: 47.53% Average recall: 21.46% =  = 68.99
Mult: 0.9 Thresh: 0.53  Acc: 46.39% Average recall: 22.63% =  = 69.03
Mult: 0.9 Thresh: 0.54  Acc: 45.53% Average recall: 23.64% =  = 69.17
Mult: 0.9 Thresh: 0.55  Acc: 44.21% Average recall: 25.11% =  = 69.32
Mult: 0.9 Thresh: 0.6  Acc: 38.24% Average recall: 33.19% =  = 71.43
Mult: 0.9 Thresh: 0.7  Acc: 25.59% Average recall: 55.66% =  = 81.25
Mult: 0.9 Thresh: 0.95  Acc: 1.69% Average recall: 100.00% =  = 101.68
Mult: 0.9 Thresh: 0.96  Acc: 1.68% Average recall: 100.00% =  = 101.68
Mult: 0.9 Thresh: 0.97  Acc: 1.68% Average recall: 100.00% =  = 101.68
Mult: 0.9 Thresh: 0.975  Acc: 1.68% Average recall: 100.00% =  = 101.68
Mult: 0.9 Thresh: 0.985  Acc: 1.68% Average recall: 100.00% =  = 101.68
Mult: 0.95 ***********************************************************
Mult: 0.95 Thre

Mult: 3 Thresh: 0.2  Acc: 53.31% Average recall: 14.66% =  = 67.97
Mult: 3 Thresh: 0.3  Acc: 39.93% Average recall: 30.33% =  = 70.26
Mult: 3 Thresh: 0.4  Acc: 27.02% Average recall: 53.63% =  = 80.65
Mult: 3 Thresh: 0.46  Acc: 22.54% Average recall: 64.21% =  = 86.74
Mult: 3 Thresh: 0.47  Acc: 22.07% Average recall: 66.47% =  = 88.54
Mult: 3 Thresh: 0.48  Acc: 21.56% Average recall: 68.29% =  = 89.85
Mult: 3 Thresh: 0.49  Acc: 21.08% Average recall: 70.32% =  = 91.40
Mult: 3 Thresh: 0.5  Acc: 20.61% Average recall: 71.69% =  = 92.29
Mult: 3 Thresh: 0.51  Acc: 20.12% Average recall: 72.39% =  = 92.51
Mult: 3 Thresh: 0.52  Acc: 19.45% Average recall: 72.87% =  = 92.32
Mult: 3 Thresh: 0.53  Acc: 18.90% Average recall: 74.04% =  = 92.95
Mult: 3 Thresh: 0.54  Acc: 18.19% Average recall: 75.88% =  = 94.07
Mult: 3 Thresh: 0.55  Acc: 17.58% Average recall: 77.58% =  = 95.16
Mult: 3 Thresh: 0.6  Acc: 13.85% Average recall: 84.33% =  = 98.18
Mult: 3 Thresh: 0.7  Acc: 6.04% Average recall: 95.52