# Neuronales Netz

In [213]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

import math
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

import matplotlib.pyplot as plt

In [214]:
numOfFeaturesAtStart = 6

In [215]:
numOfFeaturesAddedToSections = 1

In [216]:
class SimplifiedModel(nn.Module):
    def __init__(self, n_layers = 3, hiddenLayerNumNodes = 64):
        super(SimplifiedModel, self).__init__()
        
        self.numLayers = n_layers

        self.fcIn = nn.Linear(numOfFeaturesAtStart + 12 * (19 + numOfFeaturesAddedToSections), 64)
        self.fcStart = nn.Linear(64, hiddenLayerNumNodes)
        self.fcInner = nn.Linear(hiddenLayerNumNodes,hiddenLayerNumNodes)
        self.fcEnd = nn.Linear(hiddenLayerNumNodes, 16)
        self.fc_out = nn.Linear(16, 1)     
        
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

        self.dropout = nn.Dropout(p=0)

    def forward(self, x):
        # x = x.view(x.size(0), -1) 

        x = self.relu(self.fcIn(x))
        x = self.relu(self.fcStart(x))

        for i in range(self.numLayers):
            x = self.relu(self.fcInner(x))
            if i == round(self.numLayers / 2):
                pass ## take out for final modell
                x = self.dropout(x)

        x = self.relu(self.fcEnd(x))
        x = self.sigmoid(self.fc_out(x))        
        
        return x

In [217]:
from torchsummary import summary

summaryModel = SimplifiedModel().to(torch.device('cuda'))
summary(summaryModel, input_size=(1, numOfFeaturesAtStart + 12 * (19 + numOfFeaturesAddedToSections)))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 1, 64]          15,808
              ReLU-2                [-1, 1, 64]               0
            Linear-3                [-1, 1, 64]           4,160
              ReLU-4                [-1, 1, 64]               0
            Linear-5                [-1, 1, 64]           4,160
              ReLU-6                [-1, 1, 64]               0
            Linear-7                [-1, 1, 64]           4,160
              ReLU-8                [-1, 1, 64]               0
            Linear-9                [-1, 1, 64]           4,160
             ReLU-10                [-1, 1, 64]               0
          Dropout-11                [-1, 1, 64]               0
           Linear-12                [-1, 1, 16]           1,040
             ReLU-13                [-1, 1, 16]               0
           Linear-14                 [-

In [218]:
df_data = pd.read_json('datasets/data.json', orient='records')
df_control = pd.read_json('datasets/control.json', orient='records')
df = pd.concat([df_data, df_control], ignore_index=True)

In [219]:
print(len(df_data))

1700


In [220]:
print(len(df_control))

1726


In [221]:
X = df['sections'].values
numSections = df['numSections'].values
fullEntropy = df['fullEntropy'].values
minEntropy = df['minEntropy'].values
maxEntropy = df['maxEntropy'].values
X_entropyList = df['entropyList'].values
y = df['label'].values
y = [label for label in y] * 2

In [222]:
X = X.tolist()
numSections = numSections.tolist()
fullEntropy = fullEntropy.tolist()
minEntropy = minEntropy.tolist()
maxEntropy = maxEntropy.tolist()

In [223]:
averageChunkEntropy = [0] * len(X_entropyList)
for i in range(len(X_entropyList)):
    averageChunkEntropy[i] = sum(X_entropyList[i]) / len(X_entropyList[i])

In [224]:
chunkEntropyVariance = [0] * len(X_entropyList)
for i in range(len(X_entropyList)):
    chunkEntropyVariance[i] = np.var(X_entropyList[i])

In [225]:
fileLength = [0] * len(X_entropyList)
for i in range(len(X_entropyList)):
    fileLength[i] = 2048 * len(X_entropyList)

In [226]:
numSectionsAboveCertainEntropy = [0] * len(X)
for i in range(len(X)):
    numSectionsAboveCertainEntropy[i] = sum([1 for item in X[i] if item[-1] > 7.5])

In [227]:
for i in range(len(X)):
        for entry in X[i]:
                entry[-2] /= 1e+8
                entry[-1] /= 8
                entry.append(entry[-1] / (fullEntropy[i] + 1e-8 / 8))
        X[i].insert(0,numSections[i] / 12)
        X[i].insert(0,chunkEntropyVariance[i] / 8)
        X[i].insert(0,averageChunkEntropy[i] / 8)
        X[i].insert(0,fullEntropy[i] / 8)
        X[i].insert(0,minEntropy[i] / 8)
        X[i].insert(0,maxEntropy[i] / 8)
        # X[i].insert(0,numSectionsAboveCertainEntropy[i] / 12)
        # X[i].insert(0, fileLength[i] / 1e+8)

In [228]:
print(X[0])

[0.988422284825, 0.125, 0.7862497308625, 0.6411169861333575, 0.15293418837774853, 0.8333333333333334, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00638976, 0.787857733825, 0.1252556444127756], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0.00026624, 0.557630927725, 0.08865359594503026], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00682496, 0.7001793097, 0.11131630353513318], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 8.192e-05, 0.596506976775, 0.09483420999109614], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0, 0.0, 0.0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5.12e-06, 0.42838821945, 0.06810625850627224], [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.072e-05, 0.6516375855375, 0.10359901565453764], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 5.12e-06, 0.025865002225, 0.004112084431413176], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 5.12e-06, 0.0, 0.0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [229]:
import random
X2 = X
entropy_range = 0.5 / 8
byte_range = 250 / 1e+8
for i in range(len(X2)):
    for e, entry in enumerate(X2[i]):
        if e <= numOfFeaturesAtStart - 1:
            continue
        entry[-3] += random.uniform(-byte_range, byte_range)
        entry[-2] += random.uniform(-entropy_range, entropy_range)
        entry[-1] += random.uniform(-entropy_range, entropy_range)
    for e in range(numOfFeaturesAtStart):
        if e == 5:
            continue
        if e == -1:
            X2[i][e] += random.uniform(-byte_range, byte_range)
            continue
        X2[i][e] += random.uniform(-entropy_range, entropy_range)
    # X2[i][4] += random.uniform(-entropy_range, entropy_range)

In [230]:
print(X2[0])

[1.0190473624666445, 0.09487814920594037, 0.82228808172711, 0.5945675229865063, 0.19345136115877204, 0.8333333333333334, [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.006390180728455442, 0.8334585688305456, 0.18716859833941563], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0.0002673459054944122, 0.6093101566531263, 0.12383657283284338], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.006825033769714252, 0.656685436019955, 0.0754931376458979], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 8.094396141949161e-05, 0.6355721427389771, 0.11661066382674688], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1.7914754516801803e-06, -0.04346671872644907, 0.011503486434021479], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7.004489821659803e-06, 0.45894714973569417, 0.09163474058150978], [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.0075819197213786e-05, 0.6697587914395395, 0.07703989479427846], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3.726

In [231]:
X += X2

In [232]:
print(len(X[0]))

18


In [233]:
for i in range(len(X)):
    X[i] = X[i][0:numOfFeaturesAtStart] + [item for sublist in X[i][numOfFeaturesAtStart:] for item in sublist] ## X[i][0:5] + 

In [234]:
print(X[0])
print(len(X[0]))

[1.0190473624666445, 0.09487814920594037, 0.82228808172711, 0.5945675229865063, 0.19345136115877204, 0.8333333333333334, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.006390180728455442, 0.8334585688305456, 0.18716859833941563, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0.0002673459054944122, 0.6093101566531263, 0.12383657283284338, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.006825033769714252, 0.656685436019955, 0.0754931376458979, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 8.094396141949161e-05, 0.6355721427389771, 0.11661066382674688, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1.7914754516801803e-06, -0.04346671872644907, 0.011503486434021479, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7.004489821659803e-06, 0.45894714973569417, 0.09163474058150978, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3.0075819197213786e-05, 0.6697587914395395, 0.07703989479427846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3.726289856938815e-0

In [235]:
batch_size = 64

In [236]:
def generateNewSplit():
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

    kf = KFold(n_splits=5, shuffle=True, random_state=42)

    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [237]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

kf = KFold(n_splits=5, shuffle=True, random_state=42)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Training the model

In [238]:
withOptuna = True
testMore = False

In [239]:
def calculate_recall(y_true, y_pred):
    true_positives = (y_true * y_pred).sum()
    false_negatives = (y_true * (1 - y_pred)).sum()
    
    recall = true_positives / (true_positives + false_negatives + 1e-8)
    return recall.item()

In [240]:
def plot_loss_accuracy(losses, accuracies, val_losses, val_accuracies, recalls, val_recalls):
    epochs = range(1, len(losses) + 1)

    plt.figure(figsize=(15, 5))

    plt.subplot(1, 3, 1)
    plt.plot(epochs, losses, label='Loss', color='slategrey')
    plt.plot(epochs, val_losses, label='Validation Loss', color='cornflowerblue')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.grid(True)

    plt.subplot(1, 3, 2)
    plt.plot(epochs, accuracies, label='Accuracy', color='seagreen')
    plt.plot(epochs, val_accuracies, label='Validation Accuracy', color='lime')
    plt.title('Training Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.grid(True)

    plt.subplot(1, 3, 3)
    plt.plot(epochs, recalls, label='Recall', color='seagreen')
    plt.plot(epochs, val_recalls, label='Validation Recall', color='lime')
    plt.title('Training Recall')
    plt.xlabel('Epoch')
    plt.ylabel('Recall')
    plt.grid(True)

    plt.tight_layout()
    plt.show()

In [241]:
def train(model, criterion, optimizer, scheduler, device, num_epochs=10, trial=None):
    
    all_losses = []
    all_accuracies = []
    all_val_losses = []
    all_val_accuracies = []
    all_recalls = []
    all_val_recalls = []

    for epoch in range(num_epochs):

        fold_results = []

        for fold, (train_index, val_index) in enumerate(kf.split(X_train_tensor)):

            X_train, X_val = X_train_tensor[train_index], X_train_tensor[val_index]
            y_train, y_val = y_train_tensor[train_index], y_train_tensor[val_index]

            train_dataset = TensorDataset(X_train, y_train)
            val_dataset = TensorDataset(X_val, y_val)
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

            model.train()
            
            running_loss = 0.0
            correct_predictions = 0
            total_samples = 0
            fold_recall = 0.0

            for inputs, targets in train_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                optimizer.zero_grad()

                outputs = model(inputs)

                loss = criterion(outputs.squeeze(), targets.float())
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

                predicted = (outputs.squeeze() > 0.5).float()
                correct_predictions += (predicted == targets).sum().item()
                total_samples += targets.size(0)

                fold_recall += calculate_recall(targets, predicted)

            scheduler.step()

            model.eval()

            val_loss = 0
            correct_val_predictions = 0
            fold_val_recall = 0
            total_val_samples = 0
            with torch.no_grad():
                for inputs, targets in val_loader:
                    inputs, targets = inputs.to(device), targets.to(device)
                    outputs = model(inputs)

                    loss = criterion(outputs.squeeze(), targets.float())
                    val_loss += loss.item()

                    predicted = (outputs.squeeze() > 0.5).float()
                    correct_val_predictions += (predicted == targets).sum().item()
                    total_val_samples += targets.size(0)

                    fold_val_recall += calculate_recall(targets, predicted)


            fold_loss = running_loss / len(train_loader)
            fold_accuracy = correct_predictions / total_samples
            fold_recall /= len(train_loader)
            fold_val_loss = val_loss / len(val_loader)
            fold_val_accuracy = correct_val_predictions / total_val_samples
            fold_val_recall /= len(val_loader)

            fold_combined_accuracy = (correct_predictions + correct_val_predictions) / (total_samples + total_val_samples)

            fold_results.append((fold_loss, fold_accuracy, fold_recall, fold_val_loss, fold_val_accuracy, fold_val_recall, fold_combined_accuracy))

        all_losses.append(np.mean([result[0] for result in fold_results]))
        all_accuracies.append(np.mean([result[1] for result in fold_results]))
        all_recalls.append(np.mean([result[2] for result in fold_results]))
        all_val_losses.append(np.mean([result[3] for result in fold_results]))
        all_val_accuracies.append(np.mean([result[4] for result in fold_results]))
        all_val_recalls.append(np.mean([result[5] for result in fold_results]))

        if withOptuna and trial:
            trial.report(np.mean([result[6] for result in fold_results]), epoch)
            
            if trial.should_prune():
               # raise optuna.TrialPruned()
               pass

        if not withOptuna:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {fold_loss:.4f}, Recall: {fold_recall*100:.2f}%, Accuracy: {fold_accuracy*100:.2f}%, Val Loss: {fold_val_loss:.4f}, Val Recall: {fold_val_recall*100:.2f}%, Val Accuracy: {fold_val_accuracy*100:.2f}%, Recall Spread: {(fold_recall-fold_val_recall) * 100:.2f}', end='\r')

    if not withOptuna and not testMore:
        plot_loss_accuracy(all_losses, all_accuracies, all_val_losses, all_val_accuracies, all_recalls, all_val_recalls)

In [242]:
if not withOptuna and not testMore:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = SimplifiedModel(2, 64).to(device)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.002, weight_decay=5e-6)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)
    train(model, criterion, optimizer, scheduler, device, 50)

In [243]:
def test(model, test_loader, criterion, device, printResult=True):
    model.eval() 
    total_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    recall = 0.0

    with torch.no_grad(): 
        for inputs, targets in test_loader:

            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            
            loss = criterion(outputs.squeeze(), targets.float())
            total_loss += loss.item()
            
            predicted = (outputs.squeeze() > 0.5).float() 
            correct_predictions += (predicted == targets).sum().item()
            total_samples += targets.size(0)

            recall += calculate_recall(targets, predicted)

    average_loss = total_loss / len(test_loader)
    accuracy = (correct_predictions / total_samples)
    recall /= len(test_loader)

    if printResult:
        print(f'Test Loss: {average_loss:.4f}, Test Recall: {recall*100:.2f}%, Test Accuracy: {accuracy*100:.2f}%', end='\r\n')

    return average_loss, accuracy, recall

In [244]:
if not withOptuna and not testMore:
    test(model, test_loader, criterion, device)

In [245]:
if testMore and not withOptuna:

    resultsAccuracy = []
    resultsRecall = []

    for i in range(10):

        print("Trial " + str(i + 1), end='\n')

        generateNewSplit()

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = SimplifiedModel(4, 128).to(device)
        criterion = nn.BCELoss()
        optimizer = optim.Adam(model.parameters(), lr=0.002, weight_decay=3.7e-6)
        scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.978)
        train(model, criterion, optimizer, scheduler, device, 50)

        results = test(model, test_loader, criterion, device)
        resultsAccuracy.append(results[1])
        resultsRecall.append(results[2])

    print("Accuracy:", f"{sum(resultsAccuracy) / 10 * 100:.2f}%", "Recall:", f"{sum(resultsRecall) / 10 * 100:.2f}%")

# Optuna Hyperparameter Optimization

In [None]:
import optuna
import optunahub

def objective(trial):

    generateNewSplit()
    
    lr = trial.suggest_float('learning_rate', 0.001, 0.003)
    nHiddenLayers = trial.suggest_int('n_layers', 1, 6)
    weightDecay = trial.suggest_float('weigth_decay', 0, 1e-4, log=True)
    gamma = trial.suggest_float('gamma', 0.975, 1)
    numNodes = trial.suggest_int('n_nodes', 64, 128, step=16)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = SimplifiedModel(nHiddenLayers, numNodes).to(device)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weightDecay)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma)

    train(model, criterion, optimizer, scheduler, device, 40, trial)
    result = test(model, test_loader, criterion, device)
    accuracies = result[1]
    trial.set_user_attr("recall", result[2])

    return accuracies ## , sum_recalls / num_of_cycles

In [None]:
if withOptuna:
    name = "final Model 2"

    if name in optuna.get_all_study_names(storage="sqlite:///db1.sqlite3"):
        study = optuna.load_study(study_name=name, storage="sqlite:///db1.sqlite3")
    else: 
        study = optuna.create_study(direction='maximize', storage="sqlite:///db1.sqlite3", study_name=name, sampler=optunahub.load_module("samplers/auto_sampler").AutoSampler())

    study.optimize(objective, n_trials=150)

[I 2024-11-30 13:37:07,263] Trial 63 finished with value: 0.9854121079504011 and parameters: {'learning_rate': 0.0011281269331623284, 'n_layers': 3, 'weigth_decay': 1.5812291057475136e-06, 'gamma': 0.9956873671542944, 'n_nodes': 112}. Best is trial 30 with value: 0.9854121079504011.


Test Loss: 0.5079, Test Recall: 98.90%, Test Accuracy: 98.54%


[I 2024-11-30 13:37:50,802] Trial 64 finished with value: 0.9839533187454412 and parameters: {'learning_rate': 0.0010789689856192076, 'n_layers': 3, 'weigth_decay': 1.4327934263665812e-06, 'gamma': 0.9957861745402401, 'n_nodes': 112}. Best is trial 30 with value: 0.9854121079504011.


Test Loss: 0.2882, Test Recall: 98.19%, Test Accuracy: 98.40%


[I 2024-11-30 13:38:33,985] Trial 65 finished with value: 0.987600291757841 and parameters: {'learning_rate': 0.0010811265823205975, 'n_layers': 3, 'weigth_decay': 2.14680323467066e-06, 'gamma': 0.9966607076078402, 'n_nodes': 112}. Best is trial 65 with value: 0.987600291757841.


Test Loss: 0.0573, Test Recall: 98.63%, Test Accuracy: 98.76%


[I 2024-11-30 13:39:18,392] Trial 66 finished with value: 0.9883296863603209 and parameters: {'learning_rate': 0.0011465283825767725, 'n_layers': 3, 'weigth_decay': 2.629590933533437e-06, 'gamma': 0.9966156265939661, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.0887, Test Recall: 98.91%, Test Accuracy: 98.83%


[I 2024-11-30 13:40:02,318] Trial 67 finished with value: 0.9810357403355215 and parameters: {'learning_rate': 0.0010647526521738839, 'n_layers': 3, 'weigth_decay': 2.8128830697091488e-06, 'gamma': 0.9969689273958564, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.1107, Test Recall: 98.33%, Test Accuracy: 98.10%


[I 2024-11-30 13:40:46,466] Trial 68 finished with value: 0.9708242159008024 and parameters: {'learning_rate': 0.0011597268757601968, 'n_layers': 3, 'weigth_decay': 2.894630775917808e-06, 'gamma': 0.9971137328395792, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.1158, Test Recall: 97.97%, Test Accuracy: 97.08%


[I 2024-11-30 13:41:30,802] Trial 69 finished with value: 0.9700948212983224 and parameters: {'learning_rate': 0.0008172918967118567, 'n_layers': 3, 'weigth_decay': 2.167294179154733e-06, 'gamma': 0.9917998337254756, 'n_nodes': 96}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.1582, Test Recall: 98.12%, Test Accuracy: 97.01%


[I 2024-11-30 13:42:14,288] Trial 70 finished with value: 0.9861415025528811 and parameters: {'learning_rate': 0.0008483441689650869, 'n_layers': 3, 'weigth_decay': 7.424061473067989e-05, 'gamma': 0.9950899549752613, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.0783, Test Recall: 98.60%, Test Accuracy: 98.61%


[I 2024-11-30 13:42:55,283] Trial 71 finished with value: 0.9839533187454412 and parameters: {'learning_rate': 0.000800095895538357, 'n_layers': 3, 'weigth_decay': 7.136860534482749e-05, 'gamma': 0.9954435380999241, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.0845, Test Recall: 98.27%, Test Accuracy: 98.40%


[I 2024-11-30 13:43:35,796] Trial 72 finished with value: 0.9883296863603209 and parameters: {'learning_rate': 0.0012754471369712875, 'n_layers': 3, 'weigth_decay': 4.747794288707992e-05, 'gamma': 0.9977290846772847, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.0762, Test Recall: 98.92%, Test Accuracy: 98.83%


[I 2024-11-30 13:44:16,661] Trial 73 finished with value: 0.9773887673231219 and parameters: {'learning_rate': 0.0012827921196617133, 'n_layers': 3, 'weigth_decay': 5.99171810276445e-05, 'gamma': 0.9979252305448664, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.1012, Test Recall: 97.99%, Test Accuracy: 97.74%


[I 2024-11-30 13:44:59,314] Trial 74 finished with value: 0.9854121079504011 and parameters: {'learning_rate': 0.001297444440363478, 'n_layers': 3, 'weigth_decay': 3.788761772669258e-06, 'gamma': 0.9947926548505482, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.2718, Test Recall: 98.36%, Test Accuracy: 98.54%


[I 2024-11-30 13:45:41,240] Trial 75 finished with value: 0.9773887673231219 and parameters: {'learning_rate': 0.0008214008695231504, 'n_layers': 3, 'weigth_decay': 3.862335064550067e-06, 'gamma': 0.9948833329499023, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.0986, Test Recall: 98.81%, Test Accuracy: 97.74%


[I 2024-11-30 13:46:23,055] Trial 76 finished with value: 0.9584245076586433 and parameters: {'learning_rate': 0.0009429692316428759, 'n_layers': 3, 'weigth_decay': 1.763860782116541e-06, 'gamma': 0.9924656768203315, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.6777, Test Recall: 97.34%, Test Accuracy: 95.84%


[I 2024-11-30 13:47:05,304] Trial 77 finished with value: 0.975200583515682 and parameters: {'learning_rate': 0.0005860952475354207, 'n_layers': 3, 'weigth_decay': 4.1222283252526183e-05, 'gamma': 0.9967640379134314, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.1587, Test Recall: 97.54%, Test Accuracy: 97.52%


[I 2024-11-30 13:47:48,067] Trial 78 finished with value: 0.9854121079504011 and parameters: {'learning_rate': 0.0013158388206094271, 'n_layers': 3, 'weigth_decay': 3.971457599427651e-06, 'gamma': 0.9982469916980967, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.0772, Test Recall: 98.72%, Test Accuracy: 98.54%


[I 2024-11-30 13:48:30,757] Trial 79 finished with value: 0.9810357403355215 and parameters: {'learning_rate': 0.0012710089914698942, 'n_layers': 3, 'weigth_decay': 5.233177466026869e-06, 'gamma': 0.9981060980827188, 'n_nodes': 96}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.2160, Test Recall: 98.43%, Test Accuracy: 98.10%


[I 2024-11-30 13:49:12,738] Trial 80 finished with value: 0.986870897155361 and parameters: {'learning_rate': 0.000665203741200469, 'n_layers': 3, 'weigth_decay': 3.466536543775925e-06, 'gamma': 0.9984271656270562, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.1041, Test Recall: 98.65%, Test Accuracy: 98.69%


[I 2024-11-30 13:49:55,512] Trial 81 finished with value: 0.9788475565280816 and parameters: {'learning_rate': 0.0006849376998293453, 'n_layers': 3, 'weigth_decay': 2.1669699221266303e-06, 'gamma': 0.9943010547939366, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.2701, Test Recall: 98.38%, Test Accuracy: 97.88%


[I 2024-11-30 13:50:39,097] Trial 82 finished with value: 0.9839533187454412 and parameters: {'learning_rate': 0.0011277939522124593, 'n_layers': 3, 'weigth_decay': 3.6311304114478388e-06, 'gamma': 0.9977827140201339, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.1371, Test Recall: 98.33%, Test Accuracy: 98.40%


[I 2024-11-30 13:51:21,562] Trial 83 finished with value: 0.9824945295404814 and parameters: {'learning_rate': 0.0009923829255744552, 'n_layers': 3, 'weigth_decay': 5.398411662911744e-06, 'gamma': 0.9983248524789489, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.0789, Test Recall: 98.16%, Test Accuracy: 98.25%


[I 2024-11-30 13:52:03,989] Trial 84 finished with value: 0.9730123997082422 and parameters: {'learning_rate': 0.001387308630579069, 'n_layers': 3, 'weigth_decay': 1.710805184809897e-06, 'gamma': 0.9959282472117321, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.1970, Test Recall: 98.05%, Test Accuracy: 97.30%


[I 2024-11-30 13:52:46,252] Trial 85 finished with value: 0.9846827133479212 and parameters: {'learning_rate': 0.0009279719824739322, 'n_layers': 3, 'weigth_decay': 3.349093718405714e-06, 'gamma': 0.9930187961939423, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.1231, Test Recall: 98.38%, Test Accuracy: 98.47%


[I 2024-11-30 13:53:28,412] Trial 86 finished with value: 0.9839533187454412 and parameters: {'learning_rate': 0.0011839510539497018, 'n_layers': 3, 'weigth_decay': 2.247118499792326e-06, 'gamma': 0.994113791620631, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.2095, Test Recall: 98.57%, Test Accuracy: 98.40%


[I 2024-11-30 13:54:09,994] Trial 87 finished with value: 0.9824945295404814 and parameters: {'learning_rate': 0.00047000943310649855, 'n_layers': 3, 'weigth_decay': 1.3810794885569023e-06, 'gamma': 0.9985631749690345, 'n_nodes': 96}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.0631, Test Recall: 98.00%, Test Accuracy: 98.25%


[I 2024-11-30 13:54:51,758] Trial 88 finished with value: 0.9679066374908826 and parameters: {'learning_rate': 0.0003399951126584885, 'n_layers': 3, 'weigth_decay': 7.867527777042664e-05, 'gamma': 0.9952874817888331, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.1910, Test Recall: 96.82%, Test Accuracy: 96.79%


[I 2024-11-30 13:55:33,210] Trial 89 finished with value: 0.9854121079504011 and parameters: {'learning_rate': 0.0013646318142753416, 'n_layers': 3, 'weigth_decay': 6.799402049553193e-06, 'gamma': 0.9965428685344884, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.3186, Test Recall: 98.91%, Test Accuracy: 98.54%


[I 2024-11-30 13:56:15,402] Trial 90 finished with value: 0.986870897155361 and parameters: {'learning_rate': 0.0015373869362412355, 'n_layers': 3, 'weigth_decay': 4.329876518128827e-06, 'gamma': 0.9988626483637025, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.3849, Test Recall: 98.64%, Test Accuracy: 98.69%


[I 2024-11-30 13:56:57,066] Trial 91 finished with value: 0.986870897155361 and parameters: {'learning_rate': 0.0015468097275083091, 'n_layers': 3, 'weigth_decay': 1.947526307289902e-06, 'gamma': 0.992584469076212, 'n_nodes': 96}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.1060, Test Recall: 98.89%, Test Accuracy: 98.69%


[I 2024-11-30 13:57:39,250] Trial 92 finished with value: 0.9810357403355215 and parameters: {'learning_rate': 0.0015483212415510976, 'n_layers': 3, 'weigth_decay': 2.9757066824528247e-06, 'gamma': 0.9925177745143836, 'n_nodes': 96}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.4543, Test Recall: 98.36%, Test Accuracy: 98.10%


[I 2024-11-30 13:58:20,865] Trial 93 finished with value: 0.9883296863603209 and parameters: {'learning_rate': 0.0011935737195761433, 'n_layers': 3, 'weigth_decay': 1.8889992280477921e-06, 'gamma': 0.9950307652544136, 'n_nodes': 96}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.0912, Test Recall: 98.90%, Test Accuracy: 98.83%


[I 2024-11-30 13:59:02,966] Trial 94 finished with value: 0.973741794310722 and parameters: {'learning_rate': 0.0007063307807076813, 'n_layers': 3, 'weigth_decay': 2.346449941029749e-06, 'gamma': 0.9989442426058165, 'n_nodes': 96}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.1347, Test Recall: 96.95%, Test Accuracy: 97.37%


[I 2024-11-30 13:59:44,725] Trial 95 finished with value: 0.9803063457330415 and parameters: {'learning_rate': 0.0015152859994472978, 'n_layers': 3, 'weigth_decay': 1.8740614955496725e-06, 'gamma': 0.9903002160620596, 'n_nodes': 96}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.2534, Test Recall: 98.19%, Test Accuracy: 98.03%


[I 2024-11-30 14:00:27,295] Trial 96 finished with value: 0.9781181619256017 and parameters: {'learning_rate': 0.0012174975439492603, 'n_layers': 3, 'weigth_decay': 1.3815019071888346e-06, 'gamma': 0.9963190873996067, 'n_nodes': 96}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.2909, Test Recall: 97.18%, Test Accuracy: 97.81%


[I 2024-11-30 14:01:09,083] Trial 97 finished with value: 0.9817651349380014 and parameters: {'learning_rate': 0.0009236662159519786, 'n_layers': 3, 'weigth_decay': 1.1933777411989112e-06, 'gamma': 0.9938858900373514, 'n_nodes': 80}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.1059, Test Recall: 98.75%, Test Accuracy: 98.18%


[I 2024-11-30 14:01:51,333] Trial 98 finished with value: 0.9846827133479212 and parameters: {'learning_rate': 0.0010862953552588943, 'n_layers': 3, 'weigth_decay': 1.930601132066355e-06, 'gamma': 0.9914756022922542, 'n_nodes': 96}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.1128, Test Recall: 98.68%, Test Accuracy: 98.47%


[I 2024-11-30 14:02:33,977] Trial 99 finished with value: 0.9547775346462436 and parameters: {'learning_rate': 0.0010186867996412705, 'n_layers': 3, 'weigth_decay': 2.513474966767603e-06, 'gamma': 0.9989642218576188, 'n_nodes': 96}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.1684, Test Recall: 97.05%, Test Accuracy: 95.48%


[I 2024-11-30 14:03:16,113] Trial 100 finished with value: 0.975929978118162 and parameters: {'learning_rate': 0.0003142654532879909, 'n_layers': 3, 'weigth_decay': 1.5279551755360928e-06, 'gamma': 0.9929509882079909, 'n_nodes': 112}. Best is trial 66 with value: 0.9883296863603209.


Test Loss: 0.0867, Test Recall: 97.38%, Test Accuracy: 97.59%


# Save model

In [None]:
# torch.save(model, 'entire_model.pth')

In [None]:
# For loading:
# model = torch.load('entire_model.pth')
# model.eval()