We need to install some packages (pip install "package"):
- matplotlib
- numpy
- scikit-learn
- tensorboard
- torch

In [14]:
import os
import pickle
import random

import pandas as pd
import torch
from torch import nn
import time

In [2]:
# For reproducibility
def fix_random(seed: int) -> None:
    """Fix all the possible sources of randomness.

    Args:
        seed: the seed to use. 
    """
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True  # slower


seed = 42

In [3]:
# Define the Data Layer
class TabDataset(torch.utils.data.Dataset):
    def __init__(self, x_cat, x_num, y):
        self.x_cat = x_cat
        self.x_num = x_num
        self.y = y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.x_cat[idx], self.x_num[idx], self.y[idx]


In [4]:
from sklearn.preprocessing import OrdinalEncoder

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler


class CustomOrdinalEncoder(OrdinalEncoder):
    def transform(self, X):
        encoded = super().transform(X)
        # Shift all values by +1 and replace unknown_value (-1) with 0
        return np.where(encoded == -1, 0, encoded + 1)

    def inverse_transform(self, X):
        # Handle the inverse transform to account for the +1 offset
        X = np.where(X == 0, -1, X - 1)
        return super().inverse_transform(X)


# define a function with different normalization and scaling techniques
def preprocess(X_train, X_val):
    categorical_columns = X_train.select_dtypes(include=["object"]).columns.tolist()
    # print("categorical_columns len: ", len(categorical_columns))
    numeric_columns = X_train.select_dtypes(include=["int64", "float64"]).columns.tolist()
    # print("numeric_columns len: ", len(numeric_columns))

    ct = ColumnTransformer(
        [
            ("cat", CustomOrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), categorical_columns),  # Trasforma le colonne categoriche
            # ("ordinal", OneHotEncoder(handle_unknown='infrequent_if_exist', sparse_output=False), categorical_columns),  # Trasforma le colonne categoriche
            ("scale", StandardScaler(), numeric_columns)  # Normalizza le colonne numeriche
        ],
        remainder="passthrough"  # Mantieni le altre colonne invariate
    )
    ct.set_output(transform="pandas")

    ct = ct.fit(X_train)
    with open(f"{filepath}/transformer/transformer_ff.save", "wb") as f:
        pickle.dump(ct, f)

    # train set
    X_train = ct.transform(X_train)

    cat_idxs = [i for i, f in enumerate(X_train.columns) if "cat__" in f]
    cat_dims = [len(X_train[f].unique()) + 1 for i, f in enumerate(X_train.columns) if "cat__" in f]
    num_idxs = [i for i, f in enumerate(X_train.columns) if "scale__" in f]
    numeric_columns_number = len(num_idxs)

    X_train = X_train.to_numpy()
    # validation set
    X_val = ct.transform(X_val).to_numpy()

    return X_train, X_val, cat_idxs, cat_dims, num_idxs, numeric_columns_number

In [5]:
# Architecture

class FeedForwardPlus(nn.Module):
    def __init__(self, cat_dims, num_numerical, num_categorical, num_classes, hidden_size, depth=1, batch_norm=False, drop=0, dim_embedding=8, ):
        super(FeedForwardPlus, self).__init__()

        self.embeddings = nn.ModuleList([
            nn.Embedding(cat_dim, dim_embedding) for cat_dim in cat_dims
        ])

        self.numerical_norm = nn.LayerNorm(num_numerical)

        model = []
        model += [nn.Linear(num_categorical * dim_embedding + num_numerical, hidden_size)]
        if batch_norm:
            model += [nn.BatchNorm1d(hidden_size)]
        model += [nn.ReLU()]

        block = [
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU()
        ]

        block_batch_norm = [
            nn.Linear(hidden_size, hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU()
        ]

        block_dropout = [
            nn.Dropout(drop),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU()
        ]

        for i in range(depth):
            if not batch_norm and drop == 0:
                model += block
            elif batch_norm and drop == 0:
                model += block_batch_norm
            elif drop > 0 and not batch_norm:
                model += block_dropout

        self.model = nn.Sequential(*model)

        self.output = nn.Linear(hidden_size, num_classes)

    def forward(self, x_cat, x_num):
        x_cat = x_cat.long()
        cat_embeddings = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        cat_embeddings = torch.stack(cat_embeddings, dim=1)  # (batch_size, num_categorical_features, dim_embedding)
        cat_embeddings = cat_embeddings.view(cat_embeddings.size(0), -1)  # Flatten per concatenare

        x_num = self.numerical_norm(x_num)
        x = torch.cat([cat_embeddings, x_num], dim=1)
        h = self.model(x)
        out = self.output(h)
        return out


import torch
import numpy as np


class PyTorchTabTransformer:
    def __init__(self, model, cat_idx, num_idx, device='cpu'):
        self.model = model
        self.device = device
        self.model.to(self.device)
        self.cat_idx = cat_idx
        self.num_idx = num_idx

    def predict(self, X):
        """
        Esegue le previsioni sul modello PyTorch.
        """
        self.model.eval()  # Modalità di valutazione
        with torch.no_grad():
            # Controlla se X è un array numpy e convertilo in un tensore PyTorch
            if isinstance(X, np.ndarray):
                X = torch.tensor(X, dtype=torch.float32).to(self.device)

            # Supponi che X sia diviso in categoriale e numerico
            y_pred = self.model(X[:, self.cat_idx].long(),
                                X[:, self.num_idx])
            return torch.argmax(y_pred, dim=1).cpu().numpy()

In [6]:
# Define a function for the training process

def train_model(model: FeedForwardPlus, criterion, optimizer, epoch, scheduler, train_loader, val_loader, device, writer, log_name="model"):
    n_iter = 0
    best_valid_loss = float('inf')
    patience = 20
    epochs_since_last_improvement = 0

    start = time.time()

    for epoch in range(epoch):
        model.train()

        start_epoch = time.time()
        loss_train = 0

        for x_cat, x_num, targets in train_loader:
            x_cat, x_num, targets = x_cat.to(device), x_num.to(device), targets.to(device)
            optimizer.zero_grad()

            # Forward pass
            y_pred = model(x_cat, x_num)

            # Compute Loss
            loss = criterion(y_pred, targets)

            # Backward pass
            loss.backward()
            optimizer.step()

            n_iter += 1
            loss_train += loss.item()

        loss_train /= len(train_loader)
        writer.add_scalar("Metrics/Loss/train", loss_train, epoch)

        y_true, y_pred_c, y_pred = test_model(model, val_loader, device)
        loss_val = criterion(y_pred, y_true).item()
        writer.add_scalar("Metrics/Loss/val", loss_val, epoch)

        perf = evaluate_model(y_true.detach().cpu().numpy(), y_pred_c.detach().cpu().numpy())

        print(f'Epoch [{epoch}] - {time.time() - start:.2f} seconds - Train Loss: {loss_train:.6f} - Val Loss: {loss_val:.6f} - Accuracy: {perf["acc"]:.3f} - Balanced Accuracy: {perf["bacc"]:.3f} - Val F1 Score: {perf["f1"]:.3f}')
        writer.add_scalar("Metrics/Accuracy/val", perf["acc"], epoch)
        writer.add_scalar("Metrics/Balanced Accuracy/val", perf["bacc"], epoch)
        writer.add_scalar("Metrics/F1 Score/val", perf["f1"], epoch)

        threshold = 0.01
        # save best model
        if loss_val < best_valid_loss - threshold:
            best_valid_loss = loss_val
            torch.save(model, f"{filepath}/models/best_model_ff.save")
            if not os.path.exists('models'):
                os.makedirs('models')
            # with open(f"{filepath}/models/model_best_tf_custom.save", "wb") as f:
            #     wrapped_model = PyTorchTabTransformer(model, cat_idxs, num_idxs, device)
            #     pickle.dump(wrapped_model, f)
            epochs_since_last_improvement = 0
        elif epochs_since_last_improvement >= patience:
            break
        else:
            epochs_since_last_improvement += 1

        writer.add_scalar("hparam/Learning Rate", scheduler.get_last_lr()[0], epoch)

        scheduler.step()

    best_model = torch.load(f"{filepath}/models/best_model_ff.save")
    return best_model, best_valid_loss

In [7]:
from sklearn.metrics import balanced_accuracy_score, f1_score, accuracy_score


# Define a function to evaluate the performance on validation and test sets

def test_model(model, data_loader, device):
    model.eval()
    y_pred = []
    y_test = []

    for x_cat, x_num, targets in data_loader:
        x_cat, x_num, targets = x_cat.to(device), x_num.to(device), targets.to(device)
        y_pred += model(x_cat, x_num)
        #print(y_pred)
        y_test += targets
        #print(targets)

    y_test = torch.stack(y_test).squeeze()
    y_pred = torch.stack(y_pred).squeeze()
    y_pred_c = y_pred.argmax(dim=1, keepdim=True).squeeze()

    return y_test, y_pred_c, y_pred


def evaluate_model(y_test, y_pred):
    acc = accuracy_score(y_test, y_pred)
    bacc = balanced_accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average="weighted")
    return {"acc": acc, "bacc": bacc, "f1": f1}

In [8]:
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

print('Using device:', device)

Using device: cuda


In [9]:
from sklearn import preprocessing
from sklearn.model_selection import StratifiedKFold, train_test_split, StratifiedGroupKFold

save_in_test_folder = True
if save_in_test_folder:
    filepath = "../TestModule"
else:
    filepath = "."

FILENAME = "dataset/train_dataset.csv"

#Prepare train data
data = pd.read_csv(FILENAME, sep=",", low_memory=False)

# get features names
features = list(data.columns)
# features_to_remove = ["label", "ts", "src_ip", "dst_ip", "dns_query", "ssl_subject", "ssl_issuer", "http_uri", "type", "http_referrer", "http_user_agent"]
features_to_remove = ["type", "label", "ts", "http_referrer"]
features = [feature for feature in features if feature not in features_to_remove]
data = data[features + ["type"]]

# Converte i valori in numeri, sostituendo quelli non validi con NaN
data["src_bytes"] = pd.to_numeric(data["src_bytes"], errors='coerce')
# Filtra le righe con NaN (valori non convertibili)
data = data.dropna(subset=["src_bytes"])
# Converte i valori rimasti in interi
data.loc[:, "src_bytes"] = data["src_bytes"].astype(int)

print("#Righe: " + str(data.shape[0]) + " #Colonne: " + str(data.shape[1]))
df1 = data.dropna()
print("#Righe: " + str(df1.shape[0]) + " #Colonne: " + str(data.shape[1]))

# data = data.sample(n=1000, random_state=5)

X = data[features]
y = data["type"]

le = preprocessing.LabelEncoder()
le.fit(y)
with open(f"{filepath}/transformer/target_encoder.save", "wb") as f:
    pickle.dump(le, f)

num_classes = len(le.classes_)

y = le.transform(y)

# Separate indices
indices = np.arange(X.shape[0])
train_idx, val_idx = train_test_split(indices, test_size=0.2, stratify=y, random_state=seed)

X_val = X.iloc[val_idx]
y_val = y[val_idx]
X_train = X.iloc[train_idx]
y_train = y[train_idx]

X_train, X_val, cat_idxs, cat_dims, num_idxs, numeric_columns_number = preprocess(X_train, X_val)

print(X_train.shape)
print(X_val.shape)

#Righe: 616983 #Colonne: 43
#Righe: 616983 #Colonne: 43
(493586, 42)
(123397, 42)


## Define weights for unbalanced classes

In [10]:
from sklearn.utils import compute_class_weight, compute_sample_weight

class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weights = dict(enumerate(class_weights))
sample_weight = compute_sample_weight(class_weight='balanced', y=y_train)
print(class_weights)

{0: 4.069469865611345, 1: 0.3381003918130257, 2: 1.132545546326465, 3: 4.543735616312253, 4: 98.7172, 5: 2.9863625363020327, 6: 1.1966881637007225, 7: 63.85329883570505, 8: 0.28803534018428717, 9: 0.9751965859248429}


In [11]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)

X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

# filter the categorical and numerical features
X_cat_train = X_train_tensor[:, cat_idxs]
X_num_train = X_train_tensor[:, num_idxs]

X_cat_val = X_val_tensor[:, cat_idxs].long()
X_num_val = X_val_tensor[:, num_idxs]

# Create the dataset
train_dataset = TabDataset(X_cat_train, X_num_train, y_train_tensor)
val_dataset = TabDataset(X_cat_val, X_num_val, y_val_tensor)

Run Tensorboard from the command line:

"tensorboard --logdir runs/"

In [28]:
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
import time

start = time.time()
# Hyperparatemeters
batch_size = 1024
depth = 4
hidden_size = 128
batch_norm = True
drop = 1
num_epochs = 100
learning_rate = 0.01
gamma = 0.5
step_size = 10

log_name = "B" + str(batch_size) + "-dim" + str(hidden_size) + "-dp" + str(depth) + "-ep" + str(num_epochs) + "-lr" + str(learning_rate) + "-steplr" + str(step_size) + "-gamma" + str(gamma) + "-BN" + str(batch_norm) + "-drop" + str(drop)

# fix the seed for reproducibility
fix_random(seed)

# Create relative dataloaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

# Define the architecture, loss and optimizer
model = FeedForwardPlus(cat_dims, numeric_columns_number, len(cat_dims), num_classes, hidden_size, depth, batch_norm=batch_norm, drop=drop)
print(model)
model.to(device)

# Define the training elements
criterion = torch.nn.CrossEntropyLoss(weight=torch.tensor(list(class_weights.values()), dtype=torch.float32).to(device))
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

# Start tensorboard
writer = SummaryWriter('runs/' + log_name)

# Train the model
model, best_valid_loss = train_model(model, criterion, optimizer, num_epochs, scheduler, train_loader, val_loader, device, writer, log_name)

y_true, y_pred_c, y_pred = test_model(model, val_loader, device)
perf = evaluate_model(y_true.detach().cpu().numpy(), y_pred_c.detach().cpu().numpy())

writer.add_hparams(
    {
        'hparam/bsize': batch_size,
        'hparam/hidden size': hidden_size,
        'hparam/depth': depth + 2,
        'hparam/scheduler': gamma,
        'hparam/batch norm': batch_norm
    },
    {
        'Best loss': best_valid_loss,
        'Accuracy': perf['acc'],
        'Balanced accuracy': perf['bacc'],
        'F1 score': perf['f1']
    }
)

# Close tensorboard writer after a training
writer.flush()
writer.close()

# Save timestamp
end = time.time()
print("Time elapsed:", end - start)

FeedForwardPlus(
  (embeddings): ModuleList(
    (0): Embedding(669, 8)
    (1): Embedding(1499, 8)
    (2): Embedding(4, 8)
    (3): Embedding(23, 8)
    (4): Embedding(14, 8)
    (5): Embedding(1888, 8)
    (6-9): 4 x Embedding(4, 8)
    (10): Embedding(6, 8)
    (11): Embedding(8, 8)
    (12-13): 2 x Embedding(4, 8)
    (14-15): 2 x Embedding(5, 8)
    (16): Embedding(3, 8)
    (17): Embedding(4, 8)
    (18): Embedding(47, 8)
    (19): Embedding(3, 8)
    (20): Embedding(102, 8)
    (21): Embedding(4, 8)
    (22): Embedding(6, 8)
    (23): Embedding(13, 8)
    (24): Embedding(5, 8)
    (25): Embedding(3, 8)
  )
  (numerical_norm): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
  (model): Sequential(
    (0): Linear(in_features=224, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (output): Linear(in_features=128, out_features=10, bias=True)
)
Epoch [0] - 16.23 seconds - Train Loss: 0.15

KeyboardInterrupt: 

In [None]:
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
import time

# Cross-validation
kf = StratifiedKFold(n_splits=5)

# Grid Search

# Hyperparameters
seed = 42
batch_sizes = [512, 1024]
hidden_sizes = [64, 128]
batch_norm_list = [False,True]
drop = [0, 1]
depths = [2, 4]
num_epochs = 100
learning_rate = 0.01
gammas = [0.5, 0.9]
step_size = 10

import itertools

hyperparameters = list(itertools.product(batch_sizes, hidden_sizes, depths, gammas, batch_norm_list, drop))
n_comb = len(hyperparameters)
print(f'Number of hyperparameter combinations: {n_comb}')
current_iter = 0

# grid search loop
for batch_size, hidden_size, depth, gamma, batch_norm, drop in hyperparameters:
    fix_random(seed)
    start = time.time()

    log_name = "B" + str(batch_size) + "-dim" + str(hidden_size) + "-dp" + str(depth) + "-ep" + str(num_epochs) + "-lr" + str(learning_rate) + "-steplr" + str(step_size) + "-gamma" + str(gamma) + "-BN" + str(batch_norm) + "-drop" + str(drop)
    print(f'Iteration {current_iter + 1}/{n_comb}', log_name)
    #start tensorboard
    writer = SummaryWriter('runs/' + log_name)
    accuracy_per_fold = []
    balanced_accuracy_score_per_fold = []
    f1_score_per_fold = []
    best_loss_per_fold = []

    fold = 1
    for train_index, val_index in kf.split(X, y):
        X_train_fold, X_val_fold = X.iloc[train_index], X.iloc[val_index]
        y_train_fold, y_val_fold = y[train_index], y[val_index]

        X_train_fold, X_val_fold, cat_idxs_fold, cat_dims_fold, num_idxs_fold, numeric_columns_number_fold = preprocess(X_train_fold, X_val_fold)

        X_train_tensor_fold = torch.tensor(X_train_fold, dtype=torch.float32)
        y_train_tensor_fold = torch.tensor(y_train_fold, dtype=torch.long)

        X_val_tensor_fold = torch.tensor(X_val_fold, dtype=torch.float32)
        y_val_tensor_fold = torch.tensor(y_val_fold, dtype=torch.long)

        # filter the categorical and numerical features
        X_cat_train_fold = X_train_tensor_fold[:, cat_idxs_fold]
        X_num_train_fold = X_train_tensor_fold[:, num_idxs_fold]

        X_cat_val_fold = X_val_tensor_fold[:, cat_idxs_fold].long()
        X_num_val_fold = X_val_tensor_fold[:, num_idxs_fold]

        train_dataset_fold = TabDataset(X_cat_train_fold, X_num_train_fold, y_train_tensor_fold)
        val_dataset_fold = TabDataset(X_cat_val_fold, X_num_val_fold, y_val_tensor_fold)

        train_loader_fold = DataLoader(train_dataset_fold, batch_size=batch_size, shuffle=True)
        val_loader_fold = DataLoader(val_dataset_fold, batch_size=batch_size)

        # define architecture, loss and optimizer
        model = FeedForwardPlus(cat_dims_fold, numeric_columns_number_fold, len(cat_dims_fold), num_classes, hidden_size, depth, batch_norm=batch_norm, drop=drop)
        model.to(device)

        class_weights_fold = compute_class_weight(class_weight='balanced', classes=np.unique(y_train_fold), y=y_train_fold)
        class_weights_fold = dict(enumerate(class_weights_fold))

        # train
        criterion = torch.nn.CrossEntropyLoss(weight=torch.tensor(list(class_weights_fold.values()), dtype=torch.float32).to(device))
        # optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
        model, best_valid_loss = train_model(model, criterion, optimizer, num_epochs, scheduler, train_loader_fold, val_loader_fold, device, writer, log_name)

        # Valuta il modello sul validation set
        y_true, y_pred_c, y_pred = test_model(model, val_loader_fold, device)
        perf = evaluate_model(y_true.detach().cpu().numpy(), y_pred_c.detach().cpu().numpy())

        print(f"Fold {fold} - Accuracy: {perf['acc']:.2f}%")
        print(f"Fold {fold} - Balanced Accuracy: {perf['bacc']:.2f}%")
        print(f"Fold {fold} - F1 Score: {perf['f1']:.2f}%")

        accuracy_per_fold.append(perf["acc"])
        balanced_accuracy_score_per_fold.append(perf["bacc"])
        f1_score_per_fold.append(perf["f1"])
        best_loss_per_fold.append(best_valid_loss)
        fold += 1

    # Riassunto dei risultati
    print("Cross-Validation Results:")
    print(f"Average Accuracy: {np.mean(accuracy_per_fold) * 100:.2f}%")
    print(f"Standard Deviation of Accuracy: {np.std(accuracy_per_fold) * 100:.2f}%")
    print(f"Average Balanced Accuracy: {np.mean(balanced_accuracy_score_per_fold) * 100:.2f}%")
    print(f"Standard Deviation of Balanced Accuracy: {np.std(balanced_accuracy_score_per_fold) * 100:.2f}%")
    print(f"Average F1 Score: {np.mean(f1_score_per_fold) * 100:.2f}%")
    print(f"Standard Deviation of F1 Score: {np.std(f1_score_per_fold) * 100:.2f}%")

    # Close tensorboard writer after a training
    # Log hyperparameters and metrics to TensorBoard
    writer.add_hparams(
        {
            'hparam/bsize': batch_size,
            'hparam/hidden size': hidden_size,
            'hparam/depth': depth + 2,
            'hparam/scheduler': gamma,
            'hparam/batch norm': batch_norm
        },
        {
            'Best Loss': np.mean(best_loss_per_fold),
            'Avg Accuracy': np.mean(accuracy_per_fold),
            'Std Accuracy': np.std(accuracy_per_fold),
            'Avg Balanced Accuracy': np.mean(balanced_accuracy_score_per_fold),
            'Std Balanced Accuracy': np.std(balanced_accuracy_score_per_fold),
            'Avg F1 score': np.mean(f1_score_per_fold),
            'Std F1 score': np.std(f1_score_per_fold)
        }
    )
    writer.flush()
    print("Best loss:", best_valid_loss)
    print("Time elapsed:", time.time() - start)
    current_iter += 1
    writer.close()

Number of hyperparameter combinations: 64
Iteration 1/64 B512-dim64-dp2-ep100-lr0.01-steplr10-gamma0.5-BNFalse-drop0
Epoch [0] - 22.78 seconds - Train Loss: 0.270018 - Val Loss: 0.110735 - Accuracy: 0.960 - Balanced Accuracy: 0.955 - Val F1 Score: 0.963
Epoch [1] - 44.69 seconds - Train Loss: 0.141636 - Val Loss: 0.140529 - Accuracy: 0.963 - Balanced Accuracy: 0.953 - Val F1 Score: 0.966
Epoch [2] - 67.04 seconds - Train Loss: 0.154846 - Val Loss: 0.139303 - Accuracy: 0.958 - Balanced Accuracy: 0.960 - Val F1 Score: 0.960
Epoch [3] - 87.47 seconds - Train Loss: 0.172256 - Val Loss: 0.275106 - Accuracy: 0.958 - Balanced Accuracy: 0.951 - Val F1 Score: 0.961
Epoch [4] - 108.43 seconds - Train Loss: 0.137846 - Val Loss: 0.209730 - Accuracy: 0.971 - Balanced Accuracy: 0.966 - Val F1 Score: 0.972
Epoch [5] - 130.29 seconds - Train Loss: 0.164581 - Val Loss: 0.103233 - Accuracy: 0.959 - Balanced Accuracy: 0.964 - Val F1 Score: 0.964
Epoch [6] - 151.15 seconds - Train Loss: 0.074198 - Val Los