In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, csv file I/O (e.g. pd.read_csv)
import os # deal with os primitives
import time # time-related functions
import matplotlib.pyplot as plt # plots
import pickle # object serialization
from collections import Counter # dict subclass for counting hashable items
from tqdm import tqdm # iterable object
import random # random values generator
import seaborn as sns # prettier plots
import torch # main package for PyTorch
import torch.utils.data as data_utils # access data sets, including pre-processing, loading, and splitting
from torch.utils.data import random_split # randomly split a dataset 
import torch.optim as optim # optimization algorithms
import torch.nn as nn # build neural network (layers, activations, loss functions)
import torch.nn.functional as F # functions used to build neural network
from torchsummary import summary # print the summary of a neural network model
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts # scheduler used to adjust the learning rate
from torch.utils.tensorboard.writer import SummaryWriter # nn log writer
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import torch

# set the random seed for reproducibility
torch.manual_seed(1111)
torch.cuda.manual_seed(1111)
np.random.seed(1111)
random.seed(1111)





In [2]:
df_train = pd.read_csv("./data/df_train.csv")
df_train

Unnamed: 0,num_nodes,num_edges,ratio_n_m,ratio_m_n,density,min_degree,max_degree,mean_degree,median_degree,q0.25_degree,q0.75_degree,variation_coefficient_degree,entropy_degree,best_alg
0,3231,6094,0.530194,1.886103,0.001168,2,4,3.772207,4.0,4.0,4.0,0.111964,0.783043,astar_alg
1,1129,53906,0.020944,47.746678,0.084657,65,122,95.493357,96.0,89.0,102.0,0.096467,5.217304,dfs_alg
2,4100,1254237,0.003269,305.911463,0.149262,538,694,611.822927,611.0,597.0,627.0,0.036566,6.499606,dfs_alg
3,4277,12831,0.333333,3.000000,0.001403,6,6,6.000000,6.0,6.0,6.0,0.000000,-0.000000,astar_alg
4,4712,9286,0.507431,1.970713,0.000837,2,4,3.941426,4.0,4.0,4.0,0.060489,0.324730,dfs_alg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5330,1874,243966,0.007681,130.184632,0.139012,211,311,260.369264,261.0,250.0,271.0,0.056847,5.890843,astar_alg
5331,1843,3686,0.500000,2.000000,0.002172,4,4,4.000000,4.0,4.0,4.0,0.000000,-0.000000,astar_alg
5332,3349,6698,0.500000,2.000000,0.001195,4,4,4.000000,4.0,4.0,4.0,0.000000,-0.000000,astar_alg
5333,3317,429198,0.007728,129.393428,0.078042,210,317,258.786856,259.0,248.0,269.0,0.058965,5.952970,astar_alg


In [3]:
from sklearn.preprocessing import LabelEncoder

le_best_alg = LabelEncoder()

# Encode 'best_alg'
df_train['best_alg_encoded'] = le_best_alg.fit_transform(df_train['best_alg'])

# Print the mapping for 'best_alg'
print("\nMapping for 'best_alg':")
for original, encoded in zip(le_best_alg.classes_, le_best_alg.transform(le_best_alg.classes_)):
    print(f"{original}: {encoded}")



Mapping for 'best_alg':
astar_alg: 0
bfs_alg: 1
dfs_alg: 2


In [4]:
df_train = df_train.drop(columns=['best_alg'])


In [5]:
df_train.columns

Index(['num_nodes', 'num_edges', 'ratio_n_m', 'ratio_m_n', 'density',
       'min_degree', 'max_degree', 'mean_degree', 'median_degree',
       'q0.25_degree', 'q0.75_degree', 'variation_coefficient_degree',
       'entropy_degree', 'best_alg_encoded'],
      dtype='object')

In [6]:
df_train

Unnamed: 0,num_nodes,num_edges,ratio_n_m,ratio_m_n,density,min_degree,max_degree,mean_degree,median_degree,q0.25_degree,q0.75_degree,variation_coefficient_degree,entropy_degree,best_alg_encoded
0,3231,6094,0.530194,1.886103,0.001168,2,4,3.772207,4.0,4.0,4.0,0.111964,0.783043,0
1,1129,53906,0.020944,47.746678,0.084657,65,122,95.493357,96.0,89.0,102.0,0.096467,5.217304,2
2,4100,1254237,0.003269,305.911463,0.149262,538,694,611.822927,611.0,597.0,627.0,0.036566,6.499606,2
3,4277,12831,0.333333,3.000000,0.001403,6,6,6.000000,6.0,6.0,6.0,0.000000,-0.000000,0
4,4712,9286,0.507431,1.970713,0.000837,2,4,3.941426,4.0,4.0,4.0,0.060489,0.324730,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5330,1874,243966,0.007681,130.184632,0.139012,211,311,260.369264,261.0,250.0,271.0,0.056847,5.890843,0
5331,1843,3686,0.500000,2.000000,0.002172,4,4,4.000000,4.0,4.0,4.0,0.000000,-0.000000,0
5332,3349,6698,0.500000,2.000000,0.001195,4,4,4.000000,4.0,4.0,4.0,0.000000,-0.000000,0
5333,3317,429198,0.007728,129.393428,0.078042,210,317,258.786856,259.0,248.0,269.0,0.058965,5.952970,0


In [7]:
def get_device():
    if torch.cuda.is_available():
        device = torch.device('cuda')
        print("[i] USING CUDA")
    else:
        device = torch.device('cpu') # don't have GPU 
        print("[i] USING CPU")
    return device

device = get_device() #setting up the DL device

[i] USING CPU


In [8]:
import torch
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import pickle  # Import pickle for saving the scaler

# Assuming 'your_dataframe' contains the DataFrame with the features and target column
columns_to_normalize = [
    'num_nodes', 'num_edges', 'ratio_n_m', 'ratio_m_n', 'density', 'min_degree', 'max_degree', 'mean_degree', 'median_degree',
    'q0.25_degree', 'q0.75_degree', 'variation_coefficient_degree', 'entropy_degree'
]

# Exclude 'starting_node' and 'target_node' from columns_to_normalize
columns_to_normalize = [col for col in columns_to_normalize]

# Extract features and target columns
features = df_train.drop(columns=["best_alg_encoded"]).copy()
target = df_train['best_alg_encoded'].copy()

# Normalize the features
scaler = MinMaxScaler()
features[columns_to_normalize] = scaler.fit_transform(features[columns_to_normalize])

# Save the scaler to a file
scaler_filename = "./data/data_scaler.pkl"
with open(scaler_filename, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)


In [9]:
# Apply one-hot encoding to the target variable
encoder = OneHotEncoder(sparse_output=False)  # Set sparse_output to False
target_encoded = encoder.fit_transform(target.values.reshape(-1, 1))

# Convert to PyTorch tensors
features_tensor = torch.from_numpy(features.values).float()
target_tensor = torch.from_numpy(target_encoded).float()  # Use float instead of long for one-hot encoded targets

In [10]:
features

Unnamed: 0,num_nodes,num_edges,ratio_n_m,ratio_m_n,density,min_degree,max_degree,mean_degree,median_degree,q0.25_degree,q0.75_degree,variation_coefficient_degree,entropy_degree
0,0.626325,0.002382,0.009037,0.003709,0.005600,0.002195,0.001805,0.003709,0.003968,0.004044,0.003895,0.019332,0.114150
1,0.205841,0.021106,0.000324,0.094712,0.423149,0.071350,0.108303,0.094712,0.095238,0.089990,0.099318,0.016657,0.760567
2,0.800160,0.491166,0.000022,0.606999,0.746249,0.590560,0.624549,0.606999,0.606151,0.603640,0.610516,0.006314,0.947498
3,0.835567,0.005020,0.005669,0.005919,0.006777,0.006586,0.003610,0.005919,0.005952,0.006067,0.005842,0.000000,0.000000
4,0.922585,0.003632,0.008647,0.003877,0.003944,0.002195,0.001805,0.003877,0.003968,0.004044,0.003895,0.010444,0.047338
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5330,0.354871,0.095535,0.000097,0.258297,0.694989,0.231614,0.278881,0.258297,0.258929,0.252781,0.263875,0.009816,0.858753
5331,0.348670,0.001439,0.008520,0.003935,0.010620,0.004391,0.001805,0.003935,0.003968,0.004044,0.003895,0.000000,0.000000
5332,0.649930,0.002619,0.008520,0.003935,0.005735,0.004391,0.001805,0.003935,0.003968,0.004044,0.003895,0.000000,0.000000
5333,0.643529,0.168074,0.000098,0.256727,0.390064,0.230516,0.284296,0.256727,0.256944,0.250758,0.261928,0.010181,0.867810


In [11]:
target_encoded

array([[1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       ...,
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]])

In [12]:
print(type(features))
print(features.shape)
print("--------------")
print(type(target_encoded))
print(target_encoded.shape)

<class 'pandas.core.frame.DataFrame'>
(5335, 13)
--------------
<class 'numpy.ndarray'>
(5335, 3)


In [13]:
#Choosing the number or epochs and the learning rate
num_epochs = 10
batch_size = 64
mini_batch_size = 16
num_classes = 3

input_size = features.shape[1]

In [14]:
import torch
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler
import numpy as np

# Calculate class weights
class_sample_count = np.array(
    [len(np.where(target == t)[0]) for t in np.unique(target)])
print(class_sample_count)

weight = 1. / class_sample_count
print(weight)

samples_weight = np.array([weight[t] for t in target])

# Create a WeightedRandomSampler
sampler = WeightedRandomSampler(samples_weight, len(samples_weight), replacement=True)

[3235  702 1398]
[0.00030912 0.0014245  0.00071531]


In [15]:
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.data.sampler import WeightedRandomSampler

# Assuming 'features_tensor' and 'target_tensor' are already created

# Combine features and target into a TensorDataset
dataset = TensorDataset(features_tensor, target_tensor)

# Create DataLoader
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, sampler=sampler)

In [16]:
# Initialize counters for each class
class_counts = [0] * num_classes  # Replace num_classes with the actual number of classes in your dataset

# Iterate through the DataLoader
for batch_idx, (x, y_one_hot) in enumerate(dataloader):
    # Convert one-hot encoded labels to class indices
    y = torch.argmax(y_one_hot, dim=1)

    # Update class counts for each batch
    for class_idx in range(num_classes):
        class_counts[class_idx] += len(torch.where(y == class_idx)[0])

    # Print class distribution within each batch
    batch_class_counts = [len(torch.where(y == class_idx)[0]) for class_idx in range(num_classes)]
    #print(f"Batch {batch_idx} Class Distribution: {batch_class_counts}")

# Print cumulative count of elements in different classes
print("Cumulative Class Counts:", class_counts)

dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, sampler=sampler)

Cumulative Class Counts: [1752, 1845, 1738]


In [17]:
from torch.utils.data.dataset import random_split

# Assuming 'dataset' is the TensorDataset you created earlier

# Calculate the sizes for training and validation sets
total_samples = len(dataset)
train_size = int(0.9 * total_samples)
val_size = total_samples - train_size

# Split the dataset into training and validation sets
train_set, validation_set = random_split(dataset, [train_size, val_size])

# Create DataLoader for training set
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=1)

# Create DataLoader for validation set
validation_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=True, num_workers=1)


In [26]:
import time
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score
# define training function

# implement early stopping for training function
# from https://stackoverflow.com/questions/71998978/early-stopping-in-pytorch


class EarlyStopper:
    def __init__(self, patience=1, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        if validation_loss <= self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                print(f"[i] Validation Loss Increased - Early Stop!")
                print(
                    f"--- {validation_loss} > {self.min_validation_loss + self.min_delta} ---")
                return True
        return False


def train(net, train_loader, validation_loader, num_epochs, batch_size, mini_batch_size, optimizer, lr_scheduler, criterion, earlystop_patience=0, earlystop_min_delta=1e-6, name=""):
    # Save the loss into a dataframe
    losses = pd.DataFrame(index=list(range(num_epochs)), columns=[
                          'running_loss', 'train_loss', 'valid_loss'])
    min_validation_loss = np.inf

    # Use a summary writer to check loss in real time
    current_time = time.strftime("%Y%m%d_%H%M%S")
    writer = SummaryWriter(
        f'runs/tensorboard/{current_time}_{(net.__class__.__name__).lower()}_{name}')

    # Set early stopping parameters
    # from https://stackoverflow.com/questions/71998978/early-stopping-in-pytorch
    early_stopping = EarlyStopper(
        patience=earlystop_patience, min_delta=earlystop_min_delta)

    start_time_epoch = time.time()

    net.train()
    net.to(device)  # Move the model to the specified device

    for epoch in range(num_epochs):  # Looping over the dataset

        running_loss = 0.0
        validation_loss = 0.0
        train_loss = 0.0

        net.train()  # Set the model to training mode

        for i, data in enumerate(train_loader):
            start_time_mini_batch = time.time()
            inputs, labels = data

            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()  # Setting the parameter gradients to zero
            outputs = net(inputs)  # Forward pass

            labels = labels.float()

            loss = criterion(outputs, labels)  # Applying the criterion
            loss.backward()  # Backward pass
            optimizer.step()  # Optimization step

            running_loss += loss.item()  # Updating the running loss
            train_loss += loss.item()

            if i % mini_batch_size == mini_batch_size - 1:  # Printing the running loss
                print(f"[epoch: {epoch + 1}, mini-batch: {i + 1}, time-taken: {round(time.time() - start_time_mini_batch, 3)} sec] loss: {round(running_loss / mini_batch_size, 6)} ")

                # write on the summary writer
                writer.add_scalar(
                    'Loss/Running', running_loss / mini_batch_size, i)

                running_loss = 0.0
                start_time_mini_batch = time.time()


        net.eval().to(device)

        # Inside the validation loop
        with torch.no_grad():
            net.eval()  # Set the model to evaluation mode
            all_labels = []
            all_outputs = []

            for i, data in enumerate(validation_loader):
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)  # Forward pass

                loss = criterion(outputs, labels) # Applying the criterion
                validation_loss += loss.item() # Check the loss

                # Use argmax to get the index of the predicted class
                predicted_class = torch.argmax(outputs, dim=1)

                # Append predictions and labels for accuracy calculation
                all_outputs.extend(F.one_hot(predicted_class, num_classes=num_classes).cpu().numpy())
                all_labels.extend(labels.cpu().numpy())


            # Convert lists to numpy arrays for easier computation
            all_labels = np.array(all_labels)
            all_outputs = np.array(all_outputs)
            
            print(all_outputs, all_labels)

            val_accuracy = accuracy_score(all_labels, all_outputs)
            val_precision = precision_score(all_labels, all_outputs, average='weighted', zero_division=1.0)
            val_recall = recall_score(all_labels, all_outputs, average='weighted', zero_division=1.0)
            val_f1 = f1_score(all_labels, all_outputs, average='weighted', zero_division=1.0)

            # Print or log the accuracy and validation loss
            print(f'+++ [\033[1mepoch: {epoch + 1}\033[0m, validation - \033[91maccuracy: {val_accuracy:.5f}\033[0m, \033[93mprecision: {val_precision:.5f}\033[0m, \033[94mrecall: {val_recall:.5f}\033[0m, \033[95mf1-score: {val_f1:.5f}\033[0m] +++')

        # Switch back to training mode for the next epoch
        net.train().to(device)

        print('+++ [epoch: %d, training loss: %.5f, validation loss: %.5f] +++' %
              (epoch + 1,
               train_loss / len(train_loader),
               validation_loss / len(validation_loader)))

        print(
            f"--- time-taken for epoch {epoch+1}: {round(time.time() - start_time_epoch, 3)} seconds ---")
        start_time_epoch = time.time()

        # Saving the loss
        losses.at[epoch, 'running_loss'] = running_loss
        losses.at[epoch, 'train_loss'] = train_loss
        losses.at[epoch, 'valid_loss'] = validation_loss

        # Write on the summary writer
        writer.add_scalar('Loss/Train', train_loss / len(train_loader), epoch)
        writer.add_scalar('Loss/Validation', validation_loss /
                          len(validation_loader), epoch)

        # Update the learning rate
        if lr_scheduler.__class__.__name__ == "CosineAnnealingWarmRestarts" and lr_scheduler is not None:
            print(f"\033[90m--- current LR: {round(lr_scheduler.get_last_lr()[0], 9)} ---\033[0m")
            lr_scheduler.step()  # step scheduler learning rate

        if min_validation_loss > (validation_loss / len(validation_loader)):
            print(f'\033[92m+++ [validation loss decreased ({min_validation_loss:.9f} -> {(validation_loss / len(validation_loader)):.9f}), saving the model ...] +++\033[0m')
            min_validation_loss = validation_loss / len(validation_loader)

            # Check if the directory exists, and if not, create it
            save_dir = f'./runs/models/{(net.__class__.__name__).lower()}'
            os.makedirs(save_dir, exist_ok=True)

            # Save State Dict
            torch.save(net.state_dict(), f'{save_dir}/{(net.__class__.__name__).lower()}_{name}_saved_model.pth')

        # Check if early stopping criteria is fulfilled
        if early_stopping.early_stop(validation_loss):
            break

    pickle.dump(losses, open(
        f'./runs/models/{(net.__class__.__name__).lower()}/{(net.__class__.__name__).lower()}_{name}_loss.pkl', 'wb'))
    writer.close()
    print(f"[i] Finished Training")

    # Create DataLoader for training set
    train_loader = DataLoader(
        train_set, batch_size=batch_size, shuffle=True, num_workers=1)

    # Create DataLoader for validation set
    validation_loader = DataLoader(
        validation_set, batch_size=batch_size, shuffle=True, num_workers=1)

In [22]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleLinearNet(nn.Module):
    def __init__(self, input_size, num_classes):
        super(SimpleLinearNet, self).__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=64)
        self.fc4 = nn.Linear(in_features=64, out_features=32)
        self.fc5 = nn.Linear(in_features=32, out_features=16)
        self.fc6 = nn.Linear(in_features=16, out_features=8)
        self.fc7 = nn.Linear(in_features=8, out_features=num_classes)

        self.dropout = nn.Dropout(p=0.3)
    
    def forward(self, x):
        x = self.dropout(F.tanh(self.fc1(x)))
        x = self.dropout(F.tanh(self.fc2(x)))
        x = self.dropout(F.tanh(self.fc3(x)))
        x = self.dropout(F.elu(self.fc4(x)))
        x = self.dropout(F.elu(self.fc5(x)))
        x = self.dropout(F.elu(self.fc6(x)))
        x = F.softmax(self.fc7(x), dim=1)
        return x

# Create and print the model
model = SimpleLinearNet(input_size=input_size, num_classes=num_classes).to(device)
summary(model, (input_size,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 256]           3,584
           Dropout-2                  [-1, 256]               0
            Linear-3                  [-1, 128]          32,896
           Dropout-4                  [-1, 128]               0
            Linear-5                   [-1, 64]           8,256
           Dropout-6                   [-1, 64]               0
            Linear-7                   [-1, 32]           2,080
           Dropout-8                   [-1, 32]               0
            Linear-9                   [-1, 16]             528
          Dropout-10                   [-1, 16]               0
           Linear-11                    [-1, 8]             136
          Dropout-12                    [-1, 8]               0
           Linear-13                    [-1, 3]              27
Total params: 47,507
Trainable params: 

In [27]:
net = SimpleLinearNet(input_size=input_size,num_classes=num_classes).to(device)
train_flag = True # Dont run if False
name = "simple_net"

if train_flag:
    print(f"[i] Traing the network {net.__class__.__name__} ...")

    # Learning rate
    learning_rate = 3e-3
    
    # Define your criterion (e.g., CrossEntropyLoss for multiclass classification)
    criterion = nn.CrossEntropyLoss().to(device)
    
    # Define your optimizer (e.g., Adam)
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    # Cosine Annealing with Restarts (CWR) scheduler.
    # This scheduler is designed to automatically adjust the learning rate according to a cosine wave, and can be used to adjust the learning rate as the model converges.
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=round((1/10)*batch_size), T_mult=1, eta_min=0)

    # Train the model
    train(net, train_loader, validation_loader, num_epochs, batch_size, mini_batch_size, optimizer, scheduler, criterion, name=name)
else: # load the model
    print(f"[i] Loading the network {net.__class__.__name__} ...")
    #Loading existing models (with saved weights)
    net.load_state_dict(torch.load(f'./runs/models/{(net.__class__.__name__).lower()}/{(net.__class__.__name__).lower()}_{name}_saved_model.pth', map_location=device)) #using saved data if present
    net.eval()

[i] Traing the network SimpleLinearNet ...
[epoch: 1, mini-batch: 16, time-taken: 0.008 sec] loss: 0.988147 
[epoch: 1, mini-batch: 32, time-taken: 0.004 sec] loss: 0.953411 
[epoch: 1, mini-batch: 48, time-taken: 0.007 sec] loss: 0.939653 
[epoch: 1, mini-batch: 64, time-taken: 0.008 sec] loss: 0.936545 
[[1 0 0]
 [1 0 0]
 [1 0 0]
 ...
 [1 0 0]
 [1 0 0]
 [1 0 0]] [[1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 ...
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 1. 0.]]
+++ [[1mepoch: 1[0m, validation - [91maccuracy: 0.61236[0m, [93mprecision: 0.76262[0m, [94mrecall: 0.61236[0m, [95mf1-score: 0.46514[0m] +++
+++ [epoch: 1, training loss: 0.96656, validation loss: 0.94394] +++
--- time-taken for epoch 1: 17.028 seconds ---
[90m--- current LR: 0.003 ---[0m
[92m+++ [validation loss decreased (inf -> 0.943937348), saving the model ...] +++[0m
[epoch: 2, mini-batch: 16, time-taken: 0.003 sec] loss: 0.976523 
[epoch: 2, mini-batch: 32, time-taken: 0.005 sec] loss: 0.946392 
[epoch: 2, mini-batch: 48, time-t

In [21]:
class MoreComplexNet(nn.Module):
    def __init__(self, input_dim, num_classes, init_fn):
        super(MoreComplexNet, self).__init__()
        self.init_fn = init_fn
        
        self.fc1 = nn.Linear(in_features=input_dim, out_features=256)
        self.bn1 = nn.BatchNorm1d(256)

        self.fc2 = nn.Linear(in_features=256, out_features=128)
        self.bn2 = nn.BatchNorm1d(128)

        self.fc3 = nn.Linear(in_features=128, out_features=64)
        self.bn3 = nn.BatchNorm1d(64)

        self.fc4 = nn.Linear(in_features=64, out_features=32)
        self.bn4 = nn.BatchNorm1d(32)

        self.fc5 = nn.Linear(in_features=32, out_features=16)
        self.bn5 = nn.BatchNorm1d(16)

        self.fc6 = nn.Linear(in_features=16, out_features=8)
        self.bn6 = nn.BatchNorm1d(8)

        self.fc7 = nn.Linear(in_features=8, out_features=num_classes)

        self.dropout = nn.Dropout(p=0.3)

        self.apply(self.init_weights)

    def init_weights(self, m):
        if isinstance(m, nn.Linear):
            self.init_fn(m.weight)
            m.bias.data.fill_(0.01)

    def forward(self, x):
        x = self.dropout(F.tanh(self.bn1(self.fc1(x))))
        x = self.dropout(F.elu(self.bn2(self.fc2(x))))
        x = self.dropout(F.elu(self.bn3(self.fc3(x))))
        x = self.dropout(F.elu(self.bn4(self.fc4(x))))
        x = self.dropout(F.elu(self.bn5(self.fc5(x))))
        x = self.dropout(F.elu(self.bn6(self.fc6(x))))
        x = F.softmax(self.fc7(x), dim=1)
        return x
    
# Create and print the model
model = MoreComplexNet(input_size, num_classes=num_classes, init_fn=torch.nn.init.xavier_normal_).to(device)
summary(model, (input_size,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 256]           3,584
       BatchNorm1d-2                  [-1, 256]             512
           Dropout-3                  [-1, 256]               0
            Linear-4                  [-1, 128]          32,896
       BatchNorm1d-5                  [-1, 128]             256
           Dropout-6                  [-1, 128]               0
            Linear-7                   [-1, 64]           8,256
       BatchNorm1d-8                   [-1, 64]             128
           Dropout-9                   [-1, 64]               0
           Linear-10                   [-1, 32]           2,080
      BatchNorm1d-11                   [-1, 32]              64
          Dropout-12                   [-1, 32]               0
           Linear-13                   [-1, 16]             528
      BatchNorm1d-14                   

In [22]:
net = MoreComplexNet(input_dim=input_size, num_classes=num_classes,
                     init_fn=torch.nn.init.xavier_normal_).to(device)
train_flag = True
name = "more_complex_net"

if train_flag:
    print(f"[i] Traing the network {net.__class__.__name__} ...")

    # Learning rate
    learning_rate = 5e-3

    # Define your criterion (e.g., CrossEntropyLoss for multiclass classification)
    criterion = nn.CrossEntropyLoss().to(device)

    # Define your optimizer (e.g., Adam)
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    # Cosine Annealing with Restarts (CWR) scheduler.
    # This scheduler is designed to automatically adjust the learning rate according to a cosine wave, and can be used to adjust the learning rate as the model converges.
    scheduler = CosineAnnealingWarmRestarts(
        optimizer, T_0=round((1/10)*batch_size), T_mult=1, eta_min=0)

    # Train the model
    train(net, train_loader, validation_loader, num_epochs, batch_size,
          mini_batch_size, optimizer, scheduler, criterion, name=name)
else:  # load the model
    print(f"[i] Loading the network {net.__class__.__name__} ...")
    #Loading existing models (with saved weights)
    net.load_state_dict(torch.load(f'./runs/models/{(net.__class__.__name__).lower()}/{(net.__class__.__name__).lower()}_{name}_saved_model.pth', map_location=device)) #using saved data if present
    net.eval()

# Best loss found 0.9

[i] Traing the network MoreComplexNet ...
[epoch: 1, mini-batch: 16, time-taken: 0.009 sec] loss: 1.059618 
[epoch: 1, mini-batch: 32, time-taken: 0.007 sec] loss: 1.009864 
[epoch: 1, mini-batch: 48, time-taken: 0.007 sec] loss: 0.984653 
[epoch: 1, mini-batch: 64, time-taken: 0.009 sec] loss: 0.98534 


ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 256])

In [None]:
class EvenMoreComplexNet(nn.Module):
    def __init__(self, input_dim, num_classes, init_fn):
        super(EvenMoreComplexNet, self).__init__()
        self.init_fn = init_fn
        
        self.fc1 = nn.Linear(in_features=input_dim, out_features=1024)
        self.bn1 = nn.BatchNorm1d(1024)

        self.fc2 = nn.Linear(in_features=1024, out_features=512)
        self.bn2 = nn.BatchNorm1d(512)

        self.fc3 = nn.Linear(in_features=512, out_features=256)
        self.bn3 = nn.BatchNorm1d(256)

        self.fc4 = nn.Linear(in_features=256, out_features=128)
        self.bn4 = nn.BatchNorm1d(128)

        self.fc5 = nn.Linear(in_features=128, out_features=64)
        self.bn5 = nn.BatchNorm1d(64)

        self.fc6 = nn.Linear(in_features=64, out_features=32)
        self.bn6 = nn.BatchNorm1d(32)

        self.fc7 = nn.Linear(in_features=32, out_features=16)
        self.bn7 = nn.BatchNorm1d(16)

        self.fc8 = nn.Linear(in_features=16, out_features=8)
        self.bn8 = nn.BatchNorm1d(8)

        self.fc_out = nn.Linear(in_features=8, out_features=num_classes)

        self.dropout = nn.Dropout(p=0.3)

        self.apply(self.init_weights)

    def init_weights(self, m):
        if isinstance(m, nn.Linear):
            self.init_fn(m.weight)
            m.bias.data.fill_(0.01)

    def forward(self, x):
        x = self.dropout(F.tanh(self.bn1(self.fc1(x))))
        x = self.dropout(F.elu(self.bn2(self.fc2(x))))
        x = self.dropout(F.elu(self.bn3(self.fc3(x))))
        x = self.dropout(F.elu(self.bn4(self.fc4(x))))
        x = self.dropout(F.elu(self.bn5(self.fc5(x))))
        x = self.dropout(F.elu(self.bn6(self.fc6(x))))
        x = self.dropout(F.elu(self.bn7(self.fc7(x))))
        x = self.dropout(F.elu(self.bn8(self.fc8(x))))
        x = F.softmax(self.fc_out(x), dim=1)
        return x

# Create and print the model
model = EvenMoreComplexNet(input_size, num_classes=num_classes, init_fn=torch.nn.init.xavier_normal_).to(device)
summary(model, (input_size,))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1024]          14,336
       BatchNorm1d-2                 [-1, 1024]           2,048
           Dropout-3                 [-1, 1024]               0
            Linear-4                  [-1, 512]         524,800
       BatchNorm1d-5                  [-1, 512]           1,024
           Dropout-6                  [-1, 512]               0
            Linear-7                  [-1, 256]         131,328
       BatchNorm1d-8                  [-1, 256]             512
           Dropout-9                  [-1, 256]               0
           Linear-10                  [-1, 128]          32,896
      BatchNorm1d-11                  [-1, 128]             256
          Dropout-12                  [-1, 128]               0
           Linear-13                   [-1, 64]           8,256
      BatchNorm1d-14                   

In [None]:
net = EvenMoreComplexNet(input_dim=input_size, num_classes=num_classes,
                     init_fn=torch.nn.init.xavier_normal_).to(device)
train_flag = True
name = "even_more_complex_net"

if train_flag:
    print(f"[i] Traing the network {net.__class__.__name__} ...")

    # Learning rate
    learning_rate = 5e-3

    # Define your criterion (e.g., CrossEntropyLoss for multiclass classification)
    criterion = nn.CrossEntropyLoss().to(device)

    # Define your optimizer (e.g., Adam)
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    # Cosine Annealing with Restarts (CWR) scheduler.
    # This scheduler is designed to automatically adjust the learning rate according to a cosine wave, and can be used to adjust the learning rate as the model converges.
    scheduler = CosineAnnealingWarmRestarts(
        optimizer, T_0=round((1/10)*batch_size), T_mult=1, eta_min=0)

    # Train the model
    train(net, train_loader, validation_loader, num_epochs, batch_size,
          mini_batch_size, optimizer, scheduler, criterion, name=name)
else:  # load the model
    print(f"[i] Loading the network {net.__class__.__name__} ...")
    #Loading existing models (with saved weights)
    net.load_state_dict(torch.load(f'./runs/models/{(net.__class__.__name__).lower()}/{(net.__class__.__name__).lower()}_{name}_saved_model.pth', map_location=device)) #using saved data if present
    net.eval()

[i] Traing the network EvenMoreComplexNet ...
[epoch: 1, mini-batch: 16, time-taken: 0.016 sec] loss: 1.079372 
[epoch: 1, mini-batch: 32, time-taken: 0.02 sec] loss: 1.031443 
[epoch: 1, mini-batch: 48, time-taken: 0.017 sec] loss: 1.004605 
[epoch: 1, mini-batch: 64, time-taken: 0.016 sec] loss: 0.96853 
+++ [[1mepoch: 1[0m, validation - [91maccuracy: 0.71681[0m, [93mprecision: 0.57978[0m, [94mrecall: 0.54661[0m, [95mf1-score: 0.56270[0m] +++
+++ [epoch: 1, training loss: 1.01957, validation loss: 0.97003] +++
--- time-taken for epoch 1: 7.625 seconds ---
[90m--- current LR: 0.005 ---[0m
[92m+++ [validation loss decreased (inf -> 0.970033355), saving the model ...] +++[0m
[epoch: 2, mini-batch: 16, time-taken: 0.017 sec] loss: 0.982261 
[epoch: 2, mini-batch: 32, time-taken: 0.023 sec] loss: 0.968431 
[epoch: 2, mini-batch: 48, time-taken: 0.021 sec] loss: 0.935523 
[epoch: 2, mini-batch: 64, time-taken: 0.013 sec] loss: 0.970363 
+++ [[1mepoch: 2[0m, validation - [9

In [None]:
class SimplestNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, init_fn):
        super(SimplestNet, self).__init__()

        self.init_fn = init_fn
        
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)
        self.dropout = nn.Dropout(p=0.3)

        self.apply(self.init_weights)

    def init_weights(self, m):
        if isinstance(m, nn.Linear):
            self.init_fn(m.weight)
            m.bias.data.fill_(0.01)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return F.softmax(x, dim=1)

# Create and print the model
model = SimplestNet(input_size=input_size, hidden_size=512, num_classes=num_classes, init_fn=torch.nn.init.xavier_normal_).to(device)
summary(model, (input_size,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 512]           7,168
           Dropout-2                  [-1, 512]               0
            Linear-3                  [-1, 512]         262,656
           Dropout-4                  [-1, 512]               0
            Linear-5                    [-1, 3]           1,539
Total params: 271,363
Trainable params: 271,363
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.02
Params size (MB): 1.04
Estimated Total Size (MB): 1.05
----------------------------------------------------------------


In [None]:
net = SimplestNet(input_size=input_size, hidden_size=512, num_classes=num_classes, init_fn=torch.nn.init.xavier_normal_).to(device)
train_flag = True
name = "simplest_net"

if train_flag:
    print(f"[i] Traing the network {net.__class__.__name__} ...")

    # Learning rate
    learning_rate = 1e-3

    # Define your criterion (e.g., CrossEntropyLoss for multiclass classification)
    criterion = nn.CrossEntropyLoss().to(device)

    # Define your optimizer (e.g., Adam)
    optimizer = optim.RAdam(net.parameters(), lr=learning_rate)

    # Cosine Annealing with Restarts (CWR) scheduler.
    # This scheduler is designed to automatically adjust the learning rate according to a cosine wave, and can be used to adjust the learning rate as the model converges.
    scheduler = CosineAnnealingWarmRestarts(
        optimizer, T_0=round((1/10)*batch_size), T_mult=1, eta_min=0)

    # Train the model
    train(net, train_loader, validation_loader, num_epochs, batch_size,
          mini_batch_size, optimizer, scheduler, criterion, name=name)
else:  # load the model
    print(f"[i] Loading the network {net.__class__.__name__} ...")
    #Loading existing models (with saved weights)
    net.load_state_dict(torch.load(f'./runs/models/{(net.__class__.__name__).lower()}/{(net.__class__.__name__).lower()}_{name}_saved_model.pth', map_location=device)) #using saved data if present
    net.eval()

[i] Traing the network SimplestNet ...
[epoch: 1, mini-batch: 16, time-taken: 0.01 sec] loss: 1.087399 
[epoch: 1, mini-batch: 32, time-taken: 0.008 sec] loss: 1.061582 
[epoch: 1, mini-batch: 48, time-taken: 0.009 sec] loss: 1.034582 
[epoch: 1, mini-batch: 64, time-taken: 0.006 sec] loss: 0.997247 
+++ [[1mepoch: 1[0m, validation - [91maccuracy: 0.75000[0m, [93mprecision: 0.62500[0m, [94mrecall: 0.62500[0m, [95mf1-score: 0.62500[0m] +++
+++ [epoch: 1, training loss: 1.04151, validation loss: 0.96434] +++
--- time-taken for epoch 1: 6.85 seconds ---
[90m--- current LR: 0.001 ---[0m
[92m+++ [validation loss decreased (inf -> 0.964342922), saving the model ...] +++[0m
[epoch: 2, mini-batch: 16, time-taken: 0.01 sec] loss: 0.95369 
[epoch: 2, mini-batch: 32, time-taken: 0.008 sec] loss: 0.944112 
[epoch: 2, mini-batch: 48, time-taken: 0.009 sec] loss: 0.942537 
[epoch: 2, mini-batch: 64, time-taken: 0.009 sec] loss: 0.951701 
+++ [[1mepoch: 2[0m, validation - [91maccurac