In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, csv file I/O (e.g. pd.read_csv)
import os # deal with os primitives
import time # time-related functions
import matplotlib.pyplot as plt # plots
import pickle # object serialization
from collections import Counter # dict subclass for counting hashable items
from tqdm import tqdm # iterable object
import random # random values generator
import seaborn as sns # prettier plots
import torch # main package for PyTorch
import torch.utils.data as data_utils # access data sets, including pre-processing, loading, and splitting
from torch.utils.data import random_split # randomly split a dataset 
import torch.optim as optim # optimization algorithms
import torch.nn as nn # build neural network (layers, activations, loss functions)
import torch.nn.functional as F # functions used to build neural network
from torchsummary import summary # print the summary of a neural network model
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts # scheduler used to adjust the learning rate
from torch.utils.tensorboard.writer import SummaryWriter # nn log writer
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import torch

# set the random seed for reproducibility
torch.manual_seed(1111)
torch.cuda.manual_seed(1111)
np.random.seed(1111)
random.seed(1111)


In [3]:
import pandas as pd

# Load the train set
df_train = pd.read_pickle("df_train.pkl")

# Load the test set
df_test = pd.read_pickle("df_test.pkl")

# Define a function to extract the matrix and target from a DataFrame
def extract_matrix_and_target(df):
    matrix_column = "graph"
    target_column = "label_time"

    matrices = df[matrix_column].tolist()
    targets = df[target_column].tolist()

    return matrices, targets

# Extract matrix and target for train set
train_graphs, train_targets = extract_matrix_and_target(df_train)

# Extract matrix and target for test set
test_graphs, test_targets = extract_matrix_and_target(df_test)

# Display the first matrix and target in the train set
print("First matrix in train set:")
print(train_graphs[0])

print("\nFirst target in train set:")
print(train_targets[0])


First matrix in train set:
Graph with 18 nodes and 29 edges

First target in train set:
best_first_search


In [17]:
import networkx as nx
import numpy as np
import statistics
import math

def calculate_graph_size_features(graph):
    num_nodes = len(graph.nodes)
    num_edges = len(graph.edges)

    return {
        'nodes': num_nodes,
        'edges': num_edges,
        'ratio_n_m': num_nodes / num_edges if num_edges != 0 else 0,
        'ratio_m_n': num_edges / num_nodes if num_nodes != 0 else 0,
        'density': 2 * num_edges / (num_nodes * (num_nodes - 1)) if num_nodes > 1 else 0
    }

def calculate_node_degree_stats(graph):
    degrees = list(dict(graph.degree()).values())
    degrees_np = np.array(degrees)

    unique_degrees, counts = np.unique(degrees_np, return_counts=True)

    entropy_term = np.fromiter((p * np.log2(p) for p in (np.count_nonzero(degrees_np == x) / len(degrees_np) for x in set(degrees_np))), dtype=float)
    
    return {
        'min_degree': np.min(degrees_np),
        'max_degree': np.max(degrees_np),
        'mean_degree': np.mean(degrees_np),
        'median_degree': np.median(degrees_np),
        'q0.25_degree': np.percentile(degrees_np, 25),
        'q0.75_degree': np.percentile(degrees_np, 75),
        'variation_coefficient_degree': np.std(degrees_np) / np.mean(degrees_np),
        'entropy_degree': -np.sum(entropy_term)
    }

def calculate_maximal_clique_stats(graph, num_nodes):
    cliques = list(map(len, nx.enumerate_all_cliques(graph)))
    return {
        'normalized_min_clique_size': min(cliques) / num_nodes,
        'normalized_max_clique_size': max(cliques) / num_nodes,
        'normalized_median_clique_size': statistics.median(cliques) / num_nodes,
        'normalized_q0.25_clique_size': statistics.quantiles(cliques, n=4)[0] / num_nodes,
        'normalized_q0.75_clique_size': statistics.quantiles(cliques, n=4)[2] / num_nodes,
        'normalized_variation_coefficient_clique_size': statistics.stdev(cliques) / statistics.mean(cliques),
        'normalized_entropy_clique_size': -sum(p * math.log2(p) for p in (cliques.count(x) / len(cliques) for x in set(cliques))) / num_nodes
    }

In [None]:
def calculate_graph_size_features(graph):
    num_nodes = len(graph.nodes)
    num_edges = len(graph.edges)

    # Check for division by zero
    if num_edges != 0 and num_nodes != 0:
        return {
            'nodes': num_nodes,
            'edges': num_edges,
            'ratio_n_m': num_nodes / num_edges,
            'ratio_m_n': num_edges / num_nodes,
            'density': 2 * num_edges / (num_nodes * (num_nodes - 1))
        }
    else:
        # Handle the case where either num_edges or num_nodes is zero
        return {
            'nodes': num_nodes,
            'edges': num_edges,
            'ratio_n_m': 0,
            'ratio_m_n': 0,
            'density': 0
        }

def calculate_node_degree_stats(graph):
    degrees = list(dict(graph.degree()).values())
    degrees_np = np.array(degrees)

    # Check for diversity in degrees
    if len(set(degrees_np)) > 1:
        unique_degrees, counts = np.unique(degrees_np, return_counts=True)

        entropy_term = np.fromiter((p * np.log2(p) for p in (np.count_nonzero(degrees_np == x) / len(degrees_np) for x in set(degrees_np))), dtype=float)

        return {
            'min_degree': np.min(degrees_np),
            'max_degree': np.max(degrees_np),
            'mean_degree': np.mean(degrees_np),
            'median_degree': np.median(degrees_np),
            'q0.25_degree': np.percentile(degrees_np, 25),
            'q0.75_degree': np.percentile(degrees_np, 75),
            'variation_coefficient_degree': np.std(degrees_np) / np.mean(degrees_np),
            'entropy_degree': -np.sum(entropy_term)
        }
    else:
        # Handle the case where there is no diversity in degrees
        return {
            'min_degree': 0,
            'max_degree': 0,
            'mean_degree': 0,
            'median_degree': 0,
            'q0.25_degree': 0,
            'q0.75_degree': 0,
            'variation_coefficient_degree': 0,
            'entropy_degree': 0
        }


def calculate_maximal_clique_stats(graph, num_nodes):
    cliques = list(map(len, nx.enumerate_all_cliques(graph)))

    # Check for diversity in cliques
    if len(set(cliques)) > 1:
        return {
            'normalized_min_clique_size': min(cliques) / num_nodes,
            'normalized_max_clique_size': max(cliques) / num_nodes,
            'normalized_median_clique_size': statistics.median(cliques) / num_nodes,
            'normalized_q0.25_clique_size': statistics.quantiles(cliques, n=4)[0] / num_nodes,
            'normalized_q0.75_clique_size': statistics.quantiles(cliques, n=4)[2] / num_nodes,
            'normalized_variation_coefficient_clique_size': statistics.stdev(cliques) / statistics.mean(cliques),
            'normalized_entropy_clique_size': -sum(p * math.log2(p) for p in (cliques.count(x) / len(cliques) for x in set(cliques))) / num_nodes
        }
    else:
        # Handle the case where there is no diversity in cliques
        return {
            'normalized_min_clique_size': 0,
            'normalized_max_clique_size': 0,
            'normalized_median_clique_size': 0,
            'normalized_q0.25_clique_size': 0,
            'normalized_q0.75_clique_size': 0,
            'normalized_variation_coefficient_clique_size': 0,
            'normalized_entropy_clique_size': 0
        }


In [27]:
# Loop through each graph in the train set
train_graph_metrics = []
for graph, target in zip(train_graphs, train_targets):
    size_features = calculate_graph_size_features(graph)
    degree_stats = calculate_node_degree_stats(graph)
    clique_stats = calculate_maximal_clique_stats(graph, len(graph.nodes()))

    # Combine the metrics into one dictionary
    graph_metrics = {**size_features, **degree_stats, **clique_stats, 'target': target}
    
    train_graph_metrics.append(graph_metrics)

# Loop through each graph in the test set
test_graph_metrics = []
for graph, target in zip(test_graphs, test_targets):
    graph_array = nx.to_numpy_array(graph)
    size_features = calculate_graph_size_features(graph)
    degree_stats = calculate_node_degree_stats(graph)
    clique_stats = calculate_maximal_clique_stats(graph, graph_array.shape[0])  # Use graph_array.shape[0] for the number of nodes
    
    # Combine the metrics into one dictionary
    graph_metrics = {**size_features, **degree_stats, **clique_stats, 'target': target}
    
    test_graph_metrics.append(graph_metrics)

# Convert train_graph_metrics and test_graph_metrics to DataFrames
train_df = pd.DataFrame(train_graph_metrics)
test_df = pd.DataFrame(test_graph_metrics)


  'variation_coefficient_degree': np.std(degrees_np) / np.mean(degrees_np),


In [28]:
# Display the first few rows of each DataFrame for verification
print("Train DataFrame:")
train_df.head()


Train DataFrame:


Unnamed: 0,nodes,edges,ratio_n_m,ratio_m_n,density,min_degree,max_degree,mean_degree,median_degree,q0.25_degree,...,variation_coefficient_degree,entropy_degree,normalized_min_clique_size,normalized_max_clique_size,normalized_median_clique_size,normalized_q0.25_clique_size,normalized_q0.75_clique_size,normalized_variation_coefficient_clique_size,normalized_entropy_clique_size,target
0,18,29,0.62069,1.611111,0.189542,1,6,3.222222,3.0,2.0,...,0.491304,2.530493,0.055556,0.166667,0.111111,0.055556,0.111111,0.364807,0.077338,best_first_search
1,39,108,0.361111,2.769231,0.145749,2,11,5.538462,5.0,4.0,...,0.37216,2.820963,0.025641,0.102564,0.051282,0.051282,0.051282,0.326926,0.035698,best_first_search
2,13,8,1.625,0.615385,0.102564,0,3,1.230769,1.0,0.0,...,0.910014,1.884314,0.076923,0.153846,0.076923,0.076923,0.153846,0.360341,0.073747,dfs
3,36,115,0.313043,3.194444,0.18254,3,12,6.388889,6.0,5.0,...,0.342679,2.971077,0.027778,0.111111,0.055556,0.055556,0.055556,0.316642,0.038383,best_first_search
4,32,121,0.264463,3.78125,0.243952,4,12,7.5625,8.0,6.0,...,0.260167,2.885391,0.03125,0.125,0.0625,0.0625,0.09375,0.306671,0.044608,best_first_search


In [29]:
print("\nTest DataFrame:")
test_df.head()


Test DataFrame:


Unnamed: 0,nodes,edges,ratio_n_m,ratio_m_n,density,min_degree,max_degree,mean_degree,median_degree,q0.25_degree,...,variation_coefficient_degree,entropy_degree,normalized_min_clique_size,normalized_max_clique_size,normalized_median_clique_size,normalized_q0.25_clique_size,normalized_q0.75_clique_size,normalized_variation_coefficient_clique_size,normalized_entropy_clique_size,target
0,9,2,4.5,0.222222,0.055556,0,1,0.444444,0.0,0.0,...,1.118034,0.991076,0.111111,0.222222,0.111111,0.111111,0.111111,0.342286,0.076004,dfs
1,23,41,0.560976,1.782609,0.162055,1,7,3.565217,4.0,2.0,...,0.451714,2.522866,0.043478,0.130435,0.086957,0.043478,0.086957,0.34972,0.058293,best_first_search
2,5,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,...,,-0.0,0.2,0.2,0.2,0.2,0.2,0.0,-0.0,randomized_shortest_path
3,11,0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,...,,-0.0,0.090909,0.090909,0.090909,0.090909,0.090909,0.0,-0.0,randomized_shortest_path
4,32,71,0.450704,2.21875,0.143145,1,7,4.4375,4.0,3.0,...,0.373439,2.565206,0.03125,0.09375,0.0625,0.03125,0.0625,0.321445,0.038946,best_first_search


In [33]:
test_df.columns

Index(['nodes', 'edges', 'ratio_n_m', 'ratio_m_n', 'density', 'min_degree',
       'max_degree', 'mean_degree', 'median_degree', 'q0.25_degree',
       'q0.75_degree', 'variation_coefficient_degree', 'entropy_degree',
       'normalized_min_clique_size', 'normalized_max_clique_size',
       'normalized_median_clique_size', 'normalized_q0.25_clique_size',
       'normalized_q0.75_clique_size',
       'normalized_variation_coefficient_clique_size',
       'normalized_entropy_clique_size', 'target'],
      dtype='object')

In [32]:
def get_device():
    if torch.cuda.is_available():
        device = torch.device('cuda')
        print("[i] USING CUDA")
    else:
        device = torch.device('cpu') # don't have GPU 
        print("[i] USING CPU")
    return device

device = get_device() #setting up the DL device

[i] USING CPU


In [96]:
#Choosing the number or epochs and the learning rate
num_epochs = 1000
batch_size = 256
mini_batch_size = 64

In [97]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler

# Assuming train_df and test_df are your dataframes
# Assuming 'target' is your target column name

# Combine train and test datasets for uniform preprocessing
combined_df = pd.concat([train_df, test_df], ignore_index=True)

# Separate features and target
X = combined_df.drop('target', axis=1)
y = combined_df['target']

# Define features to be normalized and one-hot encoded
numeric_features = X.select_dtypes(include=['float64', 'int64']).columns
categorical_features = X.select_dtypes(include=['object']).columns

# Define preprocessing steps for numeric and categorical features
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', MinMaxScaler())
])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine transformers for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Fit and transform data
X_transformed = preprocessor.fit_transform(X)

# Split into train, validation, and test sets
# 90 train 10 validation
# 20% test
X_train_temp, X_test, y_train_temp, y_test = train_test_split(X_transformed, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_temp, y_train_temp, test_size=0.1, random_state=42)


# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# One-hot encode the target column
y_train_onehot = pd.get_dummies(y_train)
y_val_onehot = pd.get_dummies(y_val)
y_test_onehot = pd.get_dummies(y_test)

# Convert one-hot encoded targets to PyTorch tensors
y_train_tensor = torch.tensor(y_train_onehot.values, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val_onehot.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_onehot.values, dtype=torch.float32)

# Calculate class weights for the training set
class_sample_count = np.array([len(np.where(y_train_onehot.iloc[:, c] == 1)[0]) for c in range(y_train_onehot.shape[1])])
weight = 1. / class_sample_count
samples_weight = np.array([weight[c] for c in range(y_train_onehot.shape[1])])

# Create a WeightedRandomSampler for the training set
sampler = WeightedRandomSampler(samples_weight, len(samples_weight), replacement=True)

# Create PyTorch datasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Create PyTorch dataloaders with WeightedRandomSampler for the training set
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, sampler=sampler)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [98]:
num_classes = len(np.unique(y_train))
input_size = X_train.shape[1]

In [99]:
# Initialize counters for each class
class_counts = [0] * y_train_tensor.shape[1]

# Iterate through the entire training dataset without using the sampler
for batch_idx, (_, y_one_hot) in enumerate(DataLoader(train_dataset, batch_size=batch_size, shuffle=False)):
    # Convert one-hot encoded labels to class indices
    y = torch.argmax(y_one_hot, dim=1)

    # Update class counts for each batch
    for class_idx in range(y_train_tensor.shape[1]):
        class_counts[class_idx] += len(torch.where(y == class_idx)[0])

# Print cumulative count of elements in different classes
print("Cumulative Class Counts:", class_counts)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, sampler=sampler)

Cumulative Class Counts: [372, 55, 52, 194, 13, 34]


In [101]:
import time
from sklearn.metrics import confusion_matrix
# define training function

# implement early stopping for training function
# from https://stackoverflow.com/questions/71998978/early-stopping-in-pytorch


class EarlyStopper:
    def __init__(self, patience=1, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        if validation_loss <= self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                print(f"[i] Validation Loss Increased - Early Stop!")
                print(
                    f"--- {validation_loss} > {self.min_validation_loss + self.min_delta} ---")
                return True
        return False


def train(net, train_loader, validation_loader, num_epochs, batch_size, mini_batch_size, optimizer, lr_scheduler, criterion, earlystop_patience=0, earlystop_min_delta=1e-6, name=""):
    # Save the loss into a dataframe
    losses = pd.DataFrame(index=list(range(num_epochs)), columns=[
                          'running_loss', 'train_loss', 'valid_loss'])
    min_validation_loss = np.inf

    # Use a summary writer to check loss in real time
    current_time = time.strftime("%Y%m%d_%H%M%S")
    writer = SummaryWriter(
        f'runs/tensorboard/{current_time}_{(net.__class__.__name__).lower()}_{name}')

    # Set early stopping parameters
    # from https://stackoverflow.com/questions/71998978/early-stopping-in-pytorch
    early_stopping = EarlyStopper(
        patience=earlystop_patience, min_delta=earlystop_min_delta)

    start_time_epoch = time.time()

    net.train()
    net.to(device)  # Move the model to the specified device

    for epoch in range(num_epochs):  # Looping over the dataset

        running_loss = 0.0
        validation_loss = 0.0
        train_loss = 0.0

        

        net.train()  # Set the model to training mode

        for i, data in enumerate(train_loader):
            start_time_mini_batch = time.time()
            inputs, labels = data

            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()  # Setting the parameter gradients to zero
            outputs = net(inputs)  # Forward pass

            labels = labels.float()

            loss = criterion(outputs, labels)  # Applying the criterion
            loss.backward()  # Backward pass
            optimizer.step()  # Optimization step

            running_loss += loss.item()  # Updating the running loss
            train_loss += loss.item()

            if i % mini_batch_size == mini_batch_size - 1:  # Printing the running loss
                print(f"[epoch: {epoch + 1}, mini-batch: {i + 1}, time-taken: {round(time.time() - start_time_mini_batch, 3)} sec] loss: {round(running_loss / mini_batch_size, 6)} ")

                # write on the summary writer
                writer.add_scalar(
                    'Loss/Running', running_loss / mini_batch_size, i)

                running_loss = 0.0
                start_time_mini_batch = time.time()


        net.eval().to(device)

        # Inside the validation loop
        with torch.no_grad():
            net.eval()  # Set the model to evaluation mode
            all_labels = []
            all_outputs = []

            for i, data in enumerate(validation_loader):
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)  # Forward pass

                loss = criterion(outputs, labels) # Applying the criterion
                validation_loss += loss.item() # Check the loss

                # Convert probabilities to binary predictions using a threshold (e.g., 0.5)
                threshold = 0.5
                binary_predictions = (outputs > threshold).float()

                # Append predictions and labels for accuracy calculation
                all_outputs.extend(binary_predictions.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

            # Convert lists to numpy arrays for easier computation
            all_labels = np.array(all_labels)
            all_outputs = np.array(all_outputs)

            # Compute confusion matrix
            conf_matrix = confusion_matrix(
                all_labels.flatten(), all_outputs.flatten())

            # Extract TP, TN, FP, FN from the confusion matrix
            TP = conf_matrix[1, 1]
            TN = conf_matrix[0, 0]
            FP = conf_matrix[0, 1]
            FN = conf_matrix[1, 0]

            # Check if denominators are zero
            precision_denominator = TP + FP
            recall_denominator = TP + FN

            # Calculate accuracy, precision, recall, and F1-score
            validation_accuracy = (TP + TN) / (TP + TN + FP + FN)

            # Check if the denominator is not zero before performing division
            validation_precision = TP / precision_denominator if precision_denominator != 0 else 0
            validation_recall = TP / recall_denominator if recall_denominator != 0 else 0

            # Check if both precision and recall are not zero before performing division
            if validation_precision + validation_recall != 0:
                validation_f1_score = 2 * (validation_precision * validation_recall) / (validation_precision + validation_recall)
            else:
                validation_f1_score = 0

            # Print or log the accuracy and validation loss
            print(f'+++ [\033[1mepoch: {epoch + 1}\033[0m, validation - \033[91maccuracy: {validation_accuracy:.5f}\033[0m, \033[93mprecision: {validation_precision:.5f}\033[0m, \033[94mrecall: {validation_recall:.5f}\033[0m, \033[95mf1-score: {validation_f1_score:.5f}\033[0m] +++')

        # Switch back to training mode for the next epoch
        net.train().to(device)

        print('+++ [epoch: %d, training loss: %.5f, validation loss: %.5f] +++' %
              (epoch + 1,
               train_loss / len(train_loader),
               validation_loss / len(validation_loader)))

        print(
            f"--- time-taken for epoch {epoch+1}: {round(time.time() - start_time_epoch, 3)} seconds ---")
        start_time_epoch = time.time()

        # Saving the loss
        losses.at[epoch, 'running_loss'] = running_loss
        losses.at[epoch, 'train_loss'] = train_loss
        losses.at[epoch, 'valid_loss'] = validation_loss

        # Write on the summary writer
        writer.add_scalar('Loss/Train', train_loss / len(train_loader), epoch)
        writer.add_scalar('Loss/Validation', validation_loss /
                          len(validation_loader), epoch)
        writer.add_scalar('Accuracy/Validation', validation_accuracy, epoch)

        # Update the learning rate
        if lr_scheduler.__class__.__name__ == "CosineAnnealingWarmRestarts" and lr_scheduler is not None:
            print(f"\033[90m--- current LR: {round(lr_scheduler.get_last_lr()[0], 9)} ---\033[0m")
            lr_scheduler.step()  # step scheduler learning rate

        if min_validation_loss > (validation_loss / len(validation_loader)):
            print(f'\033[92m+++ [validation loss decreased ({min_validation_loss:.9f} -> {(validation_loss / len(validation_loader)):.9f}), saving the model ...] +++\033[0m')
            min_validation_loss = validation_loss / len(validation_loader)

            # Check if the directory exists, and if not, create it
            save_dir = f'./runs/models/{(net.__class__.__name__).lower()}'
            os.makedirs(save_dir, exist_ok=True)

            # Save State Dict
            torch.save(net.state_dict(), f'{save_dir}/{(net.__class__.__name__).lower()}_{name}_saved_model.pth')

        # Check if early stopping criteria is fulfilled
        if early_stopping.early_stop(validation_loss):
            break

    pickle.dump(losses, open(
        f'./runs/models/{(net.__class__.__name__).lower()}/{(net.__class__.__name__).lower()}_{name}_loss.pkl', 'wb'))
    writer.close()
    print(f"[i] Finished Training")

In [121]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleLinearNet(nn.Module):
    def __init__(self, input_size, num_classes):
        super(SimpleLinearNet, self).__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=512)
        self.fc3 = nn.Linear(in_features=512, out_features=256)
        self.fc4 = nn.Linear(in_features=256, out_features=128)
        self.fc5 = nn.Linear(in_features=128, out_features=64)
        self.fc6 = nn.Linear(in_features=64, out_features=16)
        self.fc7 = nn.Linear(in_features=16, out_features=num_classes)

        self.dropout = nn.Dropout(p=0.3)
    
    def forward(self, x):
        x = self.dropout(F.tanh(self.fc1(x)))
        x = self.dropout(F.tanh(self.fc2(x)))
        x = self.dropout(F.tanh(self.fc3(x)))
        x = self.dropout(F.elu(self.fc4(x)))
        x = self.dropout(F.elu(self.fc5(x)))
        x = self.dropout(F.elu(self.fc6(x)))
        x = F.softmax(self.fc7(x), dim=1)
        return x

# Create and print the model
model = SimpleLinearNet(input_size=input_size, num_classes=num_classes).to(device)
summary(model, (input_size,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1024]          19,456
           Dropout-2                 [-1, 1024]               0
            Linear-3                  [-1, 512]         524,800
           Dropout-4                  [-1, 512]               0
            Linear-5                  [-1, 256]         131,328
           Dropout-6                  [-1, 256]               0
            Linear-7                  [-1, 128]          32,896
           Dropout-8                  [-1, 128]               0
            Linear-9                   [-1, 64]           8,256
          Dropout-10                   [-1, 64]               0
           Linear-11                   [-1, 16]           1,040
          Dropout-12                   [-1, 16]               0
           Linear-13                    [-1, 6]             102
Total params: 717,878
Trainable params:

In [122]:
net = SimpleLinearNet(input_size=input_size,num_classes=num_classes).to(device)
train_flag = True # Dont run if False
name = "simple_net"

if train_flag:
    print(f"[i] Traing the network {net.__class__.__name__} ...")

    # Learning rate
    learning_rate = 3e-3
    
    # Define your criterion (e.g., CrossEntropyLoss for multiclass classification)
    criterion = nn.CrossEntropyLoss().to(device)
    
    # Define your optimizer (e.g., Adam)
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    # Cosine Annealing with Restarts (CWR) scheduler.
    # This scheduler is designed to automatically adjust the learning rate according to a cosine wave, and can be used to adjust the learning rate as the model converges.
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=round((1/10)*batch_size), T_mult=1, eta_min=0)

    # Train the model
    train(net, train_dataloader, val_dataloader, num_epochs, batch_size, mini_batch_size, optimizer, scheduler, criterion, name=name)
else: # load the model
    print(f"[i] Loading the network {net.__class__.__name__} ...")
    #Loading existing models (with saved weights)
    net.load_state_dict(torch.load(f'./runs/models/{(net.__class__.__name__).lower()}/{(net.__class__.__name__).lower()}_{name}_saved_model.pth', map_location=device)) #using saved data if present
    net.eval()

[i] Traing the network SimpleLinearNet ...
+++ [[1mepoch: 1[0m, validation - [91maccuracy: 0.83333[0m, [93mprecision: 0.00000[0m, [94mrecall: 0.00000[0m, [95mf1-score: 0.00000[0m] +++
+++ [epoch: 1, training loss: 1.81378, validation loss: 1.78260] +++
--- time-taken for epoch 1: 0.018 seconds ---
[90m--- current LR: 0.003 ---[0m
[92m+++ [validation loss decreased (inf -> 1.782599092), saving the model ...] +++[0m
+++ [[1mepoch: 2[0m, validation - [91maccuracy: 0.83333[0m, [93mprecision: 0.00000[0m, [94mrecall: 0.00000[0m, [95mf1-score: 0.00000[0m] +++
+++ [epoch: 2, training loss: 1.77847, validation loss: 1.76801] +++
--- time-taken for epoch 2: 0.02 seconds ---
[90m--- current LR: 0.002989063 ---[0m
[92m+++ [validation loss decreased (1.782599092 -> 1.768011451), saving the model ...] +++[0m
+++ [[1mepoch: 3[0m, validation - [91maccuracy: 0.83333[0m, [93mprecision: 0.00000[0m, [94mrecall: 0.00000[0m, [95mf1-score: 0.00000[0m] +++
+++ [epoch: 3, 

In [103]:
class MoreComplexNet(nn.Module):
    def __init__(self, input_dim, num_classes, init_fn):
        super(MoreComplexNet, self).__init__()
        self.init_fn = init_fn
        
        self.fc1 = nn.Linear(in_features=input_dim, out_features=256)
        self.bn1 = nn.BatchNorm1d(256)

        self.fc2 = nn.Linear(in_features=256, out_features=128)
        self.bn2 = nn.BatchNorm1d(128)

        self.fc3 = nn.Linear(in_features=128, out_features=64)
        self.bn3 = nn.BatchNorm1d(64)

        self.fc4 = nn.Linear(in_features=64, out_features=32)
        self.bn4 = nn.BatchNorm1d(32)

        self.fc5 = nn.Linear(in_features=32, out_features=16)
        self.bn5 = nn.BatchNorm1d(16)

        self.fc6 = nn.Linear(in_features=16, out_features=8)
        self.bn6 = nn.BatchNorm1d(8)

        self.fc7 = nn.Linear(in_features=8, out_features=num_classes)

        self.dropout = nn.Dropout(p=0.3)

        self.apply(self.init_weights)

    def init_weights(self, m):
        if isinstance(m, nn.Linear):
            self.init_fn(m.weight)
            m.bias.data.fill_(0.01)

    def forward(self, x):
        x = self.dropout(F.tanh(self.bn1(self.fc1(x))))
        x = self.dropout(F.elu(self.bn2(self.fc2(x))))
        x = self.dropout(F.elu(self.bn3(self.fc3(x))))
        x = self.dropout(F.elu(self.bn4(self.fc4(x))))
        x = self.dropout(F.elu(self.bn5(self.fc5(x))))
        x = self.dropout(F.elu(self.bn6(self.fc6(x))))
        x = F.softmax(self.fc7(x), dim=1)
        return x
    
# Create and print the model
model = MoreComplexNet(input_size, num_classes=num_classes, init_fn=torch.nn.init.xavier_normal_).to(device)
summary(model, (input_size,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 256]           4,864
       BatchNorm1d-2                  [-1, 256]             512
           Dropout-3                  [-1, 256]               0
            Linear-4                  [-1, 128]          32,896
       BatchNorm1d-5                  [-1, 128]             256
           Dropout-6                  [-1, 128]               0
            Linear-7                   [-1, 64]           8,256
       BatchNorm1d-8                   [-1, 64]             128
           Dropout-9                   [-1, 64]               0
           Linear-10                   [-1, 32]           2,080
      BatchNorm1d-11                   [-1, 32]              64
          Dropout-12                   [-1, 32]               0
           Linear-13                   [-1, 16]             528
      BatchNorm1d-14                   

In [109]:
net = MoreComplexNet(input_dim=input_size, num_classes=num_classes,
                     init_fn=torch.nn.init.xavier_normal_).to(device)
train_flag = True
name = "more_complex_net"

if train_flag:
    print(f"[i] Traing the network {net.__class__.__name__} ...")

    # Learning rate
    learning_rate = 3e-3
    
    # Define your criterion (e.g., CrossEntropyLoss for multiclass classification)
    criterion = nn.CrossEntropyLoss().to(device)
    
    # Define your optimizer (e.g., Adam)
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    # Cosine Annealing with Restarts (CWR) scheduler.
    # This scheduler is designed to automatically adjust the learning rate according to a cosine wave, and can be used to adjust the learning rate as the model converges.
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=round((1/10)*batch_size), T_mult=1, eta_min=0)

    # Train the model
    train(net, train_dataloader, val_dataloader, num_epochs, batch_size, mini_batch_size, optimizer, scheduler, criterion, name=name)
else: # load the model
    print(f"[i] Loading the network {net.__class__.__name__} ...")
    #Loading existing models (with saved weights)
    net.load_state_dict(torch.load(f'./runs/models/{(net.__class__.__name__).lower()}/{(net.__class__.__name__).lower()}_{name}_saved_model.pth', map_location=device)) #using saved data if present
    net.eval()

[i] Traing the network MoreComplexNet ...
+++ [[1mepoch: 1[0m, validation - [91maccuracy: 0.83333[0m, [93mprecision: 0.00000[0m, [94mrecall: 0.00000[0m, [95mf1-score: 0.00000[0m] +++
+++ [epoch: 1, training loss: 1.72439, validation loss: 1.77888] +++
--- time-taken for epoch 1: 0.013 seconds ---
[90m--- current LR: 0.003 ---[0m
[92m+++ [validation loss decreased (inf -> 1.778876901), saving the model ...] +++[0m
+++ [[1mepoch: 2[0m, validation - [91maccuracy: 0.83333[0m, [93mprecision: 0.00000[0m, [94mrecall: 0.00000[0m, [95mf1-score: 0.00000[0m] +++
+++ [epoch: 2, training loss: 1.77008, validation loss: 1.76433] +++
--- time-taken for epoch 2: 0.015 seconds ---
[90m--- current LR: 0.002989063 ---[0m
[92m+++ [validation loss decreased (1.778876901 -> 1.764332533), saving the model ...] +++[0m
+++ [[1mepoch: 3[0m, validation - [91maccuracy: 0.83333[0m, [93mprecision: 0.00000[0m, [94mrecall: 0.00000[0m, [95mf1-score: 0.00000[0m] +++
+++ [epoch: 3, 

In [110]:
class EvenMoreComplexNet(nn.Module):
    def __init__(self, input_dim, num_classes, init_fn):
        super(EvenMoreComplexNet, self).__init__()
        self.init_fn = init_fn
        
        self.fc1 = nn.Linear(in_features=input_dim, out_features=1024)
        self.bn1 = nn.BatchNorm1d(1024)

        self.fc2 = nn.Linear(in_features=1024, out_features=512)
        self.bn2 = nn.BatchNorm1d(512)

        self.fc3 = nn.Linear(in_features=512, out_features=256)
        self.bn3 = nn.BatchNorm1d(256)

        self.fc4 = nn.Linear(in_features=256, out_features=128)
        self.bn4 = nn.BatchNorm1d(128)

        self.fc5 = nn.Linear(in_features=128, out_features=64)
        self.bn5 = nn.BatchNorm1d(64)

        self.fc6 = nn.Linear(in_features=64, out_features=32)
        self.bn6 = nn.BatchNorm1d(32)

        self.fc7 = nn.Linear(in_features=32, out_features=16)
        self.bn7 = nn.BatchNorm1d(16)

        self.fc8 = nn.Linear(in_features=16, out_features=8)
        self.bn8 = nn.BatchNorm1d(8)

        self.fc_out = nn.Linear(in_features=8, out_features=num_classes)

        self.dropout = nn.Dropout(p=0.3)

        self.apply(self.init_weights)

    def init_weights(self, m):
        if isinstance(m, nn.Linear):
            self.init_fn(m.weight)
            m.bias.data.fill_(0.01)

    def forward(self, x):
        x = self.dropout(F.tanh(self.bn1(self.fc1(x))))
        x = self.dropout(F.elu(self.bn2(self.fc2(x))))
        x = self.dropout(F.elu(self.bn3(self.fc3(x))))
        x = self.dropout(F.elu(self.bn4(self.fc4(x))))
        x = self.dropout(F.elu(self.bn5(self.fc5(x))))
        x = self.dropout(F.elu(self.bn6(self.fc6(x))))
        x = self.dropout(F.elu(self.bn7(self.fc7(x))))
        x = self.dropout(F.elu(self.bn8(self.fc8(x))))
        x = F.softmax(self.fc_out(x), dim=1)
        return x

# Create and print the model
model = EvenMoreComplexNet(input_size, num_classes=num_classes, init_fn=torch.nn.init.xavier_normal_).to(device)
summary(model, (input_size,))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1024]          19,456
       BatchNorm1d-2                 [-1, 1024]           2,048
           Dropout-3                 [-1, 1024]               0
            Linear-4                  [-1, 512]         524,800
       BatchNorm1d-5                  [-1, 512]           1,024
           Dropout-6                  [-1, 512]               0
            Linear-7                  [-1, 256]         131,328
       BatchNorm1d-8                  [-1, 256]             512
           Dropout-9                  [-1, 256]               0
           Linear-10                  [-1, 128]          32,896
      BatchNorm1d-11                  [-1, 128]             256
          Dropout-12                  [-1, 128]               0
           Linear-13                   [-1, 64]           8,256
      BatchNorm1d-14                   

In [114]:
net = EvenMoreComplexNet(input_dim=input_size, num_classes=num_classes,
                     init_fn=torch.nn.init.xavier_normal_).to(device)
train_flag = True
name = "even_more_complex_net"

if train_flag:
    print(f"[i] Traing the network {net.__class__.__name__} ...")

    # Learning rate
    learning_rate = 3e-3
    
    # Define your criterion (e.g., CrossEntropyLoss for multiclass classification)
    criterion = nn.CrossEntropyLoss().to(device)
    
    # Define your optimizer (e.g., Adam)
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    # Cosine Annealing with Restarts (CWR) scheduler.
    # This scheduler is designed to automatically adjust the learning rate according to a cosine wave, and can be used to adjust the learning rate as the model converges.
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=round((1/10)*batch_size), T_mult=1, eta_min=0)

    # Train the model
    train(net, train_dataloader, val_dataloader, num_epochs, batch_size, mini_batch_size, optimizer, scheduler, criterion, name=name)
else: # load the model
    print(f"[i] Loading the network {net.__class__.__name__} ...")
    #Loading existing models (with saved weights)
    net.load_state_dict(torch.load(f'./runs/models/{(net.__class__.__name__).lower()}/{(net.__class__.__name__).lower()}_{name}_saved_model.pth', map_location=device)) #using saved data if present
    net.eval()

[i] Traing the network EvenMoreComplexNet ...
+++ [[1mepoch: 1[0m, validation - [91maccuracy: 0.83333[0m, [93mprecision: 0.00000[0m, [94mrecall: 0.00000[0m, [95mf1-score: 0.00000[0m] +++
+++ [epoch: 1, training loss: 1.82581, validation loss: 1.71995] +++
--- time-taken for epoch 1: 0.021 seconds ---
[90m--- current LR: 0.003 ---[0m
[92m+++ [validation loss decreased (inf -> 1.719954848), saving the model ...] +++[0m
+++ [[1mepoch: 2[0m, validation - [91maccuracy: 0.83333[0m, [93mprecision: 0.00000[0m, [94mrecall: 0.00000[0m, [95mf1-score: 0.00000[0m] +++
+++ [epoch: 2, training loss: 1.84712, validation loss: 1.71481] +++
--- time-taken for epoch 2: 0.028 seconds ---
[90m--- current LR: 0.002989063 ---[0m
[92m+++ [validation loss decreased (1.719954848 -> 1.714809179), saving the model ...] +++[0m
+++ [[1mepoch: 3[0m, validation - [91maccuracy: 0.83333[0m, [93mprecision: 0.00000[0m, [94mrecall: 0.00000[0m, [95mf1-score: 0.00000[0m] +++
+++ [epoch:

In [115]:
class SimplestNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, init_fn):
        super(SimplestNet, self).__init__()

        self.init_fn = init_fn
        
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)
        self.dropout = nn.Dropout(p=0.3)

        self.apply(self.init_weights)

    def init_weights(self, m):
        if isinstance(m, nn.Linear):
            self.init_fn(m.weight)
            m.bias.data.fill_(0.01)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return F.softmax(x, dim=1)

# Create and print the model
model = SimplestNet(input_size=input_size, hidden_size=512, num_classes=num_classes, init_fn=torch.nn.init.xavier_normal_).to(device)
summary(model, (input_size,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 512]           9,728
           Dropout-2                  [-1, 512]               0
            Linear-3                  [-1, 512]         262,656
           Dropout-4                  [-1, 512]               0
            Linear-5                    [-1, 6]           3,078
Total params: 275,462
Trainable params: 275,462
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.02
Params size (MB): 1.05
Estimated Total Size (MB): 1.07
----------------------------------------------------------------


In [116]:
net = SimplestNet(input_size=input_size, hidden_size=512, num_classes=num_classes, init_fn=torch.nn.init.xavier_normal_).to(device)
train_flag = True
name = "simplest_net"

if train_flag:
    print(f"[i] Traing the network {net.__class__.__name__} ...")

    # Learning rate
    learning_rate = 3e-3
    
    # Define your criterion (e.g., CrossEntropyLoss for multiclass classification)
    criterion = nn.CrossEntropyLoss().to(device)
    
    # Define your optimizer (e.g., Adam)
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    # Cosine Annealing with Restarts (CWR) scheduler.
    # This scheduler is designed to automatically adjust the learning rate according to a cosine wave, and can be used to adjust the learning rate as the model converges.
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=round((1/10)*batch_size), T_mult=1, eta_min=0)

    # Train the model
    train(net, train_dataloader, val_dataloader, num_epochs, batch_size, mini_batch_size, optimizer, scheduler, criterion, name=name)
else: # load the model
    print(f"[i] Loading the network {net.__class__.__name__} ...")
    #Loading existing models (with saved weights)
    net.load_state_dict(torch.load(f'./runs/models/{(net.__class__.__name__).lower()}/{(net.__class__.__name__).lower()}_{name}_saved_model.pth', map_location=device)) #using saved data if present
    net.eval()

[i] Traing the network SimplestNet ...
+++ [[1mepoch: 1[0m, validation - [91maccuracy: 0.86667[0m, [93mprecision: 0.90000[0m, [94mrecall: 0.22500[0m, [95mf1-score: 0.36000[0m] +++
+++ [epoch: 1, training loss: 1.78566, validation loss: 1.64003] +++
--- time-taken for epoch 1: 0.01 seconds ---
[90m--- current LR: 0.003 ---[0m
[92m+++ [validation loss decreased (inf -> 1.640026331), saving the model ...] +++[0m
+++ [[1mepoch: 2[0m, validation - [91maccuracy: 0.86042[0m, [93mprecision: 0.58442[0m, [94mrecall: 0.56250[0m, [95mf1-score: 0.57325[0m] +++
+++ [epoch: 2, training loss: 1.52439, validation loss: 1.52111] +++
--- time-taken for epoch 2: 0.013 seconds ---
[90m--- current LR: 0.002989063 ---[0m
[92m+++ [validation loss decreased (1.640026331 -> 1.521108866), saving the model ...] +++[0m
+++ [[1mepoch: 3[0m, validation - [91maccuracy: 0.85417[0m, [93mprecision: 0.56250[0m, [94mrecall: 0.56250[0m, [95mf1-score: 0.56250[0m] +++
+++ [epoch: 3, trai

In [134]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import seaborn as sns
import matplotlib.pyplot as plt

# Define the evaluation function
def evaluate_model(model, dataloader):
    model.eval()
    actual_labels = []
    predicted_labels = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = model(inputs)

            # Map the predicted class indices to class labels
            predicted_labels.append(np.argmax(outputs.cpu().numpy(), axis=1))
            actual_labels.append(np.argmax(labels.cpu().numpy(), axis=1))

    return actual_labels, predicted_labels

# Evaluate the model using the test dataset
actual_labels, predicted_lables = evaluate_model(net, test_dataloader)

actual_labels_flat = [item for sublist in actual_labels for item in sublist]
predicted_labels_flat = [item for sublist in predicted_lables for item in sublist]

conf_matrix = confusion_matrix(actual_labels_flat, predicted_labels_flat, normalize='pred')


In [135]:
conf_matrix

array([[0.48 , 0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.095, 0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.095, 0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.25 , 0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.01 , 0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.07 , 0.   , 0.   , 0.   , 0.   , 0.   ]])

In [None]:

# Calculate performance metrics
accuracy = accuracy_score(actual_labels, predicted_lables)
precision = precision_score(actual_labels, predicted_lables, average='weighted', zero_division=1)
recall = recall_score(actual_labels, predicted_lables, average='weighted')
f1 = f1_score(actual_labels, predicted_lables, average='weighted')

# Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, cmap='Blues', xticklabels=np.unique(actual_labels), yticklabels=np.unique(actual_labels))
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Print overall performance metrics
print("Overall Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1)

# Individual class metrics
class_metrics = {'class': [], 'accuracy': [], 'precision': [], 'recall': [], 'f1_score': []}
for label in np.unique(actual_labels):
    class_accuracy = accuracy_score(actual_labels == label, predicted_lables == label)
    class_precision = precision_score(actual_labels == label, predicted_lables == label, zero_division=0)
    class_recall = recall_score(actual_labels == label, predicted_lables == label)
    class_f1 = f1_score(actual_labels == label, predicted_lables == label)

    class_metrics['class'].append(label)
    class_metrics['accuracy'].append(class_accuracy)
    class_metrics['precision'].append(class_precision)
    class_metrics['recall'].append(class_recall)
    class_metrics['f1_score'].append(class_f1)


# Convert to DataFrame for better readability
class_metrics_df = pd.DataFrame(class_metrics)
print("\nIndividual Class Metrics:\n", class_metrics_df)