In [None]:
#Extreme Gradient Boosting

In [20]:
#IMPORTANT NOTE: This notebook contains the creation of almost all models. Therefore a teststructure was build and reused
#The different cells implement different settings or ML structures where the loops and the MODEL HERE part are changed

#For each ML architecture, the results are written to a specific csv file
#There, Each model is represented by a new line with a unique index, the used parameter settings as well as different metrics

#This cell initializes the DataFrame for the XGB models. Running it would overwrite all stored results
'''#XGB Results
results = pd.DataFrame(columns=["max_depth", "learning_rate", "n_estimators", "min_child_weight", "subsample", "objective", "eval_metric", "Seed", "VBS_test", "SBS_test", "Accuracy_test", "Score_test", "Gap_test", "VBS_train", "SBS_train","Accuracy_train", "Score_train", "Gap_train"])
results.to_csv("XGB_results.csv", index=True, sep=",", header=True, index_label=None, na_rep="")
results.to_csv("XGB_results_to_c_z.csv", index=True, sep=",", header=True, index_label=None, na_rep="")'''

In [66]:
#Initialization of the second result file for the XGBClassifier
'''results = pd.DataFrame(columns=["max_depth", "learning_rate", "n_estimators", "min_child_weight", "subsample", "Seed", "VBS_test", "SBS_test", "Accuracy_test", "Score_test", "Gap_test", "VBS_train", "SBS_train","Accuracy_train", "Score_train", "Gap_train"])
results.to_csv("XGB_results2.csv", index=True, sep=",", header=True, index_label=None, na_rep="")'''

In [None]:
#Manual GridSearch for XGBoost Constraints
#Already import the packages necessary for all models
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from IPython.display import display

#Function to convert labels to integers
def label_to_int(label):
    for index in label.index: #For every instance
        match label.at[index, "algorithm"]: #Based on the algorithm, write an integer value
            case "greedy": label.at[index, "algorithm"] = 0
            case "dynamic_programming": label.at[index, "algorithm"] = 1
            case "branch_and_bound": label.at[index, "algorithm"] = 2
            case _: pass 

#Function to convert integers to labels
def int_to_label(label):
    for index in label.index:
        match label.at[index, "algorithm"]:
            case 0: label.at[index, "algorithm"] = "greedy"
            case 1: label.at[index, "algorithm"] = "dynamic_programming"
            case 2: label.at[index, "algorithm"] = "branch_and_bound"
            case _: pass

#Function to determine the average score given the predictions vector
#Given the scores and the predictions as dataframe with an index and a single column "algorithm"
def calculate_score(scores, predictions):
    for index in predictions.index: #For each instance
        
        #Read the prediction
        algorithm = predictions.at[index, "algorithm"]
        
        #Write the score to a new column "score"
        predictions.at[index, "score"] = scores.at[index, algorithm]
    
    #Return the mean of the scores
    return predictions["score"].mean()

#For loops to test every single combination of parameters

#Parameter settings for the first generation of XGB models
'''for max_depth in [3, 6]:
    for learning_rate in [0.01, 0.1]:
        for n_estimators in [100, 200]:
            for min_child_weight in [1, 2]:
                for subsample in [0.8, 1]:
                    #Based on the scaled instances, the result path is set accordingly
                    for scaled_instances in ["instances.csv", "instances_to_c_z.csv"]'''

for max_depth in [2, 3, 4]:
    for learning_rate in [0.001, 0.01]:
        for n_estimators in [50, 100, 200]:
            for min_child_weight in [0.5, 1, 2]:
                for subsample in [0.6, 0.7, 0.8, 0.9]:
                    
                    #Use identical split for the train and test set
                    seed = 1
                    
                    #Define a model description that contains every parameter to create a unique name for each model
                    model_description = "XGB_depth_"+str(max_depth)+"_lr_"+str(learning_rate)+"_estimators_"+str(n_estimators)+"_childweight_"+str(min_child_weight)+"_subsample_"+str(subsample)+"_seed_"+str(seed)
                    
                    #Set the result and instance path
                    result_path = "XGB_results2.csv"
                    scaled_instances = "instances.csv"
                    
                    #Read the scores (independent from the instance path)
                    scores = pd.DataFrame(pd.read_csv("scores.csv", header=0, index_col=0))
                    scores = scores.rename(columns={"dynamic_programming_bellman_array":"dynamic_programming", "branch_and_bound_sort": "branch_and_bound"})
                    
                    #Load the data and results from the given paths
                    data = pd.DataFrame(pd.read_csv(scaled_instances, header=0, index_col=0))
                    results = pd.DataFrame(pd.read_csv(result_path, header=0, index_col=0))

                    #Write the parameter settings to the DataFrame results (Twice as they are already part of the index)
                    results.at[model_description, "max_depth"] = max_depth
                    results.at[model_description, "learning_rate"] = learning_rate
                    results.at[model_description, "n_estimators"] = n_estimators
                    results.at[model_description, "min_child_weight"] = min_child_weight
                    results.at[model_description, "subsample"] = subsample
                    results.at[model_description, "Seed"] = seed

                    #Split the data into train sed and test set using the set seed of 1
                    X_train, X_test, y_train, y_test = train_test_split(data, scores, test_size=0.2, random_state=seed)

                    
                    #Calculation of SBS and VBS for the test set (identical across all instances)
                    test_scores = pd.DataFrame(y_test)
                    
                    #Add a column containing the minimum value of the three algorithms
                    test_scores["best_solver"] = test_scores.min(axis=1)
                    
                    #Calculate the average scores for each algorithm (minimum is used as SBS) and the VBS
                    average_greedy, average_dynamic_programming, average_branch_and_bound, test_average_best_solver = test_scores.mean(axis=0)
                    test_average_single_best_solver = min(average_greedy, average_dynamic_programming, average_branch_and_bound)

                    #Write the scores to the DataFrame results
                    results.at[model_description, "VBS_test"] = test_average_best_solver
                    results.at[model_description, "SBS_test"] = test_average_single_best_solver

                    
                    #Calculation of SBS and VBS for the train set (identical to the calculation on the test set)
                    train_scores = pd.DataFrame(y_train)
                    train_scores["best_solver"] = train_scores.min(axis=1)
                    
                    #SBS and VBS as described before
                    average_greedy, average_dynamic_programming, average_branch_and_bound, train_average_best_solver = train_scores.mean(axis=0)
                    train_average_single_best_solver = min(average_greedy, average_dynamic_programming, average_branch_and_bound)

                    #Write the scores to the DataFrame results (identical across all instances)
                    results.at[model_description, "VBS_train"] = train_average_best_solver
                    results.at[model_description, "SBS_train"] = train_average_single_best_solver

                    #Prepare the labels as a DataFrame with a single column containing the optimal algorithm for each instance
                    y_train = pd.DataFrame(y_train.idxmin(axis=1), columns=["algorithm"])
                    y_test = pd.DataFrame(y_test.idxmin(axis=1), columns=["algorithm"])
                    
                    #Copy the labels to use them for the calculation of the accuracy
                    y_train_accuracy = y_train.copy()
                    y_test_accuracy = y_test.copy()
                    
                    #Convert the labels to an integer representation
                    label_to_int(y_train)
                    y_train = y_train.astype(int)
                    label_to_int(y_test)
                    y_test = y_test.astype(int)

                    #Save the indices of the labels
                    y_train_indices = y_train.index
                    y_test_indices = y_test.index

                    #---------------------------------------------------------------------------------------------------------------
                    #Part where the model is defined and used to make predictions
                    #This is the main difference across the cells in this notebook
                    
                    #MODEL HERE
                    
                    #Define the XGBClassifier model with the given parameters
                    model = XGBClassifier(
                        max_depth=max_depth,
                        learning_rate=learning_rate,
                        n_estimators=n_estimators,
                        min_child_weight=min_child_weight,
                        subsample=subsample,
                        seed=1
                    )

                    #Train the model on the train data
                    model.fit(X_train, y_train)

                    #Use the trained model to predict the algorithms on the train set (with according indices)
                    predictions_train = pd.DataFrame(model.predict(X_train), columns=["algorithm"], index=y_train_indices)
                    
                    #Use the trained model to predict the algorithms on the test set (with according indices)
                    predictions_test = pd.DataFrame(model.predict(X_test), columns=["algorithm"], index=y_test_indices)
                    
                    #End of model description part, following content is identical across the cells
                    #---------------------------------------------------------------------------------------------------------------

                    
                    #Convert the predictions on the train set to labels
                    int_to_label(predictions_train)

                    #Load the scores so that they are uneffected by any previous calculations
                    scores = pd.DataFrame(pd.read_csv("scores.csv", header=0, index_col=0))
                    scores = scores.rename(columns={"dynamic_programming_bellman_array":"dynamic_programming", "branch_and_bound_sort": "branch_and_bound"})

                    #Calculate the score for the train set and save it in the DataFrame results
                    score_train = calculate_score(scores, predictions_train)
                    results.at[model_description, "Score_train"] = score_train

                    
                    #Convert the predictions on the test set to labels
                    int_to_label(predictions_test)

                    #Load the scores once more
                    scores = pd.DataFrame(pd.read_csv("scores.csv", header=0, index_col=0))
                    scores = scores.rename(columns={"dynamic_programming_bellman_array":"dynamic_programming", "branch_and_bound_sort": "branch_and_bound"})

                    #Calculate the score on the test set and write it to the results
                    score_test = calculate_score(scores, predictions_test)
                    results.at[model_description, "Score_test"] = score_test

                    
                    #Calculation of the accuracy
                    predictions_train = pd.DataFrame(predictions_train)
                    predictions_test = pd.DataFrame(predictions_test)

                    #Use sklearn.metrics function to calculate the accuracy and round it for an equal length
                    accuracy_train = accuracy_score(y_train_accuracy, predictions_train["algorithm"]).round(4)
                    accuracy_test = accuracy_score(y_test_accuracy, predictions_test["algorithm"]).round(4)
                    
                    #Write the accuracy to the results
                    results.at[model_description, "Accuracy_train"] = accuracy_train
                    results.at[model_description, "Accuracy_test"] = accuracy_test

                    
                    #Calculation of the gaps
                    #Load the previously calculated scores for the train and test set
                    VBS_train = train_average_best_solver
                    SBS_train = train_average_single_best_solver
                    AAS_train = score_train
                    VBS_test = test_average_best_solver
                    SBS_test = test_average_single_best_solver
                    AAS_test = score_test

                    #Calculate the gaps in the sets (independend of the model)
                    Gap_train = SBS_train - VBS_train
                    Gap_test = SBS_test - VBS_test
                    
                    #Calculate the achived score by subtracting the optimal score of the VBS
                    Score_train = AAS_train - VBS_train
                    Score_test = AAS_test - VBS_test

                    #Calculate the gap closed
                    GapClose_train = 1-(Score_train/Gap_train)
                    GapClose_test = 1-(Score_test/Gap_test)

                    #Save the gap closed in the DataFrame results
                    results.at[model_description, "Gap_train"] = GapClose_train
                    results.at[model_description, "Gap_test"] = GapClose_test

                    
                    #Save the DataFrame results by overwriting the csv file (This appends the new row and does not change the rest)
                    results.to_csv(result_path, index=True, sep=",", header=True, index_label=None, na_rep="")
                    
                    #Output to keep track of the models
                    print(str(model_description)+" calculated")

In [None]:
#Display the results. Those results are revisited and describes in the notebook "ResultDiscussion.ipynb"

#Load the results
table = pd.DataFrame(pd.read_csv("XGB_results2.csv", header=0, index_col=0))

#Output the describtion of the results (presents average Scores, minima and maxima) 
print(table.describe())

#Only present the top of the table
table = table[:10]

#Display the table
display(table)

In [None]:
#Remove identical entrys from the first generation of XGB models
XGB = pd.DataFrame(pd.read_csv("XGB_results.csv", header=0, index_col=0))

#Sort the rows by their VBS-SBS gap closure
XGB = XGB.sort_values(by=["Gap_test"], ascending=False)

#Only keep one of the four identical entrys
XGB = XGB[XGB.objective == "binary:logistic"]
XGB = XGB[XGB.eval_metric == "merror"]

#Drop the unused columns
XGB = XGB.drop(labels=["objective", "eval_metric"], axis=1)
XGB = XGB.sort_values(by=["Gap_test"], ascending=False)

#Print the describtion of the results and overwrite the file
print(XGB.describe())
XGB.to_csv("XGB_results.csv", index=True, sep=",", header=True, index_label=None, na_rep="")

In [None]:
#Support Vector Machines

In [1]:
#Define the results file and initialize the columns used for the different models
#Running this code would overwrite the results!
'''results = pd.DataFrame(columns=["C", "kernel", "class_weight", "degree", "VBS_test", "SBS_test", "Accuracy_test", "Score_test", "Gap_test", "VBS_train", "SBS_train","Accuracy_train", "Score_train", "Gap_train"])
results.to_csv("SVM_results.csv", index=True, sep=",", header=True, index_label=None, na_rep="")'''

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#IMPORTANT NOTE: Most of the code is identical to the generation of the XGB models. Therefore it is not documented in detail
#General approach for the SVC: define the code in functions to use two function calls if the kernel is "poly" 
#Grid Search for SVC
from sklearn.svm import SVC
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from IPython.display import display

#Function to convert labels to integers and back
def label_to_int(label):
    for index in label.index:
        match label.at[index, "algorithm"]:
            case "greedy": label.at[index, "algorithm"] = 0
            case "dynamic_programming": label.at[index, "algorithm"] = 1
            case "branch_and_bound": label.at[index, "algorithm"] = 2
            case _: pass 

def int_to_label(label):
    for index in label.index:
        match label.at[index, "algorithm"]:
            case 0: label.at[index, "algorithm"] = "greedy"
            case 1: label.at[index, "algorithm"] = "dynamic_programming"
            case 2: label.at[index, "algorithm"] = "branch_and_bound"
            case _: pass

#Function to determine the average score given the predictions vector
#Given the scores and the predictions as dataframe with an index and a single column "algorithm"
def calculate_score(scores, predictions):
    for index in predictions.index:
        algorithm = predictions.at[index, "algorithm"]
        predictions.at[index, "score"] = scores.at[index, algorithm]
    return predictions["score"].mean()

#Define the code as function to execute twice if kernel = poly
def loop_iteration(kernel, C, class_weight, counter, degree=3):
    
    #Set seed and tolerance equal across all instances
    seed = 1
    tolerance = 0.001
    
    #Define the unique model description used as index
    model_description = "SVM_C_"+str(C)+"_kernel_"+str(kernel)+"_classweight_"+str(class_weight)+"_degree_"+str(degree)
    result_path = "SVM_results.csv"
    scaled_instances = "instances.csv"
    
    #Load scores, data and results
    scores = pd.DataFrame(pd.read_csv("scores.csv", header=0, index_col=0))
    scores = scores.rename(columns={"dynamic_programming_bellman_array":"dynamic_programming", "branch_and_bound_sort": "branch_and_bound"})
    data = pd.DataFrame(pd.read_csv(scaled_instances, header=0, index_col=0))
    results = pd.DataFrame(pd.read_csv(result_path, header=0, index_col=0))

    #Save the parameter settings in the DataFrame results 
    results.at[model_description, "C"] = C
    results.at[model_description, "kernel"] = kernel
    results.at[model_description, "class_weight"] = class_weight
    results.at[model_description, "degree"] = degree

    #Split the data into train set and test set
    X_train, X_test, y_train, y_test = train_test_split(data, scores, test_size=0.2, random_state=1)

    #Calculation of SBS and VBS for test
    test_scores = pd.DataFrame(y_test)
    test_scores["best_solver"] = test_scores.min(axis=1)

    average_greedy, average_dynamic_programming, average_branch_and_bound, test_average_best_solver = test_scores.mean(axis=0)
    test_average_single_best_solver = min(average_greedy, average_dynamic_programming, average_branch_and_bound)

    results.at[model_description, "VBS_test"] = test_average_best_solver
    results.at[model_description, "SBS_test"] = test_average_single_best_solver

    #Calculation of SBS and VBS for train
    train_scores = pd.DataFrame(y_train)
    train_scores["best_solver"] = train_scores.min(axis=1)

    average_greedy, average_dynamic_programming, average_branch_and_bound, train_average_best_solver = train_scores.mean(axis=0)
    train_average_single_best_solver = min(average_greedy, average_dynamic_programming, average_branch_and_bound)

    results.at[model_description, "VBS_train"] = train_average_best_solver
    results.at[model_description, "SBS_train"] = train_average_single_best_solver

    #Prepare the labels
    y_train = pd.DataFrame(y_train.idxmin(axis=1), columns=["algorithm"])
    y_test = pd.DataFrame(y_test.idxmin(axis=1), columns=["algorithm"])
    y_train_accuracy = y_train.copy()
    y_test_accuracy = y_test.copy()

    #Convert the labels to integers
    label_to_int(y_train)
    y_train = y_train.astype(int)
    label_to_int(y_test)
    y_test = y_test.astype(int)

    #Save the indices
    y_train_indices = y_train.index
    y_test_indices = y_test.index

    #---------------------------------------------------------------------------------------------------------------
    #MODEL HERE

    #Define the SVM classifier (SVC)
    model = SVC(
        C=C,
        kernel=kernel,
        tol=0.001,
        class_weight=class_weight,
        degree = degree #For kernel = poly, all other kernels ignore this parameter
        )

    #Train the model
    model.fit(X_train, y_train.values.ravel())

    #Use the model to predict the algorithms on the train set as well as the test set
    predictions_train = pd.DataFrame(model.predict(X_train), columns=["algorithm"], index=y_train_indices)
    predictions_test = pd.DataFrame(model.predict(X_test), columns=["algorithm"], index=y_test_indices)
    #---------------------------------------------------------------------------------------------------------------
    
    #Calculate the score on the train set
    int_to_label(predictions_train)

    scores = pd.DataFrame(pd.read_csv("scores.csv", header=0, index_col=0))
    scores = scores.rename(columns={"dynamic_programming_bellman_array":"dynamic_programming", "branch_and_bound_sort": "branch_and_bound"})

    score_train = calculate_score(scores, predictions_train)
    results.at[model_description, "Score_train"] = score_train

    #Calculate the score on the test set
    int_to_label(predictions_test)

    scores = pd.DataFrame(pd.read_csv("scores.csv", header=0, index_col=0))
    scores = scores.rename(columns={"dynamic_programming_bellman_array":"dynamic_programming", "branch_and_bound_sort": "branch_and_bound"})

    score_test = calculate_score(scores, predictions_test)
    results.at[model_description, "Score_test"] = score_test

    #Calculate the accuracy
    predictions_train = pd.DataFrame(predictions_train)
    predictions_test = pd.DataFrame(predictions_test)

    accuracy_train = accuracy_score(y_train_accuracy, predictions_train["algorithm"]).round(4)
    accuracy_test = accuracy_score(y_test_accuracy, predictions_test["algorithm"]).round(4)
    results.at[model_description, "Accuracy_train"] = accuracy_train
    results.at[model_description, "Accuracy_test"] = accuracy_test

    #Calculate the gaps
    VBS_train = train_average_best_solver
    SBS_train = train_average_single_best_solver
    AAS_train = score_train
    VBS_test = test_average_best_solver
    SBS_test = test_average_single_best_solver
    AAS_test = score_test

    Gap_train = SBS_train - VBS_train
    Gap_test = SBS_test - VBS_test
    Score_train = AAS_train - VBS_train
    Score_test = AAS_test - VBS_test

    GapClose_train = 1-(Score_train/Gap_train)
    GapClose_test = 1-(Score_test/Gap_test)
    results.at[model_description, "Gap_train"] = GapClose_train
    results.at[model_description, "Gap_test"] = GapClose_test

    results.to_csv(result_path, index=True, sep=",", header=True, index_label=None, na_rep="")
    print(str(model_description)+" calculated, "+str(counter)+"/32")
    counter += 1
    return counter #Return counter for better progress information

counter = 1 #Initialize counter

#For loops to test every combination of the parameters
for kernel in ["rbf", "poly", "linear"]:
    for C in [0.01, 0.1, 0.2, 1]:
        for class_weight in [None, "balanced"]:
            
            #Execute the defined loop twice with different degree settings if the kernel is poly
            if kernel == "poly":
                counter = loop_iteration(kernel, C, class_weight, counter, degree = 2)
                counter = loop_iteration(kernel, C, class_weight, counter, degree = 3)
            #Only train one model for the combination of parameters if the kernel is not poly
            else:
                counter = loop_iteration(kernel, C, class_weight, counter)

SVM_C_0.01_kernel_rbf_classweight_None_degree_3 calculated, 1/32
SVM_C_0.01_kernel_rbf_classweight_balanced_degree_3 calculated, 2/32
SVM_C_0.1_kernel_rbf_classweight_None_degree_3 calculated, 3/32
SVM_C_0.1_kernel_rbf_classweight_balanced_degree_3 calculated, 4/32
SVM_C_0.2_kernel_rbf_classweight_None_degree_3 calculated, 5/32
SVM_C_0.2_kernel_rbf_classweight_balanced_degree_3 calculated, 6/32
SVM_C_1_kernel_rbf_classweight_None_degree_3 calculated, 7/32
SVM_C_1_kernel_rbf_classweight_balanced_degree_3 calculated, 8/32
SVM_C_0.01_kernel_poly_classweight_None_degree_2 calculated, 9/32
SVM_C_0.01_kernel_poly_classweight_None_degree_3 calculated, 10/32
SVM_C_0.01_kernel_poly_classweight_balanced_degree_2 calculated, 11/32
SVM_C_0.01_kernel_poly_classweight_balanced_degree_3 calculated, 12/32
SVM_C_0.1_kernel_poly_classweight_None_degree_2 calculated, 13/32
SVM_C_0.1_kernel_poly_classweight_None_degree_3 calculated, 14/32
SVM_C_0.1_kernel_poly_classweight_balanced_degree_2 calculated, 15/

In [3]:
#Additional SVC models, kernel = rbf, class_weight = None, C = [0.3, 0.5, 0.7, 0.9]
#The same result file is used for the two generations of SVC models
counter = 1
for kernel in ["rbf"]:
    for C in [0.3, 0.5, 0.7, 0.9]:
        for class_weight in [None]:
            #Using the function defined in the previous cell
            counter = loop_iteration(kernel, C, class_weight, counter)

SVM_C_0.3_kernel_rbf_classweight_None_degree_3 calculated, 1/4
SVM_C_0.5_kernel_rbf_classweight_None_degree_3 calculated, 2/4
SVM_C_0.7_kernel_rbf_classweight_None_degree_3 calculated, 3/4
SVM_C_0.9_kernel_rbf_classweight_None_degree_3 calculated, 4/4


In [16]:
#Initialize the results file for the first MLP models
'''results = pd.DataFrame(columns=["model", "epochs", "weights", "criterion", "optimizer", "learning_rate", "L1_norm", "VBS_test", "SBS_test", "Accuracy_test", "Score_test", "Gap_test", "VBS_train", "SBS_train","Accuracy_train", "Score_train", "Gap_train"])
results.to_csv("MLP_results.csv", index=True, sep=",", header=True, index_label=None, na_rep="")'''

In [None]:
#Again, a detailed description of the calculation steps independent from the model is given in the first cell
#Grid Search for MLP
from sklearn.svm import SVC
import pandas as pd
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from IPython.display import display
from torch.utils.data import DataLoader, TensorDataset

#Function to convert labels to integers and back
def label_to_int(label):
    for index in label.index:
        match label.at[index, "algorithm"]:
            case "greedy": label.at[index, "algorithm"] = 0
            case "dynamic_programming": label.at[index, "algorithm"] = 1
            case "branch_and_bound": label.at[index, "algorithm"] = 2
            case _: pass 

def int_to_label(label):
    for index in label.index:
        match label.at[index, "algorithm"]:
            case 0: label.at[index, "algorithm"] = "greedy"
            case 1: label.at[index, "algorithm"] = "dynamic_programming"
            case 2: label.at[index, "algorithm"] = "branch_and_bound"
            case _: pass

#Function to determine the average score given the predictions vector
#Given the scores and the predictions as dataframe with an index and a single column "algorithm"
def calculate_score(scores, predictions):
    for index in predictions.index:
        algorithm = predictions.at[index, "algorithm"]
        predictions.at[index, "score"] = scores.at[index, algorithm]
    return predictions["score"].mean()

#Four functions that define the four neural network structures used for the first generation of models. Only the layers and dropout changes
#Function that returns a neural network with id 1
def create_model_1(l2_reg=0):
    #Create a class for the model
    class Model_Small_Dropout(nn.Module):
        #Define the initialization
        def __init__(self, l2_reg=0):
            super(Model_Small_Dropout, self).__init__()
            
            #Defining the different layers and parameters of the model
            self.fc1 = nn.Linear(2002, 500) #Fully connected
            self.bn1 = nn.BatchNorm1d(500) #BatchNorm
            self.dropout1 = nn.Dropout(p=0.5) #Dropout
            self.fc2 = nn.Linear(500, 3) #Fully Connected
            self.relu = nn.ReLU() #Activation
            self.softmax = nn.Softmax(dim=1) #Softmax for the final layer
            self.l2_reg = l2_reg

        #Definition of the forward pass
        def forward(self, x):
            #Sequence of layers defined in the init function
            x = self.fc1(x)
            x = self.bn1(x)
            x = self.relu(x)
            x = self.dropout1(x)
            x = self.fc2(x)
            x = self.softmax(x)
            return x
    #When the function is called, it returns the model
    return Model_Small_Dropout(l2_reg=l2_reg)
    
#Function that returns a neural network with id 2
def create_model_2(l2_reg=0):
    #This class of MLP models consists of the same structure as model 1 but does not implement dropout
    class Model_Small(nn.Module):
        def __init__(self, l2_reg=0):
            super(Model_Small, self).__init__()
            self.fc1 = nn.Linear(2002, 500)
            self.bn1 = nn.BatchNorm1d(500)
            self.fc2 = nn.Linear(500, 3)
            self.relu = nn.ReLU()
            self.softmax = nn.Softmax(dim=1)
            self.l2_reg = l2_reg

        def forward(self, x):
            x = self.fc1(x)
            x = self.bn1(x)
            x = self.relu(x)
            x = self.fc2(x)
            x = self.softmax(x)
            return x
    return Model_Small(l2_reg=l2_reg)

#Function that returns a neural network with id 3
def create_model_3(l2_reg=0):
    class Model_Large_Dropout(nn.Module):
        def __init__(self, l2_reg=0):
            super(Model_Large_Dropout, self).__init__()
            #Compared to the first two models, this one implements more layers and thus increases the complexity
            self.fc1 = nn.Linear(2002, 1000) 
            self.bn1 = nn.BatchNorm1d(1000)
            self.dropout1 = nn.Dropout(p=0.5)
            self.fc2 = nn.Linear(1000, 500)
            self.bn2 = nn.BatchNorm1d(500)
            self.dropout2 = nn.Dropout(p=0.5)
            self.fc3 = nn.Linear(500, 100)
            self.bn3 = nn.BatchNorm1d(100)
            self.dropout3 = nn.Dropout(p=0.5)
            self.fc4 = nn.Linear(100, 3)
            self.relu = nn.ReLU()
            self.softmax = nn.Softmax(dim=1)
            self.l2_reg = l2_reg

        #Structure of the forward pass is similar to the first models
        def forward(self, x):
            x = self.fc1(x)
            x = self.bn1(x)
            x = self.relu(x)
            x = self.dropout1(x)
            x = self.fc2(x)
            x = self.bn2(x)
            x = self.relu(x)
            x = self.dropout2(x)
            x = self.fc3(x)
            x = self.bn3(x)
            x = self.relu(x)
            x = self.dropout3(x)
            x = self.fc4(x)
            x = self.softmax(x)
            return x
    return Model_Large_Dropout(l2_reg=l2_reg)

#Function that returns a neural network with id 4
def create_model_4(l2_reg=0):
    class Model_Large(nn.Module):
        #Identical structure to model 3, this time without dropout
        def __init__(self, l2_reg=0):
            super(Model_Large, self).__init__()
            self.fc1 = nn.Linear(2002, 1000)
            self.bn1 = nn.BatchNorm1d(1000)
            self.fc2 = nn.Linear(1000, 500)
            self.bn2 = nn.BatchNorm1d(500)
            self.fc3 = nn.Linear(500, 100)
            self.bn3 = nn.BatchNorm1d(100)
            self.fc4 = nn.Linear(100, 3)
            self.relu = nn.ReLU()
            self.softmax = nn.Softmax(dim=1)
            self.l2_reg = l2_reg

        def forward(self, x):
            x = self.fc1(x)
            x = self.bn1(x)
            x = self.relu(x)
            x = self.fc2(x)
            x = self.bn2(x)
            x = self.relu(x)
            x = self.fc3(x)
            x = self.bn3(x)
            x = self.relu(x)
            x = self.fc4(x)
            x = self.softmax(x)
            return x
    return Model_Large(l2_reg=l2_reg)

#Definition of the closure function to train with the optimizer LBFGS
def closure():
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train.squeeze(1))
    loss.backward()
    return loss

#Initialize counter
counter = 1

#For loops to test every combination of parameters
for model_id in [1, 2, 3, 4]:
    for weights_name in ["balanced", "normal"]:
        for optimizer_name in ["Adam", "LBFGS", "SGD"]:
            for learning_rate in [0.0003, 0.01, 0.1, 1]:
                for l1_norm in [0, 0.0001]:
                    
                    #Set the epochs and train_test_split seed equal across the instances
                    epochs = 200
                    seed = 1
                    
                    #Unique index for each model
                    model_description = "MLP_modelID_"+str(model_id)+"_weights_"+str(weights_name)+"_optimizer_"+str(optimizer_name)+"_learningrate_"+str(learning_rate)+"_l1Norm_"+str(l1_norm)
                    
                    #Load the necessary data
                    result_path = "MLP_results.csv"
                    scaled_instances = "instances.csv"
                    scores = pd.DataFrame(pd.read_csv("scores.csv", header=0, index_col=0))
                    scores = scores.rename(columns={"dynamic_programming_bellman_array":"dynamic_programming", "branch_and_bound_sort": "branch_and_bound"})
                    data = pd.DataFrame(pd.read_csv(scaled_instances, header=0, index_col=0))
                    results = pd.DataFrame(pd.read_csv(result_path, header=0, index_col=0))

                    #Write the parameters to the result file
                    results.at[model_description, "epochs"] = epochs
                    results.at[model_description, "weights"] = weights_name
                    results.at[model_description, "optimizer"] = optimizer_name
                    results.at[model_description, "learning_rate"] = learning_rate
                    results.at[model_description, "L1_norm"] = l1_norm
                    results.at[model_description, "criterion"] = "CrossEntropyLoss"

                    #Split the data into train set and test set
                    X_train, X_test, y_train, y_test = train_test_split(data, scores, test_size=0.2, random_state=seed)

                    #Calculation of SBS and VBS for the test set
                    test_scores = pd.DataFrame(y_test)
                    test_scores["best_solver"] = test_scores.min(axis=1)

                    average_greedy, average_dynamic_programming, average_branch_and_bound, test_average_best_solver = test_scores.mean(axis=0)
                    test_average_single_best_solver = min(average_greedy, average_dynamic_programming, average_branch_and_bound)

                    results.at[model_description, "VBS_test"] = test_average_best_solver
                    results.at[model_description, "SBS_test"] = test_average_single_best_solver

                    #Calculation of SBS and VBS for the train set
                    train_scores = pd.DataFrame(y_train)
                    train_scores["best_solver"] = train_scores.min(axis=1)

                    average_greedy, average_dynamic_programming, average_branch_and_bound, train_average_best_solver = train_scores.mean(axis=0)
                    train_average_single_best_solver = min(average_greedy, average_dynamic_programming, average_branch_and_bound)

                    results.at[model_description, "VBS_train"] = train_average_best_solver
                    results.at[model_description, "SBS_train"] = train_average_single_best_solver

                    #Prepare the labels
                    y_train = pd.DataFrame(y_train.idxmin(axis=1), columns=["algorithm"])
                    y_test = pd.DataFrame(y_test.idxmin(axis=1), columns=["algorithm"])
                    y_train_accuracy = y_train.copy()
                    y_test_accuracy = y_test.copy()

                    label_to_int(y_train)
                    y_train = y_train.astype(int)
                    label_to_int(y_test)
                    y_test = y_test.astype(int)

                    #Convert labels to integers
                    label_to_int(y_train)
                    label_to_int(y_test)


                    y_train_indices = y_train.index
                    y_test_indices = y_test.index

                    #---------------------------------------------------------------------------------------------------------------
                    #MODEL HERE
                    
                    #Convert the data to torch tensors with the data types float (data) or long (labels)
                    X_train = torch.tensor(X_train.values, dtype=torch.float)
                    y_train = torch.tensor(y_train.values, dtype=torch.long)
                    X_test = torch.tensor(X_test.values, dtype=torch.float)
                    y_test = torch.tensor(y_test.values, dtype=torch.long)

                    #Create the specific model and write the according description to the DataFrame results
                    match model_id:
                        case 1:
                            results.at[model_description, "model"] = "Small_Dropout"
                            model = create_model_1()
                        case 2:
                            results.at[model_description, "model"] = "Small"
                            model = create_model_2()
                        case 3:
                            results.at[model_description, "model"] = "Large_Dropout"
                            model = create_model_3()
                        case 4:
                            results.at[model_description, "model"] = "Large"
                            model = create_model_4()

                    #Define the weights
                    match weights_name:
                        case "normal":
                            #Equal weights
                            weights = [1., 1., 1.]
                        case "balanced":
                            #Balanced with class_weight(i) = #instances/(#classes*#instances_in_class(i))
                            weights = [0.5678, 1.1445, 2.7385]

                    #Define the loss function and optimizer                    
                    criterion = nn.CrossEntropyLoss(torch.tensor(weights))
                    match optimizer_name:
                        case "Adam":
                            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
                        case "LBFGS":
                            optimizer = torch.optim.LBFGS(model.parameters(), lr=learning_rate)
                        case "SGD": 
                            #SGD is implemented with a momentum of 0.9
                            optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

                    #Train the model
                    if optimizer_name == "LBFGS":
                        #LBFGS is trained with the defined Closure
                        for i in range(epochs): #Iterate over epochs 
                            optimizer.step(closure)
                    else:
                        #Training of Adam and SGD
                        for epoch in range(epochs): #Iterate over epochs                          
                            #Reset the gradients to zero
                            optimizer.zero_grad()
                            
                            #Calculate the predictions of the model
                            outputs = model(X_train)
                            
                            #Calculate the loss
                            loss = criterion(outputs, y_train.squeeze(1))

                            #Add activity regularization (l1 norm)
                            l1_lambda = l1_norm #Parameter for impact of the regularization
                            l1_reg = torch.tensor(0.) #Initialize the value
                            #Add the value of each parameter to l1_reg 
                            for name, parameter in model.named_parameters():
                                l1_reg += torch.norm(parameter, 1)
                            #Add the scaled values to the overall loss
                            loss += l1_lambda * l1_reg

                            #Compute gradients of the loss
                            loss.backward()
                            
                            #Update parameters of the model
                            optimizer.step()

                    #Evaluate the model on the train set
                    with torch.no_grad(): #Do not change the gradients
                        
                        #Calculate the predictions
                        outputs = model(X_train)
                        
                        #Reduce the output values to the model with the highest score
                        predicted = torch.argmax(outputs, dim=1) 
                    
                    #Define the predictions as DataFrame to calculate the score
                    predictions_train = pd.DataFrame(predicted, columns=["algorithm"], index=y_train_indices)

                    #Evaluate the model on the test set
                    with torch.no_grad():
                        
                        #Get predictions for each instance
                        outputs = model(X_test)
                        predicted = torch.argmax(outputs, dim=1) 

                    #Define the predictions as DataFrame to calculate the score
                    predictions_test = pd.DataFrame(predicted, columns=["algorithm"], index=y_test_indices)
                    #---------------------------------------------------------------------------------------------------------------
                    
                    #Calculate the score of the predictions on the train set
                    int_to_label(predictions_train)

                    scores = pd.DataFrame(pd.read_csv("scores.csv", header=0, index_col=0))
                    scores = scores.rename(columns={"dynamic_programming_bellman_array":"dynamic_programming", "branch_and_bound_sort": "branch_and_bound"})

                    score_train = calculate_score(scores, predictions_train)
                    results.at[model_description, "Score_train"] = score_train

                    #Calculate the score of the predictions on the test set
                    int_to_label(predictions_test)

                    scores = pd.DataFrame(pd.read_csv("scores.csv", header=0, index_col=0))
                    scores = scores.rename(columns={"dynamic_programming_bellman_array":"dynamic_programming", "branch_and_bound_sort": "branch_and_bound"})

                    score_test = calculate_score(scores, predictions_test)
                    results.at[model_description, "Score_test"] = score_test

                    #Calculate the accuracy of the predictions
                    predictions_train = pd.DataFrame(predictions_train)
                    predictions_test = pd.DataFrame(predictions_test)

                    accuracy_train = accuracy_score(y_train_accuracy, predictions_train["algorithm"]).round(4)
                    accuracy_test = accuracy_score(y_test_accuracy, predictions_test["algorithm"]).round(4)
                    results.at[model_description, "Accuracy_train"] = accuracy_train
                    results.at[model_description, "Accuracy_test"] = accuracy_test

                    #Calculate the gaps
                    VBS_train = train_average_best_solver
                    SBS_train = train_average_single_best_solver
                    AAS_train = score_train
                    VBS_test = test_average_best_solver
                    SBS_test = test_average_single_best_solver
                    AAS_test = score_test

                    Gap_train = SBS_train - VBS_train
                    Gap_test = SBS_test - VBS_test
                    Score_train = AAS_train - VBS_train
                    Score_test = AAS_test - VBS_test

                    GapClose_train = 1-(Score_train/Gap_train)
                    GapClose_test = 1-(Score_test/Gap_test)
                    results.at[model_description, "Gap_train"] = GapClose_train
                    results.at[model_description, "Gap_test"] = GapClose_test

                    #Overwrite the previous results file
                    results.to_csv(result_path, index=True, sep=",", header=True, index_label=None, na_rep="")
                    
                    #Output to keep track of progress and increase the counter
                    print(str(model_description)+" calculated, "+str(counter)+"/192")
                    counter += 1

MLP_modelID_1_weights_balanced_optimizer_Adam_learningrate_0.0003_l1Norm_0.0001 calculated, 2/192
MLP_modelID_1_weights_balanced_optimizer_Adam_learningrate_0.01_l1Norm_0 calculated, 3/192
MLP_modelID_1_weights_balanced_optimizer_Adam_learningrate_0.01_l1Norm_0.0001 calculated, 4/192
MLP_modelID_1_weights_balanced_optimizer_Adam_learningrate_0.1_l1Norm_0 calculated, 5/192
MLP_modelID_1_weights_balanced_optimizer_Adam_learningrate_0.1_l1Norm_0.0001 calculated, 6/192
MLP_modelID_1_weights_balanced_optimizer_Adam_learningrate_1_l1Norm_0 calculated, 7/192
MLP_modelID_1_weights_balanced_optimizer_Adam_learningrate_1_l1Norm_0.0001 calculated, 8/192
MLP_modelID_1_weights_balanced_optimizer_LBFGS_learningrate_0.0003_l1Norm_0 calculated, 9/192
MLP_modelID_1_weights_balanced_optimizer_LBFGS_learningrate_0.0003_l1Norm_0.0001 calculated, 10/192
MLP_modelID_1_weights_balanced_optimizer_LBFGS_learningrate_0.01_l1Norm_0 calculated, 11/192
MLP_modelID_1_weights_balanced_optimizer_LBFGS_learningrate_0.

MLP_modelID_2_weights_normal_optimizer_SGD_learningrate_0.01_l1Norm_0 calculated, 91/192
MLP_modelID_2_weights_normal_optimizer_SGD_learningrate_0.01_l1Norm_0.0001 calculated, 92/192
MLP_modelID_2_weights_normal_optimizer_SGD_learningrate_0.1_l1Norm_0 calculated, 93/192
MLP_modelID_2_weights_normal_optimizer_SGD_learningrate_0.1_l1Norm_0.0001 calculated, 94/192
MLP_modelID_2_weights_normal_optimizer_SGD_learningrate_1_l1Norm_0 calculated, 95/192
MLP_modelID_2_weights_normal_optimizer_SGD_learningrate_1_l1Norm_0.0001 calculated, 96/192
MLP_modelID_3_weights_balanced_optimizer_Adam_learningrate_0.0003_l1Norm_0 calculated, 97/192
MLP_modelID_3_weights_balanced_optimizer_Adam_learningrate_0.0003_l1Norm_0.0001 calculated, 98/192
MLP_modelID_3_weights_balanced_optimizer_Adam_learningrate_0.01_l1Norm_0 calculated, 99/192
MLP_modelID_3_weights_balanced_optimizer_Adam_learningrate_0.01_l1Norm_0.0001 calculated, 100/192
MLP_modelID_3_weights_balanced_optimizer_Adam_learningrate_0.1_l1Norm_0 calc

In [21]:
#Display the results (A more detailed version in detail ResultDiscussion.ipynb)
table = pd.DataFrame(pd.read_csv("MLP_results.csv", header=0, index_col=0))
print(table.describe())
table = table.sort_values(["Gap_test"], ascending=False)
table = table[:15]
display(table)

       epochs  learning_rate    L1_norm      VBS_test     SBS_test  \
count   192.0     192.000000  192.00000  1.920000e+02   192.000000   
mean    200.0       0.277575    0.00005  1.910834e+03  5106.399455   
std       0.0       0.419995    0.00005  2.279681e-13     0.000000   
min     200.0       0.000300    0.00000  1.910834e+03  5106.399455   
25%     200.0       0.007575    0.00000  1.910834e+03  5106.399455   
50%     200.0       0.055000    0.00005  1.910834e+03  5106.399455   
75%     200.0       0.325000    0.00010  1.910834e+03  5106.399455   
max     200.0       1.000000    0.00010  1.910834e+03  5106.399455   

       Accuracy_test    Score_test    Gap_test     VBS_train     SBS_train  \
count     192.000000    192.000000  192.000000  1.920000e+02  1.920000e+02   
mean        0.550481   9108.116141   -1.252272  1.936892e+03  4.801997e+03   
std         0.076873   6272.587019    1.962903  2.279681e-13  9.118725e-13   
min         0.129300   5105.056378   -9.598629  1.936892e

Unnamed: 0,model,epochs,weights,criterion,optimizer,learning_rate,L1_norm,VBS_test,SBS_test,Accuracy_test,Score_test,Gap_test,VBS_train,SBS_train,Accuracy_train,Score_train,Gap_train
MLP_modelID_3_weights_normal_optimizer_SGD_learningrate_0.01_l1Norm_0.0001,Large_Dropout,200.0,normal,CrossEntropyLoss,SGD,0.01,0.0001,1910.833772,5106.399455,0.5868,5105.056378,0.0004202941,1936.891656,4801.996742,0.5873,4801.661205,0.000117
MLP_modelID_3_weights_normal_optimizer_SGD_learningrate_0.01_l1Norm_0,Large_Dropout,200.0,normal,CrossEntropyLoss,SGD,0.01,0.0,1910.833772,5106.399455,0.5865,5106.398092,4.265369e-07,1936.891656,4801.996742,0.5873,4804.008512,-0.000702
MLP_modelID_2_weights_balanced_optimizer_SGD_learningrate_1_l1Norm_0,Small,200.0,balanced,CrossEntropyLoss,SGD,1.0,0.0,1910.833772,5106.399455,0.5863,5106.399455,0.0,1936.891656,4801.996742,0.5872,4801.996742,0.0
MLP_modelID_4_weights_balanced_optimizer_SGD_learningrate_0.01_l1Norm_0.0001,Large,200.0,balanced,CrossEntropyLoss,SGD,0.01,0.0001,1910.833772,5106.399455,0.5863,5106.399455,0.0,1936.891656,4801.996742,0.5872,4801.996742,0.0
MLP_modelID_2_weights_normal_optimizer_Adam_learningrate_1_l1Norm_0.0001,Small,200.0,normal,CrossEntropyLoss,Adam,1.0,0.0001,1910.833772,5106.399455,0.5863,5106.399455,0.0,1936.891656,4801.996742,0.5872,4801.996742,0.0
MLP_modelID_2_weights_normal_optimizer_Adam_learningrate_1_l1Norm_0,Small,200.0,normal,CrossEntropyLoss,Adam,1.0,0.0,1910.833772,5106.399455,0.5863,5106.399455,0.0,1936.891656,4801.996742,0.5872,4801.996742,0.0
MLP_modelID_4_weights_balanced_optimizer_SGD_learningrate_0.01_l1Norm_0,Large,200.0,balanced,CrossEntropyLoss,SGD,0.01,0.0,1910.833772,5106.399455,0.5863,5106.399455,0.0,1936.891656,4801.996742,0.5872,4801.996742,0.0
MLP_modelID_2_weights_normal_optimizer_Adam_learningrate_0.1_l1Norm_0,Small,200.0,normal,CrossEntropyLoss,Adam,0.1,0.0,1910.833772,5106.399455,0.5863,5106.399455,0.0,1936.891656,4801.996742,0.5872,4801.996742,0.0
MLP_modelID_4_weights_balanced_optimizer_SGD_learningrate_0.0003_l1Norm_0.0001,Large,200.0,balanced,CrossEntropyLoss,SGD,0.0003,0.0001,1910.833772,5106.399455,0.5863,5106.399455,0.0,1936.891656,4801.996742,0.5872,4801.996742,0.0
MLP_modelID_4_weights_balanced_optimizer_Adam_learningrate_0.1_l1Norm_0,Large,200.0,balanced,CrossEntropyLoss,Adam,0.1,0.0,1910.833772,5106.399455,0.5863,5106.399455,0.0,1936.891656,4801.996742,0.5872,4801.996742,0.0


In [22]:
#Second iteration of MLP models, more complex models
#This code would clear the results file
'''results = pd.DataFrame(columns=["model", "dropout", "activation", "epochs", "weights", "criterion", "optimizer", "learning_rate", "L1_norm", "L2_norm", "VBS_test", "SBS_test", "Accuracy_test", "Score_test", "Gap_test", "VBS_train", "SBS_train","Accuracy_train", "Score_train", "Gap_train"])
results.to_csv("MLP_results2.csv", index=True, sep=",", header=True, index_label=None, na_rep="")'''

In [None]:
#Second Grid Search for MLPs
#Most of the code is identical to the first generation. For the documentation, the models were trained in different cells
from sklearn.svm import SVC
import pandas as pd
import torch
import torch.nn as nn
from torch.nn import CrossEntropyLoss
from sklearn.model_selection import train_test_split, cross_val_score, KFold, GridSearchCV
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from IPython.display import display
from torch.utils.data import DataLoader, TensorDataset

#Function to convert labels to integers and back
def label_to_int(label):
    for index in label.index:
        match label.at[index, "algorithm"]:
            case "greedy": label.at[index, "algorithm"] = 0
            case "dynamic_programming": label.at[index, "algorithm"] = 1
            case "branch_and_bound": label.at[index, "algorithm"] = 2
            case _: pass 

def int_to_label(label):
    for index in label.index:
        match label.at[index, "algorithm"]:
            case 0: label.at[index, "algorithm"] = "greedy"
            case 1: label.at[index, "algorithm"] = "dynamic_programming"
            case 2: label.at[index, "algorithm"] = "branch_and_bound"
            case _: pass

#Function to determine the average score given the predictions vector
#Given the scores and the predictions as dataframe with an index and a single column "algorithm"
def calculate_score(scores, predictions):
    for index in predictions.index:
        algorithm = predictions.at[index, "algorithm"]
        predictions.at[index, "score"] = scores.at[index, algorithm]
    return predictions["score"].mean()

#Define the deep neural network 5
def create_model_5(l2_reg=0.01): #l2_reg is overwritten later
    class Model_Very_Large_Dropout(nn.Module): #ReLU with dropout
        def __init__(self, l2_reg=0.01):
            #All models consists of the same structure, some with dropout and some with Leaky ReLu activation
            super(Model_Very_Large_Dropout, self).__init__()
            self.fc1 = nn.Linear(2002, 1500)
            self.bn1 = nn.BatchNorm1d(1500)
            self.dropout1 = nn.Dropout(p=0.5)
            self.fc2 = nn.Linear(1500, 1000)
            self.bn2 = nn.BatchNorm1d(1000)
            self.dropout2 = nn.Dropout(p=0.5)
            self.fc3 = nn.Linear(1000, 500)
            self.bn3 = nn.BatchNorm1d(500)
            self.dropout3 = nn.Dropout(p=0.5)
            self.fc4 = nn.Linear(500, 100)
            self.bn4 = nn.BatchNorm1d(100)
            self.dropout4 = nn.Dropout(p=0.5)
            self.fc5 = nn.Linear(100, 50)
            self.bn5 = nn.BatchNorm1d(50)
            self.dropout5 = nn.Dropout(p=0.5)
            self.fc6 = nn.Linear(50, 3)
            self.relu = nn.ReLU()
            self.softmax = nn.Softmax(dim=1)
            self.l2_reg = l2_reg

        #Forward pass, compared to the first generation only the amount of layers is increased
        def forward(self, x):
            x = self.fc1(x)
            x = self.bn1(x)
            x = self.relu(x)
            x = self.dropout1(x)
            x = self.fc2(x)
            x = self.bn2(x)
            x = self.relu(x)
            x = self.dropout2(x)
            x = self.fc3(x)
            x = self.bn3(x)
            x = self.relu(x)
            x = self.dropout3(x)
            x = self.fc4(x)
            x = self.bn4(x)
            x = self.relu(x)
            x = self.dropout4(x)
            x = self.fc5(x)
            x = self.bn5(x)
            x = self.relu(x)
            x = self.dropout5(x)
            x = self.fc6(x)
            x = self.softmax(x)
            return x
    return Model_Very_Large_Dropout(l2_reg=l2_reg)

#Define the deep neural network 6
def create_model_6(l2_reg=0.01):
    class Model_Very_Large(nn.Module): #ReLU, without dropout
        def __init__(self, l2_reg=0.01):
            super(Model_Very_Large, self).__init__()
            self.fc1 = nn.Linear(2002, 1500)
            self.bn1 = nn.BatchNorm1d(1500)
            self.fc2 = nn.Linear(1500, 1000)
            self.bn2 = nn.BatchNorm1d(1000)
            self.fc3 = nn.Linear(1000, 500)
            self.bn3 = nn.BatchNorm1d(500)
            self.fc4 = nn.Linear(500, 100)
            self.bn4 = nn.BatchNorm1d(100)
            self.fc5 = nn.Linear(100, 50)
            self.bn5 = nn.BatchNorm1d(50)
            self.fc6 = nn.Linear(50, 3)
            self.relu = nn.ReLU()
            self.softmax = nn.Softmax(dim=1)
            self.l2_reg = l2_reg

        def forward(self, x):
            x = self.fc1(x)
            x = self.bn1(x)
            x = self.relu(x)
            x = self.fc2(x)
            x = self.bn2(x)
            x = self.relu(x)
            x = self.fc3(x)
            x = self.bn3(x)
            x = self.relu(x)
            x = self.fc4(x)
            x = self.bn4(x)
            x = self.relu(x)
            x = self.fc5(x)
            x = self.bn5(x)
            x = self.relu(x)
            x = self.fc6(x)
            x = self.softmax(x)
            return x
    return Model_Very_Large(l2_reg=l2_reg)

#Define the deep neural network 7
def create_model_7(l2_reg=0.01):
    class Model_Very_Large_Dropout_Leaky(nn.Module): #Leaky ReLU with dropout
        def __init__(self, l2_reg=0.01):
            super(Model_Very_Large_Dropout_Leaky, self).__init__()
            self.fc1 = nn.Linear(2002, 1500)
            self.bn1 = nn.BatchNorm1d(1500)
            self.dropout1 = nn.Dropout(p=0.5)
            self.fc2 = nn.Linear(1500, 1000)
            self.bn2 = nn.BatchNorm1d(1000)
            self.dropout2 = nn.Dropout(p=0.5)
            self.fc3 = nn.Linear(1000, 500)
            self.bn3 = nn.BatchNorm1d(500)
            self.dropout3 = nn.Dropout(p=0.5)
            self.fc4 = nn.Linear(500, 100)
            self.bn4 = nn.BatchNorm1d(100)
            self.dropout4 = nn.Dropout(p=0.5)
            self.fc5 = nn.Linear(100, 50)
            self.bn5 = nn.BatchNorm1d(50)
            self.dropout5 = nn.Dropout(p=0.5)
            self.fc6 = nn.Linear(50, 3)
            self.leaky_relu = nn.LeakyReLU()
            self.softmax = nn.Softmax(dim=1)
            self.l2_reg = l2_reg

        def forward(self, x):
            x = self.fc1(x)
            x = self.bn1(x)
            x = self.leaky_relu(x)
            x = self.dropout1(x)
            x = self.fc2(x)
            x = self.bn2(x)
            x = self.leaky_relu(x)
            x = self.dropout2(x)
            x = self.fc3(x)
            x = self.bn3(x)
            x = self.leaky_relu(x)
            x = self.dropout3(x)
            x = self.fc4(x)
            x = self.bn4(x)
            x = self.leaky_relu(x)
            x = self.dropout4(x)
            x = self.fc5(x)
            x = self.bn5(x)
            x = self.leaky_relu(x)
            x = self.dropout5(x)
            x = self.fc6(x)
            x = self.softmax(x)
            return x
    return Model_Very_Large_Dropout_Leaky(l2_reg=l2_reg)

#Define the deep neural network 8
def create_model_8(l2_reg=0.01):
    class Model_Very_Large_Leaky(nn.Module): #Leaky ReLU without dropout
        def __init__(self, l2_reg=0.01):
            super(Model_Very_Large_Leaky, self).__init__()
            self.fc1 = nn.Linear(2002, 1500)
            self.bn1 = nn.BatchNorm1d(1500)
            self.fc2 = nn.Linear(1500, 1000)
            self.bn2 = nn.BatchNorm1d(1000)
            self.fc3 = nn.Linear(1000, 500)
            self.bn3 = nn.BatchNorm1d(500)
            self.fc4 = nn.Linear(500, 100)
            self.bn4 = nn.BatchNorm1d(100)
            self.fc5 = nn.Linear(100, 50)
            self.bn5 = nn.BatchNorm1d(50)
            self.fc6 = nn.Linear(50, 3)
            self.leaky_relu = nn.LeakyReLU()
            self.softmax = nn.Softmax(dim=1)
            self.l2_reg = l2_reg

        def forward(self, x):
            x = self.fc1(x)
            x = self.bn1(x)
            x = self.leaky_relu(x)
            x = self.fc2(x)
            x = self.bn2(x)
            x = self.leaky_relu(x)
            x = self.fc3(x)
            x = self.bn3(x)
            x = self.leaky_relu(x)
            x = self.fc4(x)
            x = self.bn4(x)
            x = self.leaky_relu(x)
            x = self.fc5(x)
            x = self.bn5(x)
            x = self.leaky_relu(x)
            x = self.fc6(x)
            x = self.softmax(x)
            return x
    return Model_Very_Large_Leaky(l2_reg=l2_reg)

#Initialize the counter and test every combination of the parameters
counter = 1
for learning_rate in [0.01, 0.1]:
    for l1_norm in [0, 0.001]:
        for l2_norm in [0, 0.0001, 0.01]:
            for model_id in [5, 6, 7, 8]:
                for weights_name in ["normal", "balanced"]:
                    for optimizer_name in ["SGD", "Adam"]:
                        
                        #A very large part of the code is identical to the first generation and not described in detail
                        epochs = 200
                        seed = 1
                        model_description = "MLP_modelID_"+str(model_id)+"_weights_"+str(weights_name)+"_optimizer_"+str(optimizer_name)+"_learningrate_"+str(learning_rate)+"_l1Norm_"+str(l1_norm)+"_l2Norm_"+str(l2_norm)
                        result_path = "MLP_results2.csv"
                        scaled_instances = "instances.csv"
                        
                        #Load the data
                        scores = pd.DataFrame(pd.read_csv("scores.csv", header=0, index_col=0))
                        scores = scores.rename(columns={"dynamic_programming_bellman_array":"dynamic_programming", "branch_and_bound_sort": "branch_and_bound"})
                        data = pd.DataFrame(pd.read_csv(scaled_instances, header=0, index_col=0))
                        results = pd.DataFrame(pd.read_csv(result_path, header=0, index_col=0))

                        #Write parameter combination to the results
                        results.at[model_description, "epochs"] = epochs
                        results.at[model_description, "weights"] = weights_name
                        results.at[model_description, "optimizer"] = optimizer_name
                        results.at[model_description, "learning_rate"] = learning_rate
                        results.at[model_description, "L1_norm"] = l1_norm
                        results.at[model_description, "L2_norm"] = l2_norm
                        results.at[model_description, "criterion"] = "CrossEntropyLoss"

                        #Split the data into training and testing sets
                        X_train, X_test, y_train, y_test = train_test_split(data, scores, test_size=0.2, random_state=seed)

                        #Calculation of SBS and VBS for the test set
                        test_scores = pd.DataFrame(y_test)
                        test_scores["best_solver"] = test_scores.min(axis=1)

                        average_greedy, average_dynamic_programming, average_branch_and_bound, test_average_best_solver = test_scores.mean(axis=0)
                        test_average_single_best_solver = min(average_greedy, average_dynamic_programming, average_branch_and_bound)

                        results.at[model_description, "VBS_test"] = test_average_best_solver
                        results.at[model_description, "SBS_test"] = test_average_single_best_solver

                        #Calculation of SBS and VBS for the train set
                        train_scores = pd.DataFrame(y_train)
                        train_scores["best_solver"] = train_scores.min(axis=1)

                        average_greedy, average_dynamic_programming, average_branch_and_bound, train_average_best_solver = train_scores.mean(axis=0)
                        train_average_single_best_solver = min(average_greedy, average_dynamic_programming, average_branch_and_bound)

                        results.at[model_description, "VBS_train"] = train_average_best_solver
                        results.at[model_description, "SBS_train"] = train_average_single_best_solver

                        #Prepare the labels
                        y_train = pd.DataFrame(y_train.idxmin(axis=1), columns=["algorithm"])
                        y_test = pd.DataFrame(y_test.idxmin(axis=1), columns=["algorithm"])
                        y_train_accuracy = y_train.copy()
                        y_test_accuracy = y_test.copy()

                        label_to_int(y_train)
                        y_train = y_train.astype(int)
                        label_to_int(y_test)
                        y_test = y_test.astype(int)

                        #Convert labels to integers
                        label_to_int(y_train)
                        label_to_int(y_test)

                        y_train_indices = y_train.index
                        y_test_indices = y_test.index

                        #---------------------------------------------------------------------------------------------------------------
                        #MODEL HERE
                        #Convert to torch tensors
                        X_train = torch.tensor(X_train.values, dtype=torch.float)
                        y_train = torch.tensor(y_train.values, dtype=torch.long)
                        X_test = torch.tensor(X_test.values, dtype=torch.float)
                        y_test = torch.tensor(y_test.values, dtype=torch.long)

                        #Define the model and write the information to the DataFrame results
                        match model_id:
                            case 5:
                                results.at[model_description, "model"] = "Very_Large_Dropout"
                                results.at[model_description, "dropout"] = "Yes"
                                results.at[model_description, "activation"] = "ReLU"
                                model = create_model_5(l2_norm) #Create with l2 norm
                            case 6:
                                results.at[model_description, "model"] = "Very_Large"
                                results.at[model_description, "dropout"] = "No"
                                results.at[model_description, "activation"] = "ReLU"
                                model = create_model_6(l2_norm) #Create with l2 norm
                            case 7:
                                results.at[model_description, "model"] = "Very_Large_Dropout_Leaky"
                                results.at[model_description, "dropout"] = "Yes"
                                results.at[model_description, "activation"] = "Leaky ReLU"
                                model = create_model_7(l2_norm) #Create with l2 norm
                            case 8:
                                results.at[model_description, "model"] = "Very_Large_Leaky"
                                results.at[model_description, "dropout"] = "No"
                                results.at[model_description, "activation"] = "Leaky ReLU"
                                model = create_model_8(l2_norm) #Create with l2 norm

                        #Define the weights
                        match weights_name:
                            case "normal":
                                weights = [1., 1., 1.]
                            case "balanced":
                                weights = [0.5678, 1.1445, 2.7385]

                        #Define the loss function and optimizer                    
                        criterion = nn.CrossEntropyLoss(torch.tensor(weights))
                        #This time, without the LBFGS optimizer
                        match optimizer_name:
                            case "Adam":
                                optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
                            case "SGD": 
                                optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

                        #Train the model
                        for epoch in range(epochs):                          
                            optimizer.zero_grad()
                            outputs = model(X_train)
                            loss = criterion(outputs, y_train.squeeze(1))

                            #Add l1 norm
                            l1_lambda = l1_norm
                            l1_reg = torch.tensor(0.)
                            for name, param in model.named_parameters():
                                l1_reg += torch.norm(param, 1)
                            loss += l1_lambda * l1_reg

                            loss.backward()
                            optimizer.step()

                        #Evaluate the model on the train set
                        with torch.no_grad():
                            outputs = model(X_train)
                            predicted = torch.argmax(outputs, dim=1) 

                        predictions_train = pd.DataFrame(predicted, columns=["algorithm"], index=y_train_indices)

                        #Evaluate the model on the test set
                        with torch.no_grad():
                            outputs = model(X_test)
                            predicted = torch.argmax(outputs, dim=1) 

                        predictions_test = pd.DataFrame(predicted, columns=["algorithm"], index=y_test_indices)
                        #---------------------------------------------------------------------------------------------------------------

                        #Calculate score on the train set
                        int_to_label(predictions_train)

                        scores = pd.DataFrame(pd.read_csv("scores.csv", header=0, index_col=0))
                        scores = scores.rename(columns={"dynamic_programming_bellman_array":"dynamic_programming", "branch_and_bound_sort": "branch_and_bound"})

                        score_train = calculate_score(scores, predictions_train)
                        results.at[model_description, "Score_train"] = score_train

                        #Calculate score on the test set
                        int_to_label(predictions_test)

                        scores = pd.DataFrame(pd.read_csv("scores.csv", header=0, index_col=0))
                        scores = scores.rename(columns={"dynamic_programming_bellman_array":"dynamic_programming", "branch_and_bound_sort": "branch_and_bound"})

                        score_test = calculate_score(scores, predictions_test)
                        results.at[model_description, "Score_test"] = score_test

                        #Calculate the accuracy
                        predictions_train = pd.DataFrame(predictions_train)
                        predictions_test = pd.DataFrame(predictions_test)

                        accuracy_train = accuracy_score(y_train_accuracy, predictions_train["algorithm"]).round(4)
                        accuracy_test = accuracy_score(y_test_accuracy, predictions_test["algorithm"]).round(4)
                        results.at[model_description, "Accuracy_train"] = accuracy_train
                        results.at[model_description, "Accuracy_test"] = accuracy_test

                        #Calculate the gaps
                        VBS_train = train_average_best_solver
                        SBS_train = train_average_single_best_solver
                        AAS_train = score_train
                        VBS_test = test_average_best_solver
                        SBS_test = test_average_single_best_solver
                        AAS_test = score_test

                        Gap_train = SBS_train - VBS_train
                        Gap_test = SBS_test - VBS_test
                        Score_train = AAS_train - VBS_train
                        Score_test = AAS_test - VBS_test

                        GapClose_train = 1-(Score_train/Gap_train)
                        GapClose_test = 1-(Score_test/Gap_test)
                        results.at[model_description, "Gap_train"] = GapClose_train
                        results.at[model_description, "Gap_test"] = GapClose_test

                        #Overwrite the results file
                        results.to_csv(result_path, index=True, sep=",", header=True, index_label=None, na_rep="")
                        
                        #Output the progress
                        print(str(model_description)+" calculated, "+str(counter)+"/192")
                        counter += 1

MLP_modelID_5_weights_normal_optimizer_SGD_learningrate_0.01_l1Norm_0_l2Norm_0 calculated, 1/192
MLP_modelID_5_weights_normal_optimizer_Adam_learningrate_0.01_l1Norm_0_l2Norm_0 calculated, 2/192
MLP_modelID_5_weights_balanced_optimizer_SGD_learningrate_0.01_l1Norm_0_l2Norm_0 calculated, 3/192
MLP_modelID_5_weights_balanced_optimizer_Adam_learningrate_0.01_l1Norm_0_l2Norm_0 calculated, 4/192
MLP_modelID_6_weights_normal_optimizer_SGD_learningrate_0.01_l1Norm_0_l2Norm_0 calculated, 5/192
MLP_modelID_6_weights_normal_optimizer_Adam_learningrate_0.01_l1Norm_0_l2Norm_0 calculated, 6/192
MLP_modelID_6_weights_balanced_optimizer_SGD_learningrate_0.01_l1Norm_0_l2Norm_0 calculated, 7/192
MLP_modelID_6_weights_balanced_optimizer_Adam_learningrate_0.01_l1Norm_0_l2Norm_0 calculated, 8/192
MLP_modelID_7_weights_normal_optimizer_SGD_learningrate_0.01_l1Norm_0_l2Norm_0 calculated, 9/192
MLP_modelID_7_weights_normal_optimizer_Adam_learningrate_0.01_l1Norm_0_l2Norm_0 calculated, 10/192
MLP_modelID_7_we

In [25]:
#Display the results
table = pd.DataFrame(pd.read_csv("MLP_results2.csv", header=0, index_col=0))
print(table.describe())
table = table.sort_values(["Gap_test"], ascending=False)
table = table[:15]
display(table)

       epochs  learning_rate     L1_norm     L2_norm      VBS_test  \
count   192.0     192.000000  192.000000  192.000000  1.920000e+02   
mean    200.0       0.055000    0.000500    0.003367  1.910834e+03   
std       0.0       0.045118    0.000501    0.004703  2.279681e-13   
min     200.0       0.010000    0.000000    0.000000  1.910834e+03   
25%     200.0       0.010000    0.000000    0.000000  1.910834e+03   
50%     200.0       0.055000    0.000500    0.000100  1.910834e+03   
75%     200.0       0.100000    0.001000    0.010000  1.910834e+03   
max     200.0       0.100000    0.001000    0.010000  1.910834e+03   

          SBS_test  Accuracy_test    Score_test    Gap_test     VBS_train  \
count   192.000000     192.000000    192.000000  192.000000  1.920000e+02   
mean   5106.399455       0.557578   9075.447949   -1.242049  1.936892e+03   
std       0.000000       0.043381   4591.248420    1.436756  2.279681e-13   
min    5106.399455       0.445100   5077.828221   -4.097955  

Unnamed: 0,model,dropout,activation,epochs,weights,criterion,optimizer,learning_rate,L1_norm,L2_norm,VBS_test,SBS_test,Accuracy_test,Score_test,Gap_test,VBS_train,SBS_train,Accuracy_train,Score_train,Gap_train
MLP_modelID_8_weights_balanced_optimizer_SGD_learningrate_0.01_l1Norm_0.001_l2Norm_0.01,Very_Large_Leaky,No,Leaky ReLU,200.0,balanced,CrossEntropyLoss,SGD,0.01,0.001,0.01,1910.833772,5106.399455,0.5935,5077.828221,0.008941,1936.891656,4801.996742,0.618,4690.462641,0.038928
MLP_modelID_8_weights_balanced_optimizer_SGD_learningrate_0.01_l1Norm_0_l2Norm_0.01,Very_Large_Leaky,No,Leaky ReLU,200.0,balanced,CrossEntropyLoss,SGD,0.01,0.0,0.01,1910.833772,5106.399455,0.5879,5099.41234,0.002187,1936.891656,4801.996742,0.6469,4223.346248,0.201965
MLP_modelID_8_weights_balanced_optimizer_SGD_learningrate_0.01_l1Norm_0_l2Norm_0,Very_Large_Leaky,No,Leaky ReLU,200.0,balanced,CrossEntropyLoss,SGD,0.01,0.0,0.0,1910.833772,5106.399455,0.5865,5105.057725,0.00042,1936.891656,4801.996742,0.6002,4748.737471,0.018589
MLP_modelID_6_weights_balanced_optimizer_SGD_learningrate_0.01_l1Norm_0_l2Norm_0,Very_Large,No,ReLU,200.0,balanced,CrossEntropyLoss,SGD,0.01,0.0,0.0,1910.833772,5106.399455,0.5863,5105.650236,0.000234,1936.891656,4801.996742,0.623,4294.52541,0.177121
MLP_modelID_7_weights_balanced_optimizer_SGD_learningrate_0.01_l1Norm_0.001_l2Norm_0.01,Very_Large_Dropout_Leaky,Yes,Leaky ReLU,200.0,balanced,CrossEntropyLoss,SGD,0.01,0.001,0.01,1910.833772,5106.399455,0.5863,5106.399455,0.0,1936.891656,4801.996742,0.5872,4801.996742,0.0
MLP_modelID_5_weights_balanced_optimizer_SGD_learningrate_0.01_l1Norm_0.001_l2Norm_0.0001,Very_Large_Dropout,Yes,ReLU,200.0,balanced,CrossEntropyLoss,SGD,0.01,0.001,0.0001,1910.833772,5106.399455,0.5863,5106.399455,0.0,1936.891656,4801.996742,0.5872,4801.996742,0.0
MLP_modelID_7_weights_normal_optimizer_Adam_learningrate_0.1_l1Norm_0_l2Norm_0.0001,Very_Large_Dropout_Leaky,Yes,Leaky ReLU,200.0,normal,CrossEntropyLoss,Adam,0.1,0.0,0.0001,1910.833772,5106.399455,0.5863,5106.399455,0.0,1936.891656,4801.996742,0.5872,4801.996742,0.0
MLP_modelID_5_weights_normal_optimizer_SGD_learningrate_0.01_l1Norm_0.001_l2Norm_0.0001,Very_Large_Dropout,Yes,ReLU,200.0,normal,CrossEntropyLoss,SGD,0.01,0.001,0.0001,1910.833772,5106.399455,0.5863,5106.399455,0.0,1936.891656,4801.996742,0.5872,4801.996742,0.0
MLP_modelID_8_weights_balanced_optimizer_Adam_learningrate_0.01_l1Norm_0.001_l2Norm_0,Very_Large_Leaky,No,Leaky ReLU,200.0,balanced,CrossEntropyLoss,Adam,0.01,0.001,0.0,1910.833772,5106.399455,0.5863,5106.399455,0.0,1936.891656,4801.996742,0.5872,4801.996742,0.0
MLP_modelID_7_weights_balanced_optimizer_SGD_learningrate_0.1_l1Norm_0_l2Norm_0.0001,Very_Large_Dropout_Leaky,Yes,Leaky ReLU,200.0,balanced,CrossEntropyLoss,SGD,0.1,0.0,0.0001,1910.833772,5106.399455,0.5863,5106.399455,0.0,1936.891656,4801.996742,0.5872,4801.996742,0.0
