In [15]:
import pandas as pd
import numpy as np
import seaborn as sns

# Import modules from Scikit-learn
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split   # Import train_test_split function
from sklearn import metrics   # import metrics modules for accuracy calculation
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
import time

In [16]:
# Read data
PATH = "../../my_data/identification-dataset/my_custom_data/anblock-error-dataset.csv"
df = pd.read_csv(PATH)

# Drop uncomplete rows
df.dropna(inplace=True)

In [17]:
# Set training data
train_df = df.drop('material', axis=1)

# Extracted features 
X = train_df.drop('encoded_material', axis=1)
y = train_df['encoded_material'] # Labels

In [18]:
# Feature Scaling
scaler = MinMaxScaler()
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

In [19]:
# Split dataset into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3) # 70% training and 30% test

In [20]:
# different variations of hidden layer combinations
hidden_layers_50 = [(50,), (50, 50), (50, 50, 50), (50, 50, 50, 50), (50, 50, 50, 50, 50), (50, 50, 50, 50, 50, 50)]
hidden_layers_100 = [(100,), (100, 100), (100, 100, 100), (100, 100, 100, 100), (100, 100, 100, 100, 100), (100, 100, 100, 100, 100, 100)]

In [21]:
# Create an empty list to store results
def trainer(hidden_layer_sizes_list):
    results = []
    for sizes in hidden_layer_sizes_list:
        # Create a pipeline object for the model
        pipe_MLP = make_pipeline(StandardScaler(),
                                MLPClassifier(solver='adam',
                                            activation='relu',
                                            
                                            hidden_layer_sizes=sizes,
                                            random_state=0,
                                            max_iter=500           # TODO: tune it later
                                            # verbose=True
                                            )
                                )
        
        # Measure training time
        start_train = time.time()
        pipe_MLP.fit(X_train, y_train)
        end_train = time.time()
        train_time_per_sample = (end_train - start_train) / len(X_train)

        # Measure test time
        start_test = time.time()
        y_pred = pipe_MLP.predict(X_test)
        end_test = time.time()
        test_time_per_sample = (end_test - start_test) / len(X_test)
        
        # Evaluate the pipeline and store the results
        accuracy = metrics.accuracy_score(y_test, y_pred)
        precision = metrics.precision_score(y_test, y_pred, average="macro")
        recall = metrics.recall_score(y_test, y_pred, average="macro")
        f1 = metrics.recall_score(y_test, y_pred, average="macro")


        results.append({
            'hidden_layer_sizes': sizes,
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'train_time_per_sample': train_time_per_sample,
            'test_time_per_sample': test_time_per_sample 
        })
    return results

In [22]:
results = []
for x in np.arange(10):
    r = trainer(hidden_layer_sizes_list = hidden_layers_50)
    results.append(r)



In [36]:
# show one of 10 samples
results_df = pd.DataFrame(results[0])
results_df

Unnamed: 0,hidden_layer_sizes,accuracy,precision,recall,f1,train_time_per_sample,test_time_per_sample
0,"(50,)",0.98186,0.982025,0.982028,0.982028,0.001395,5.787e-07
1,"(50, 50)",0.991154,0.99135,0.991123,0.991123,0.00129,1.333187e-06
2,"(50, 50, 50)",0.988019,0.988097,0.98822,0.98822,0.000961,1.565933e-06
3,"(50, 50, 50, 50)",0.98046,0.981017,0.980441,0.980441,0.000819,2.899774e-06
4,"(50, 50, 50, 50, 50)",0.980404,0.980523,0.980583,0.980583,0.00088,3.198421e-06
5,"(50, 50, 50, 50, 50, 50)",0.983875,0.98393,0.984097,0.984097,0.00145,4.615864e-06
