#### Import packages

In [None]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

#### Load dataset and split

In [19]:
dataset = datasets.load_digits()
X = dataset.data
y = dataset.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#### Define the model, fit and predict

In [22]:
activation_functions = ['identity', 'logistic', 'tanh', 'relu']
results_raw = []

max_iter = 10000

# Standard Scaler for feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

results_scaled = []

for activation in activation_functions:
    print(f"\n=== Activation func: {activation} ===")
    
    mlp = MLPClassifier(
        solver='adam',
        hidden_layer_sizes=(50,),
        activation=activation,
        max_iter=max_iter,
        random_state=42
    )
    
    mlp.fit(X_train_scaled, y_train)
    y_pred = mlp.predict(X_test_scaled)
    
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    
    results_scaled.append({
        'activation': activation,
        'precision': precision,
        'recall': recall,
        'f1': f1
    })


=== Activation func: identity ===
Precision: 0.9676
Recall: 0.9683
F1 Score: 0.9678

=== Activation func: logistic ===
Precision: 0.9741
Recall: 0.9740
F1 Score: 0.9740

=== Activation func: tanh ===
Precision: 0.9706
Recall: 0.9702
F1 Score: 0.9702

=== Activation func: relu ===
Precision: 0.9737
Recall: 0.9722
F1 Score: 0.9727


#### GridSearch to find the best parameters

In [None]:
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50)],  # Structure of hidden layers
    'activation': ['logistic', 'tanh', 'relu'],                   # Activation functions
    'solver': ['adam', 'sgd'],                        # Optimizer
    'alpha': [0.0001, 0.001],                         # L2 regularization term
    'learning_rate_init': [0.001, 0.01]               # Initial learning rate
}

grid_search = GridSearchCV(
    estimator=MLPClassifier(max_iter=max_iter, random_state=42),
    param_grid=param_grid,
    cv=3,                           
    scoring='f1_macro',             
    n_jobs=-1,                      # Use all available cores
    verbose=2,                       # Print detailed output
)


grid_search.fit(X_train, y_train)

# Print the best parameters found by GridSearchCV
print("Best parameters:", grid_search.best_params_)

# Predict using the best estimator
best_mlp = grid_search.best_estimator_
y_pred_best = best_mlp.predict(X_test)

# Evaluate the best model
precision_best = precision_score(y_test, y_pred_best, average='macro')
recall_best = recall_score(y_test, y_pred_best, average='macro')
f1_best = f1_score(y_test, y_pred_best, average='macro')

print(f"Optimized Precision: {precision_best:.4f}")
print(f"Optimized Recall: {recall_best:.4f}")
print(f"Optimized F1 Score: {f1_best:.4f}")

Fitting 3 folds for each of 72 candidates, totalling 216 fits
Best parameters: {'activation': 'tanh', 'alpha': 0.001, 'hidden_layer_sizes': (100,), 'learning_rate_init': 0.01, 'solver': 'adam'}
Optimized Precision: 0.9776
Optimized Recall: 0.9775
Optimized F1 Score: 0.9774
