In [17]:
import pandas as pd
import tensorflow as tf

from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

%matplotlib widget

import logging

# Only let ERROR-and-above records through the "tensorflow" Python logger,
# and set AutoGraph's verbosity level to 0.
tf_logger = logging.getLogger("tensorflow")
tf_logger.setLevel(logging.ERROR)
tf.autograph.set_verbosity(0)

In [18]:
# Load the song table, keep only the first row for each song title, then
# discard the title and 'loudness' columns so they are not used downstream.
df = (
    pd.read_csv("../archive/song_data.csv")
    .drop_duplicates(subset='song_name', keep='first')
    .drop(columns=['song_name', 'loudness'])
)

In [19]:
# Features: every column except 'song_popularity'; target: that column alone.
# Both are converted to NumPy arrays.
X = df.drop(columns='song_popularity').to_numpy()
y = df['song_popularity'].to_numpy()

In [20]:
# Show the feature matrix as the cell output (NumPy's repr elides the middle
# rows and columns of a large array with '...').
X

array([[2.62333e+05, 5.52000e-03, 4.96000e-01, ..., 1.67060e+02,
        4.00000e+00, 4.74000e-01],
       [2.16933e+05, 1.03000e-02, 5.42000e-01, ..., 1.05256e+02,
        4.00000e+00, 3.70000e-01],
       [2.31733e+05, 8.17000e-03, 7.37000e-01, ..., 1.23881e+02,
        4.00000e+00, 3.24000e-01],
       ...,
       [1.82211e+05, 8.47000e-01, 7.19000e-01, ..., 1.30534e+02,
        4.00000e+00, 2.86000e-01],
       [3.52280e+05, 9.45000e-01, 4.88000e-01, ..., 1.06063e+02,
        4.00000e+00, 3.23000e-01],
       [1.93533e+05, 9.11000e-01, 6.40000e-01, ..., 9.14900e+01,
        4.00000e+00, 5.81000e-01]])

In [None]:
# Standardise the features, then fit a multi-layer perceptron regressor.
# MLPRegressor's hidden layers use ReLU by default and its output activation
# is 'identity' (i.e. linear).
my_regression_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', MLPRegressor(max_iter=500, random_state=0)),
])

# Search space: 4 architectures x 5 alpha values (L2 penalty strength) x
# 5 initial learning rates = 100 candidates. The 'model__' prefix routes each
# key to the pipeline step named 'model'.
param_grid = {
    'model__hidden_layer_sizes': [(100,50,20), (50, 10, 5, 2), (200, 100, 50, 20), (400,200,100,50)],
    'model__alpha': [0.01, 0.03, 0.1, 0.3, 1.0],
    'model__learning_rate_init': [0.0001, 0.0003, 0.001, 0.003, 0.01],
}

# 100 candidates x 5 folds = 500 independent fits (plus the final refit).
# n_jobs=-1 spreads them over all available CPU cores instead of running them
# one after another; each fit is seeded with random_state=0, so the ranking
# of candidates should not change.
# Scoring is the *negated* MSE, so larger (closer to 0) is better.
grid_search = GridSearchCV(
    my_regression_pipeline,
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
)

grid_search.fit(X, y)

print("Best Hyperparameters:", grid_search.best_params_)

In [None]:
# Pull the per-candidate mean CV scores once and reuse them below.
# The scorer is 'neg_mean_squared_error', so values are negative MSEs.
mean_test_scores = grid_search.cv_results_['mean_test_score']
best_index = grid_search.best_index_

print("Mean Test Scores:")
print(mean_test_scores)
print(f"\nBest Hyperparameters Index: {best_index}")
print(f"\nAverage Performance of Best Hyperparameters: {mean_test_scores[best_index]}")

Mean Test Scores:
[ -474.72847263  -449.51045212  -333.21603511  -408.92334481
  -408.74037342 -2805.00908219 -2308.57411347 -1327.93130929
  -408.72851462  -409.28575271 -2776.98057589 -2777.49357265
 -1722.79401305  -574.11375972  -409.23270699 -2722.31138159
 -2721.69952241 -1389.62341844  -416.39033517  -408.70941919
  -472.69760537  -449.47563933  -333.18816302  -794.98705574
  -486.84932879 -2805.00908219 -1153.99393667  -114.45155788
  -317.12512304  -409.28582725 -2781.8945221  -2783.16503046
 -2783.69339234 -1475.63898668  -409.23384023 -2722.31138159
 -2721.69952241 -1389.62329449  -416.39025595  -408.70941093
  -472.69882155  -449.47697496  -333.21123105  -778.51948647
  -486.81488433 -2805.00908219 -1092.84352962  -463.29611886
  -278.63808279  -409.28585772 -2781.89448257 -2783.16507252
 -2783.69996382 -1874.50292654  -409.23402582 -2722.31138159
 -2721.69952241 -1389.62329452  -416.38366045  -408.7087516
  -472.70178131  -449.47829943  -333.19726172  -735.86233417
  -510.