In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
hit = pd.read_csv('Hitters.csv')
df = hit.copy()
df = df.dropna()
dms = pd.get_dummies(df[['League', 'Division', 'NewLeague']])
y = df['Salary']
X_ = df.drop(['League', 'Division', 'NewLeague', 'Salary'], axis=1).astype('float')
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [3]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
from sklearn.neural_network import MLPRegressor

In [5]:
mlp_model = MLPRegressor().fit(X_train_scaled, y_train)



In [6]:
mlp_model.n_layers_

3

In [7]:
mlp_model.hidden_layer_sizes

(100,)

In [8]:
y_pred = mlp_model.predict(X_test_scaled)

In [9]:
from sklearn.metrics import mean_squared_error
np.sqrt(mean_squared_error(y_test, y_pred))

656.1063697459556

MODEL TUNING

In [10]:
mlp_params = {"alpha": [0.01, 0.02, 0.1, 0.005], 
              "hidden_layer_sizes": [(20, 20), (100,50, 150), (300,200, 150)],
              "activation": ["relu", "logistic"]}

In [11]:
from sklearn.model_selection import GridSearchCV
mlp_cv_model = GridSearchCV(mlp_model, mlp_params, cv = 10).fit(X_train_scaled, y_train)



In [12]:
mlp_cv_model.best_params_

{'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (100, 50, 150)}

In [15]:
mlp_tuned = MLPRegressor(alpha=0.01, hidden_layer_sizes=(100, 50, 150)).fit(X_train, y_train)



In [16]:
mlp_tuned_pred = mlp_tuned.predict(X_test)

In [17]:
np.sqrt(mean_squared_error(y_test, mlp_tuned_pred))

325.3046752714159