In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
import warnings
warnings.filterwarnings("ignore")

In [3]:
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/ml/Hitters.csv")
df.dropna(inplace=True)

In [4]:
dms = pd.get_dummies(df[['League', 'Division', 'NewLeague']])
y = df["Salary"]
X_ = df.drop(['Salary', 'League', 'Division', 'NewLeague'], axis=1).astype('float64')
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# **Standardization**

In [5]:
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# MLP model : MLP - Multi-Layer Perceptron Model
**MLP is a neural network based model and is generally used for regression problems.**

In [10]:
mlp_model = MLPRegressor().fit(X_train_scaled, y_train)

In [12]:
mlp_model.get_params()

{'activation': 'relu',
 'alpha': 0.0001,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (100,),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 200,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [35]:
y_pred = mlp_model.predict(X_test_scaled)

In [36]:
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
rmse

655.7177974848371

# Model Tuning

In [19]:
mlp_params = {'alpha': [0.1, 0.01, 0.02, 0.001, 0.0],
              "hidden_layer_sizes": [(10,20), (5,5), (100,100)]}

In [23]:
mlp_cv_model = GridSearchCV(mlp_model, mlp_params, cv=10, n_jobs=-1, verbose=2).fit(X_train_scaled, y_train)

Fitting 10 folds for each of 15 candidates, totalling 150 fits


In [26]:
mlp_cv_model.best_params_

{'alpha': 0.0, 'hidden_layer_sizes': (100, 100)}

**Tuned Model**

In [28]:
tuned_model = MLPRegressor(alpha=0.0, hidden_layer_sizes=(100, 100)).fit(X_train_scaled, y_train)

In [33]:
y_pred = tuned_model.predict(X_test_scaled)

In [34]:
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
rmse

353.8240664163904