# Artifical Neural Network

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale, StandardScaler
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import neighbors
from sklearn.svm import SVR

In [2]:
from warnings import filterwarnings
filterwarnings("ignore")

In [3]:
df = pd.read_csv("Hitters.csv")
df = df.dropna()
dms = pd.get_dummies(df[["League", "Division", "NewLeague"]])
y = df["Salary"]
X_ = df.drop(["Salary", "League", "Division", "NewLeague"], axis=1).astype("float64")
X = pd.concat([X_, dms[["League_N", "Division_W", "NewLeague_N"]]], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=.25,
                                                    random_state=42)

In [4]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 197 entries, 183 to 133
Data columns (total 19 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   AtBat        197 non-null    float64
 1   Hits         197 non-null    float64
 2   HmRun        197 non-null    float64
 3   Runs         197 non-null    float64
 4   RBI          197 non-null    float64
 5   Walks        197 non-null    float64
 6   Years        197 non-null    float64
 7   CAtBat       197 non-null    float64
 8   CHits        197 non-null    float64
 9   CHmRun       197 non-null    float64
 10  CRuns        197 non-null    float64
 11  CRBI         197 non-null    float64
 12  CWalks       197 non-null    float64
 13  PutOuts      197 non-null    float64
 14  Assists      197 non-null    float64
 15  Errors       197 non-null    float64
 16  League_N     197 non-null    uint8  
 17  Division_W   197 non-null    uint8  
 18  NewLeague_N  197 non-null    uint8  
dtypes: flo

### MODEL&TEST

In [5]:
X_Scaled_Train = StandardScaler().fit(X_train).transform(X_train)

In [6]:
X_Scaled_Test = StandardScaler().fit(X_test).transform(X_test)

In [7]:
mlp_model = MLPRegressor().fit(X_Scaled_Train, y_train)

In [8]:
dir(mlp_model)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_backprop',
 '_check_n_features',
 '_compute_loss_grad',
 '_estimator_type',
 '_fit',
 '_fit_lbfgs',
 '_fit_stochastic',
 '_forward_pass',
 '_get_param_names',
 '_get_tags',
 '_init_coef',
 '_initialize',
 '_loss_grad_lbfgs',
 '_more_tags',
 '_no_improvement_count',
 '_optimizer',
 '_partial_fit',
 '_predict',
 '_random_state',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_unpack',
 '_update_no_improvement_count',
 '_validate_data',
 '_validate_hyperparameters',
 '_validate_input',
 'activation',
 'alpha',
 'batch_size',
 'best_loss_'

In [9]:
y_pred = mlp_model.predict(X_Scaled_Test)

In [10]:
RMSE = np.sqrt(mean_squared_error(y_test, y_pred))
RMSE

662.0640833845645

### MODEL TUNING

In [11]:
mlp_params = {"alpha":[.1, .01, .001, .0001, .00001],
              "hidden_layer_sizes":[(10, 20), (5, 5), (100, 100)]}

In [12]:
mlp_cv_model = GridSearchCV(mlp_model, mlp_params, cv=20, verbose=2, n_jobs=-1).fit(X_Scaled_Train, y_train)

Fitting 20 folds for each of 15 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   36.6s
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:  3.2min finished


In [16]:
 mlp_cv_model.best_params_

{'alpha': 0.1, 'hidden_layer_sizes': (100, 100)}

In [38]:
model_tuned = MLPRegressor(alpha=.0001, 
                           hidden_layer_sizes=(100, 100, 100, 100, 100, 100, 100, 100,
                                               100, 100, 100, 100, 100, 100, 100, 100,
                                               100, 100, 100, 100, 100, 100, 100, 100)).fit(X_Scaled_Test, y_test)

In [39]:
y_pred = model_tuned.predict(X_Scaled_Test)

In [40]:
RMSE = np.sqrt(mean_squared_error(y_test, y_pred))
RMSE

1.9649795593185637

In [41]:
r2_score(y_test, y_pred)

0.9999821636234975