In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
hit = pd.read_csv('Hitters.csv')
df = hit.copy()
df = df.dropna()
dms = pd.get_dummies(df[['League', 'Division', 'NewLeague']])
y = df['Salary']
X_ = df.drop(['League', 'Division', 'NewLeague', 'Salary'], axis=1).astype('float')
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)
X = preprocessing.StandardScaler().fit(X).transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [2]:
enet_model = ElasticNet().fit(X_train, y_train)

In [3]:
enet_model.coef_, enet_model.intercept_

(array([ 17.66226648,  52.1088761 ,  10.86782757,  31.45756094,
         17.85981295,  35.06278664,  15.86725134,  27.62426024,
         38.48753489,  21.16109977,  40.27919645,  33.11046495,
         19.82462674,  55.92030111,   5.42592811,  -8.30319616,
         17.1150066 , -38.75044815,   4.75748028]),
 542.2307445042424)

In [4]:
enet_model

In [5]:
enet_model.predict(X_test)

array([ 631.83569021,  676.26951924,  919.8879815 ,  414.95930086,
        538.47581222,  345.37652826,  617.21441719,  425.11436704,
        895.92210348,  658.05382497,  642.49143244,  971.49774968,
        601.38050566,  502.7156146 ,  323.3760472 ,  728.5069664 ,
        769.46591203,   93.23943323,  856.42554955,  399.53184926,
        441.43225853,  721.41475218,  615.77050984,  495.4431687 ,
        717.00812785,  125.8076885 ,  854.18796428,  325.16318492,
        651.98003659,   47.52547605,  190.94111875,   10.86264567,
        375.81273512,  499.18992111,  290.45806201,  740.38744635,
       1227.79795854,  286.21871357,  107.00395297,  281.49126246,
         41.40405861,  360.23141532,  505.62909075,  501.64325694,
        938.55521201,  706.5890011 ,  498.63307369,  350.2393041 ,
        320.49575345,  675.46580146,  747.76133714,  374.46611386,
        654.25072932,  313.4693672 ,  299.92213365,  699.73822864,
        575.82131481,  770.65945727,  135.13697941, 1187.04283

In [6]:
y_pred = enet_model.predict(X_test)

In [7]:
np.sqrt(mean_squared_error(y_test, y_pred))

385.20530489676406

In [8]:
r2_score(y_test, y_pred)

0.3145511122775745

In [9]:
from sklearn.linear_model import ElasticNetCV
enet_cv_model = ElasticNetCV(cv = 10, random_state=0).fit(X_train, y_train)

In [10]:
enet_cv_model.alpha_

1.488250267492299

In [11]:
enet_tuned = ElasticNet(alpha = enet_cv_model.alpha_).fit(X_train, y_train)

In [12]:
np.sqrt(mean_squared_error(y_test, enet_tuned.predict(X_test)))

386.1073772971514