In [29]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd

# visualization
import plotly.express as px
import plotly.graph_objects as go

In [30]:
# Read the match table and discard the 'Unnamed: 0' column in one chain.
# NOTE(review): 'Unnamed: 0' is presumably a row index written out by an
# earlier to_csv call — confirm before relying on that.
matches = pd.read_csv('./AllMatches.csv').drop(columns=['Unnamed: 0'])

# (rows, columns) of the loaded frame, shown as the cell output.
matches.shape

(17165, 22)

In [31]:
# Discard every row that contains at least one missing value.
matches = matches.dropna(axis='index', how='any')
matches.shape

(17164, 22)

In [32]:
# Show the column labels of `matches` as the cell output.
matches.columns

Index(['matchId', 'gameDurationSeconds', 'championBlueTop', 'championBlueJG',
       'championBlueMid', 'championBlueBot', 'championBlueSup',
       'championRedTop', 'championRedJG', 'championRedMid', 'championRedBot',
       'championRedSup', 'goldDifference', 'blueGrubCount', 'redGrubCount',
       'blueDragonCount', 'redDragonCount', 'blueBaronCount', 'redBaronCount',
       'blueAtakhan', 'redAtakhan', 'winner'],
      dtype='object')

In [33]:
# Model inputs: the ten champion columns (five blue-side, five red-side).
# NOTE(review): these columns are passed to the regressors as plain numbers.
# If they hold champion IDs (presumably they do — confirm), the numeric order
# carries no meaning and a categorical encoding should be considered.
features = [
    "championBlueTop",
    "championBlueJG",
    "championBlueMid",
    "championBlueBot",
    "championBlueSup",
    "championRedTop",
    "championRedJG",
    "championRedMid",
    "championRedBot",
    "championRedSup",
]

# Design matrix and regression target (the 'goldDifference' column).
X = matches[features]
y = matches['goldDifference']

In [34]:
# Split configuration.
# NOTE(review): TEST_PROP is passed as test_size, so 0.90 holds out 90% of the
# rows for testing and leaves 10% for training — confirm this is intended.
TEST_PROP = 0.90
RANDOM_SEED = 0

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_PROP,
    random_state=RANDOM_SEED,
)

# Shapes of the four pieces, shown as the cell output.
(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

((1716, 10), (1716,), (15448, 10), (15448,))

# Linear Regression (no regularization)

In [35]:
# Ordinary least-squares baseline; fit_intercept=True is the library default,
# written out here so the configuration is visible.
lr = LinearRegression(fit_intercept=True)
lr.fit(X=X_train, y=y_train)

In [36]:
# Error of the linear model on the rows it was fitted on.
mse_train = mean_squared_error(y_train, lr.predict(X_train))

# Error on the held-out rows; y_pred is left holding the test-set predictions.
y_pred = lr.predict(X_test)
mse_test = mean_squared_error(y_test, y_pred)

# Report both errors, one per line.
print(
    "MSE (training data): %.2f" % mse_train,
    "MSE (test data): %.2f" % mse_test,
    sep="\n",
)

MSE (training data): 13038579.91
MSE (test data): 12877175.81


In [37]:
# One-row summary of the unregularized model: errors rounded to 2 decimals.
lreg_results = pd.DataFrame([
    {
        'model': 'lr(without regularization)',
        'train_err': round(mse_train, 2),
        'test_err': round(mse_test, 2),
    }
])

# Lasso (L1 Regularizer)

In [38]:
# L1-penalised linear model; alpha=1.0 is the library default, written out
# here so the regularization strength is visible.
lasso = Lasso(alpha=1.0)
lasso.fit(X=X_train, y=y_train)

In [39]:
# Error of the Lasso model on the rows it was fitted on.
mse_train = mean_squared_error(y_train, lasso.predict(X_train))

# Error on the held-out rows; y_pred is left holding the test-set predictions.
y_pred = lasso.predict(X_test)
mse_test = mean_squared_error(y_test, y_pred)

# Report both errors, one per line.
print(
    "MSE (training data): %.2f" % mse_train,
    "MSE (test data): %.2f" % mse_test,
    sep="\n",
)

MSE (training data): 13038579.91
MSE (test data): 12877169.84


In [40]:
# One-row summary of the Lasso model: errors rounded to 2 decimals.
lasso_results = pd.DataFrame([
    {
        'model': 'lasso',
        'train_err': round(mse_train, 2),
        'test_err': round(mse_test, 2),
    }
])

# Ridge (L2 Regularizer)

In [41]:
# L2-penalised linear model; alpha=1.0 is the library default, written out
# here so the regularization strength is visible.
ridge = Ridge(alpha=1.0)
ridge.fit(X=X_train, y=y_train)

In [42]:
# Error of the Ridge model on the rows it was fitted on.
mse_train = mean_squared_error(y_train, ridge.predict(X_train))

# Error on the held-out rows; y_pred is left holding the test-set predictions.
y_pred = ridge.predict(X_test)
mse_test = mean_squared_error(y_test, y_pred)

# Report both errors, one per line.
print(
    "MSE (training data): %.2f" % mse_train,
    "MSE (test data): %.2f" % mse_test,
    sep="\n",
)

MSE (training data): 13038579.91
MSE (test data): 12877175.80


In [43]:
# One-row summary of the Ridge model: errors rounded to 2 decimals.
ridge_results = pd.DataFrame([
    {
        'model': 'ridge',
        'train_err': round(mse_train, 2),
        'test_err': round(mse_test, 2),
    }
])

# Model Comparison

In [44]:
# Stack the three one-row summaries into a single comparison table.
# Each summary frame carries the index label 0, so a plain concat leaves all
# three rows labelled 0 and label-based lookups such as results.loc[0] return
# every row. ignore_index=True renumbers the rows 0..2 instead.
results = pd.concat(
    [lreg_results, lasso_results, ridge_results],
    axis=0,
    ignore_index=True,
)

# Display the comparison table as the cell output.
results

Unnamed: 0,model,train_err,test_err
0,lr(without regularization),13038579.91,12877175.81
0,lasso,13038579.91,12877169.84
0,ridge,13038579.91,12877175.8


In [45]:
# One bar trace per error type, with the model names on the x axis.
fig = go.Figure(
    data=[
        go.Bar(x=results['model'], y=results['train_err'], name='Training error'),
        go.Bar(x=results['model'], y=results['test_err'], name='Test error'),
    ]
)

# Title, y-axis label and legend position applied in a single layout update.
fig.update_layout(
    title="Model comparison",
    yaxis_title="MSE",
    legend=dict(x=0.05, y=0.999),
)
fig.show()