In [100]:
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statistics as sts

from sklearn import linear_model, metrics, preprocessing
from sklearn.model_selection import train_test_split, cross_val_score

## Multilinear Models with Standard RL Stats
#### This sections includes the use of Cross Validation and the use of regression metrics to evaluate which model is the best.

In [101]:
full_data = pd.read_csv("joined_data.csv")
full_data
data = full_data[full_data.columns[4:]] # Dropping first columns. 
data

Unnamed: 0,core_goals,core_assists,core_saves,core_shots,core_score,demo_inflicted,demo_taken,advanced_rating
0,3,0,4,9,1119,2,2,0.863590
1,2,5,2,13,1209,2,0,1.350500
2,4,2,2,12,1077,2,5,1.232970
3,1,1,8,5,1086,1,2,0.806692
4,3,0,4,4,948,3,3,1.077983
...,...,...,...,...,...,...,...,...
25479,6,8,10,18,2886,6,5,1.306242
25480,3,4,6,12,1719,7,9,0.755061
25481,6,3,11,11,2462,8,5,1.148616
25482,6,2,5,27,2251,10,10,0.949304


In [102]:
mp = pd.read_csv("matches_by_players.csv")
mp
#player_list = {mp.iloc[row]["player_id"]:mp.iloc[row]["player_tag"] for row in range(len(mp))}
player_list = dict(zip(mp.player_id, mp.player_tag))

In [103]:
pd.DataFrame(list(zip(mp.player_id, mp.player_tag, mp.match_id)))

Unnamed: 0,0,1,2
0,5f3d8fdd95f40596eae2412e,Amphis,6159ad3d143c37878b2384a9
1,5f3d8fdd95f40596eae23e01,Torsos,6159ad3d143c37878b2384a9
2,5f3d8fdd95f40596eae23e53,Express,6159ad3d143c37878b2384a9
3,5f7ca648ea8a0f0714fb9a20,Laxin,6159ad3d143c37878b2384a9
4,5f3d8fdd95f40596eae24503,Baked Potato,6159ad3d143c37878b2384a9
...,...,...,...
25618,5f3d8fdd95f40596eae23f8f,Maxeew,62a3988cda9d7ca1c7bb22ba
25619,5f3d8fdd95f40596eae23f3f,Abscrazy,62a3988cda9d7ca1c7bb22ba
25620,5f9c7cde5246bf27936b4572,mikan,62a3988cda9d7ca1c7bb22ba
25621,5f3d8fdd95f40596eae2414a,Burn,62a3988cda9d7ca1c7bb22ba


In [104]:
# Splitting Data
Y = data["advanced_rating"]
X = data[['core_goals','core_assists', 'core_saves', 'core_shots','demo_inflicted', 'demo_taken']]

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=404)


In [105]:
model = linear_model.LinearRegression()
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
print(f'model intercept is {model.intercept_} and model coefficients are {model.coef_}')
print(f'model score = {model.score(X_test, Y_test)}')
print(f'mse = {metrics.mean_squared_error(Y_test, y_pred)}')


model intercept is 0.5789492603233665 and model coefficients are [ 0.1347999   0.08098808  0.00997382 -0.00948071 -0.01101295 -0.01653932]
model score = 0.7630103711600398
mse = 0.027826834519614107


In [106]:
modelR = linear_model.Ridge()
modelR.fit(X_train,Y_train)
modelR.score(X_test, Y_test) # 
y_pred = model.predict(X_test)
print(f'model intercept is {model.intercept_} and model coefficients are {model.coef_}')
print(f'model score = {model.score(X_test, Y_test)}')
print(f'mse = {metrics.mean_squared_error(Y_test, y_pred)}')


model intercept is 0.5789492603233665 and model coefficients are [ 0.1347999   0.08098808  0.00997382 -0.00948071 -0.01101295 -0.01653932]
model score = 0.7630103711600398
mse = 0.027826834519614107


make dataframe for coefficients and features

In [107]:
print(f'model intercept is {modelR.intercept_} and model coefficients are {modelR.coef_}')
print(modelR.feature_names_in_)

model intercept is 0.5789520564432473 and model coefficients are [ 0.13479711  0.08098652  0.00997355 -0.00947979 -0.01101291 -0.01653929]
['core_goals' 'core_assists' 'core_saves' 'core_shots' 'demo_inflicted'
 'demo_taken']


In [108]:
modelL = linear_model.Lasso(alpha=0.2)
modelL.fit(X_train, Y_train)
modelL.score(X_test, Y_test)

0.5558597991189412

In [109]:
print(f'model intercept is {modelL.intercept_} and model coefficients are {modelL.coef_}')

model intercept is 0.723353153430412 and model coefficients are [ 0.08015751  0.01269272  0.          0.00068017 -0.         -0.        ]


In [110]:
poly = preprocessing.PolynomialFeatures(2)
trans_data = poly.fit_transform(X_train)
trans_feat = poly.get_feature_names_out(X_train.columns)
ttest_data = poly.fit_transform(X_test)
modelT = linear_model.LinearRegression()
modelT.fit(trans_data, Y_train)
modelT.score(ttest_data,Y_test)

0.8071132636588507

In [111]:
pd.DataFrame(modelT.coef_ , index = trans_feat, columns = ["Trans Model"])

Unnamed: 0,Trans Model
1,2.6762090000000002e-17
core_goals,0.2293901
core_assists,0.1366166
core_saves,0.02664438
core_shots,-0.005892578
demo_inflicted,-0.008269632
demo_taken,-0.01369532
core_goals^2,0.0002997416
core_goals core_assists,-0.000372115
core_goals core_saves,-0.002664612


In [112]:
pd.DataFrame({"Ridge Model":modelR.coef_, "Standard Linear":model.coef_, "Lasso":modelL.coef_}, index = X_train.columns) 

Unnamed: 0,Ridge Model,Standard Linear,Lasso
core_goals,0.134797,0.1348,0.080158
core_assists,0.080987,0.080988,0.012693
core_saves,0.009974,0.009974,0.0
core_shots,-0.00948,-0.009481,0.00068
demo_inflicted,-0.011013,-0.011013,-0.0
demo_taken,-0.016539,-0.016539,-0.0


In [113]:
model_list = [model, modelR, modelL]
for mod in model_list:
    print(f'Cross Validation score for {str(mod)} = {sts.mean(cross_val_score(mod, X_train, Y_train, cv = 5))}')

print(f'Cross Validation score for {str(modelT)} = {sts.mean(cross_val_score(modelT, trans_data, Y_train, cv = 5))}')

Cross Validation score for LinearRegression() = 0.7619347117706917
Cross Validation score for Ridge() = 0.7619347189199067
Cross Validation score for Lasso(alpha=0.2) = 0.5506345243584102
Cross Validation score for LinearRegression() = 0.805819074938354
