In [7]:
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statistics as sts

from sklearn import linear_model, metrics, preprocessing
from sklearn.model_selection import train_test_split, cross_val_score

## Multilinear Models with Standard RL Stats
#### This sections includes the use of Cross Validation and the use of regression metrics to evaluate which model is the best.

In [8]:
full_data = pd.read_csv("joined_data.csv")
full_data
data = full_data[full_data.columns[4:]] # Dropping first columns.
data.head(
)

Unnamed: 0,core_goals,core_assists,core_saves,core_shots,core_score,demo_inflicted,demo_taken,advanced_rating
0,3,0,4,9,1119,2,2,0.86359
1,2,5,2,13,1209,2,0,1.3505
2,4,2,2,12,1077,2,5,1.23297
3,1,1,8,5,1086,1,2,0.806692
4,3,0,4,4,948,3,3,1.077983


In [9]:
mp = pd.read_csv("matches_by_players.csv")
mp
#player_list = {mp.iloc[row]["player_id"]:mp.iloc[row]["player_tag"] for row in range(len(mp))}
player_list = dict(zip(mp.player_id, mp.player_tag))

In [10]:
pd.DataFrame(list(zip(mp.player_id, mp.player_tag, mp.match_id)))

Unnamed: 0,0,1,2
0,5f3d8fdd95f40596eae2412e,Amphis,6159ad3d143c37878b2384a9
1,5f3d8fdd95f40596eae23e01,Torsos,6159ad3d143c37878b2384a9
2,5f3d8fdd95f40596eae23e53,Express,6159ad3d143c37878b2384a9
3,5f7ca648ea8a0f0714fb9a20,Laxin,6159ad3d143c37878b2384a9
4,5f3d8fdd95f40596eae24503,Baked Potato,6159ad3d143c37878b2384a9
...,...,...,...
25618,5f3d8fdd95f40596eae23f8f,Maxeew,62a3988cda9d7ca1c7bb22ba
25619,5f3d8fdd95f40596eae23f3f,Abscrazy,62a3988cda9d7ca1c7bb22ba
25620,5f9c7cde5246bf27936b4572,mikan,62a3988cda9d7ca1c7bb22ba
25621,5f3d8fdd95f40596eae2414a,Burn,62a3988cda9d7ca1c7bb22ba


In [11]:
# Splitting Data
Y = data["advanced_rating"]
X = data[['core_goals','core_assists', 'core_saves', 'core_shots','demo_inflicted', 'demo_taken']]

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=404)


In [12]:
model = linear_model.LinearRegression()
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
print(f'model intercept is {model.intercept_} and model coefficients are {model.coef_}')
print(f'model score = {model.score(X_test, Y_test)}')
print(f'mse = {metrics.mean_squared_error(Y_test, y_pred)}')


model intercept is 0.5789492603233665 and model coefficients are [ 0.1347999   0.08098808  0.00997382 -0.00948071 -0.01101295 -0.01653932]
model score = 0.7630103711600398
mse = 0.027826834519614107


In [22]:
vs = cross_val_score(model, X_test,Y_test)
print(f'Cross validation score = {sts.mean(vs)}')

0.7619424236325764

**Feature Engineering**

In [23]:
data.head(
)

Unnamed: 0,core_goals,core_assists,core_saves,core_shots,core_score,demo_inflicted,demo_taken,advanced_rating
0,3,0,4,9,1119,2,2,0.86359
1,2,5,2,13,1209,2,0,1.3505
2,4,2,2,12,1077,2,5,1.23297
3,1,1,8,5,1086,1,2,0.806692
4,3,0,4,4,948,3,3,1.077983


Need to scale core_score and advanced_rating.