In [9]:
import os
import joblib

import numpy as np
import pandas as pd

# Fitted pipeline used by the next cell to turn feature differences into odds.
# NOTE(review): joblib.load unpickles arbitrary objects; only load a file
# that comes from a trusted source.
model = joblib.load('lr_pipeline.pkl')

# Column layout assigned to every raw scraper file (files are read headerless).
columns = ['MatchID', 'Blue_Team', 'Purp_Team',
           'Blue_Tower', 'Blue_Inhib', 'Blue_Baron', 'Blue_Dragon',
           'Purp_Tower', 'Purp_Inhib', 'Purp_Baron', 'Purp_Dragon',
           'Blue_Kills', 'Blue_Assists', 'Blue_Gold',
           'Purp_Kills', 'Purp_Assists', 'Purp_Gold',
           'Blue_BookOdds', 'Purp_BookOdds']

# Read every file in the working directory whose name starts with 'Session'.
# The listing is sorted because os.listdir() returns entries in arbitrary
# order, which would make the row order differ between runs.
# NOTE(review): a later cell writes 'Session_2.csv' (with a header row and the
# model-odds columns) into the same directory; on a re-run it matches this
# prefix and would be parsed as raw data - confirm raw and processed files
# are kept apart.
session_frames = []
for file in sorted(os.listdir()):
    if not file.startswith('Session'):
        continue

    # read_csv opens and closes the file itself; no separate handle is needed.
    curr_data = pd.read_csv(file, header=None)

    # Some files carry two extra model-odds fields at positions 17 and 18
    # (21 columns in total); those values are wrong, so drop them.
    if curr_data.shape[1] == 21:
        curr_data = curr_data.drop([17, 18], axis=1)

    curr_data.columns = columns
    session_frames.append(curr_data)

# Concatenate once instead of growing the frame inside the loop; fall back to
# an empty frame with the expected columns when no session file was found.
if session_frames:
    data = pd.concat(session_frames, axis=0)
else:
    data = pd.DataFrame(columns=columns)

In [10]:
# Multiplier applied to the raw 1 / probability odds (the "adjusting parameter").
ODDS_ADJUSTMENT = 0.93
# Replacement value for adjusted odds that fall below 1.
MIN_ODDS = 1.001

# Per-team feature columns, in matching order so they can be subtracted pairwise.
BLUE_FEATURES = ['Blue_Tower', 'Blue_Inhib', 'Blue_Baron', 'Blue_Dragon',
                 'Blue_Kills', 'Blue_Assists', 'Blue_Gold']
PURP_FEATURES = ['Purp_Tower', 'Purp_Inhib', 'Purp_Baron', 'Purp_Dragon',
                 'Purp_Kills', 'Purp_Assists', 'Purp_Gold']
MODEL_ODDS_COLS = ['Blue_ModelOdds', 'Purp_ModelOdds']

# Make the cell safe to re-run: discard model odds appended by a previous run
# so the final concat does not create duplicate columns.
data = data.drop(columns=MODEL_ODDS_COLS, errors='ignore')

# Drop "0s bug" rows (gold recorded as 0, either as text or as a number)
# and exact duplicates.
has_gold = (data['Blue_Gold'] != '0') & (data['Blue_Gold'] != 0)
data = data.loc[has_gold].drop_duplicates().reset_index(drop=True)

# Drop instances with no test value (bookmaker odds recorded as '-').
# Both conditions are evaluated on the same frame and combined into one mask.
has_book_odds = (data['Blue_BookOdds'] != '-') & (data['Purp_BookOdds'] != '-')
data = data.loc[has_book_odds].reset_index(drop=True)

# Convert every column after the three identifier columns from object to numeric.
numeric_cols = data.columns[3:]
data[numeric_cols] = data[numeric_cols].apply(pd.to_numeric)

# Adding model odds: the model is fed the Blue-minus-Purp feature differences,
# with both sides relabelled 0..6 so the subtraction aligns column by column.
blue = data[BLUE_FEATURES].copy()
purp = data[PURP_FEATURES].copy()
blue.columns, purp.columns = list(range(7)), list(range(7))

diff = blue - purp

model_odds = (1 / model.predict_proba(diff)) * ODDS_ADJUSTMENT

# The adjustment can push odds below 1; replace those with MIN_ODDS.
# NOTE(review): the following round(2) turns 1.001 into 1.0 - confirm that
# odds of exactly 1.0 are acceptable downstream.
model_odds = np.where(model_odds < 1, MIN_ODDS, model_odds)
model_odds = pd.DataFrame(model_odds).round(2)

# Reverse column order so the last probability column becomes the Blue odds.
# NOTE(review): assumes the model's second class means "Blue wins" - confirm
# against how the pipeline was trained.
model_odds = model_odds[model_odds.columns[::-1]]
model_odds.columns = MODEL_ODDS_COLS

# Merging: both frames carry a fresh 0..n-1 index, so rows line up positionally.
data = pd.concat([data, model_odds], axis=1)

In [14]:
# Persist the cleaned rows together with the model odds; index=False leaves
# the row index out of the file.
# NOTE(review): the file name starts with 'Session', the same prefix the
# loading cell uses to discover raw scraper files in the working directory -
# confirm this output cannot be picked up as input on a re-run.
data.to_csv('Session_2.csv', index=False)

########################################################################################################################

In [6]:
import numpy as np
import pandas as pd

import os

# Load the two processed session files and stack their rows into one frame.
session_paths = ('Session_1.csv', 'Session_2.csv')
s1, s2 = (pd.read_csv(path) for path in session_paths)
data = pd.concat([s1, s2], axis=0)

# TODO: discover every file whose name starts with 'Session' via os.listdir()
# instead of hard-coding the two paths above.

In [7]:
# Write the combined sessions to disk as the test set; index=False leaves the
# row index out of the file.
data.to_csv('test_set.csv', index=False)

Unnamed: 0,MatchID,Blue_Team,Purp_Team,Blue_Tower,Blue_Inhib,Blue_Baron,Blue_Dragon,Purp_Tower,Purp_Inhib,Purp_Baron,...,Blue_Kills,Blue_Assists,Blue_Gold,Purp_Kills,Purp_Assists,Purp_Gold,Blue_BookOdds,Purp_BookOdds,Blue_ModelOdds,Purp_ModelOdds
0,104169295295198347,MAD Lions,Schalke 04,0,0,0,0,0,0,0,...,2,3,16195,3,6,17262,2.42,1.50,3.13,1.32
1,104169295295198347,MAD Lions,Schalke 04,0,0,0,0,0,0,0,...,2,3,16495,3,6,17396,2.42,1.50,2.93,1.36
2,104169295295198347,MAD Lions,Schalke 04,0,0,0,0,0,0,0,...,2,3,16944,3,6,17592,2.42,1.50,2.67,1.43
3,104169295295198347,MAD Lions,Schalke 04,0,0,0,0,0,0,0,...,2,3,17157,3,6,17845,2.42,1.50,2.93,1.36
4,104169295295198347,MAD Lions,Schalke 04,0,0,0,0,0,0,0,...,2,3,17157,3,6,17845,2.73,1.40,2.93,1.36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
665,104841804589413345,Legacy Esports,Papara SuperMassive,0,0,0,0,0,0,0,...,5,18,25769,2,2,24548,2.66,1.38,1.14,5.00
666,104841804589413345,Legacy Esports,Papara SuperMassive,0,0,0,0,0,0,0,...,5,18,26018,2,2,24760,2.66,1.38,1.14,5.09
667,104841804589413345,Legacy Esports,Papara SuperMassive,0,0,0,0,0,0,0,...,5,18,26018,2,2,24760,2.31,1.50,1.14,5.09
668,104841804589413345,Legacy Esports,Papara SuperMassive,0,0,0,0,0,0,0,...,5,18,26302,2,2,24911,2.31,1.50,1.12,5.41
