# NBA Data Scraper ja Over/Under Deep Learning -malli

Tämän notebookin koodinpätkillä lisätään viimeisimpien NBA-otteluiden tulokset ja tilastot samaan dataframeen aikaisemmin kerättyjen otteluiden kanssa. Tästä dataframesta lasketaan Deep Learning -mallissa käytettävien riippumattomien muuttujien arvot. DL-mallin riippuva muuttuja, jota malli ennustaa, on tulevien otteluiden pistemäärät koti- ja vierasjoukkueille, joista saadaan arvio ottelun kokonaispistemäärästä. Projekti on muutaman vuoden vanha, joten koodi ei tällä hetkellä toimi käytettyjen kirjastojen versiopäivitysten vuoksi.

In [1]:
from selenium import webdriver
import pandas as pd
import numpy as np
import time
import feather
import fastai
from IPython.display import display

Muutama hyödyllinen funktio dataframejen tarkasteluun ja liittämiseen sekä painotettujen liikkuvien keskiarvojen laskemiseen.

In [2]:
def display_all(df):
    """Display ``df`` with pandas' truncation limits temporarily raised.

    Inside the call, up to 15000 rows and 1000 columns are shown; the
    previous display options are restored when the context manager exits.
    """
    wide_view = pd.option_context(
        "display.max_rows", 15000,
        "display.max_columns", 1000,
    )
    with wide_view:
        display(df)

In [3]:
def join_df(left, right, left_on, right_on=None, suffix='_y'):
    """Left-join ``right`` onto ``left``.

    Parameters
    ----------
    left, right : pandas.DataFrame
        Tables to merge; every row of ``left`` is kept.
    left_on : label or list of labels
        Key column(s) in ``left``.
    right_on : label or list of labels, optional
        Key column(s) in ``right``; defaults to ``left_on``.
    suffix : str
        Appended to columns of ``right`` whose names clash with ``left``
        (columns of ``left`` keep their names unchanged).

    Returns
    -------
    pandas.DataFrame
        The merged table.
    """
    right_keys = left_on if right_on is None else right_on
    merged = left.merge(
        right,
        how='left',
        left_on=left_on,
        right_on=right_keys,
        suffixes=("", suffix),
    )
    return merged

In [4]:
def weighted_ma(w):
    """Build a weighted-average function for a fixed weight vector.

    Parameters
    ----------
    w : sequence of numbers
        One weight per element of the window that will be averaged.

    Returns
    -------
    callable
        ``g(x)`` returning ``sum(w[i] * x[i]) / sum(w)``. ``x`` may be a
        list, a NumPy array or a pandas Series, so the function can be passed
        to ``rolling(...).apply`` with either ``raw=True`` or ``raw=False``.
    """
    # Convert once: a plain list times a plain list would raise TypeError,
    # and the weight total is the same for every window.
    weights = np.asarray(w, dtype=float)
    weight_total = weights.sum()

    def g(x):
        values = np.asarray(x, dtype=float)
        return (weights * values).sum() / weight_total

    return g

In [5]:
# Further below, moving averages of some statistics are computed so that more
# recent values carry more weight. The weights used for that are built here.

fast_ma, slow_ma = 3, 6
moving_averages = [fast_ma, slow_ma]

alpha = 0.30
# Exponentially decaying weights ordered oldest -> newest, so the last
# (most recent) element of each window gets weight 1.0.
fast_weights = [(1-alpha)**n for n in range(fast_ma - 1, -1, -1)]
slow_weights = [(1-alpha)**n for n in range(slow_ma - 1, -1, -1)]

In [6]:
# Directory holding the Feather files this notebook reads and writes.
# NOTE: the value ends with '/', and file paths are built as
# '{}/NAME'.format(PATH), so the resulting paths contain a double slash.
PATH = "C:/Users/akibr/fastai/courses/dl1/data/NBA/"

In [7]:
# Date string in YYYY-MM-DD form. It is not referenced in the visible part of
# the notebook; presumably the date of the games to predict (TODO confirm).
bet_date = '2020-03-11'

In [8]:
# All statistics used, plus the continuous and categorical variables that are
# derived from them and fed to the model as independent variables.

# Statistics available right after scraping (PPS is added as PTS / FGA).
orig_stats1 = [
    'PTS', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%', 'FTM', 'FTA', 'FT%',
    'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'PF', '+/-', 'PPS',
]

# The same statistics followed by the extra ones computed later in the notebook.
orig_stats2 = orig_stats1 + ['POS', 'OR', 'DR', 'SOS', 'PIE', 'TIE']

# Column names for the home side, the away side and the venue-independent totals.
home_stats1 = ['home' + stat for stat in orig_stats1]
home_stats2 = ['home' + stat for stat in orig_stats2]

away_stats1 = ['away' + stat for stat in orig_stats1]
away_stats2 = ['away' + stat for stat in orig_stats2]

total_stats = ['total' + stat for stat in orig_stats2]


# Team name -> three-letter abbreviation.
team_map = {
    'Atlanta': 'ATL',
    'Brooklyn': 'BKN',
    'Boston': 'BOS',
    'Charlotte': 'CHA',
    'Chicago': 'CHI',
    'Cleveland': 'CLE',
    'Dallas': 'DAL',
    'Denver': 'DEN',
    'Detroit': 'DET',
    'Golden State': 'GSW',
    'Houston': 'HOU',
    'Indiana': 'IND',
    'LA': 'LAC',
    'Los Angeles': 'LAL',
    'Memphis': 'MEM',
    'Miami': 'MIA',
    'Milwaukee': 'MIL',
    'Minnesota': 'MIN',
    'New Orleans': 'NOP',
    'New York': 'NYK',
    'Oklahoma City': 'OKC',
    'Orlando': 'ORL',
    'Philadelphia': 'PHI',
    'Phoenix': 'PHX',
    'Portland': 'POR',
    'Sacramento': 'SAC',
    'San Antonio': 'SAS',
    'Toronto': 'TOR',
    'Utah': 'UTA',
    'Washington': 'WAS',
}


cat_vars = ['Season', 'homeTeam', 'awayTeam']

# Statistic order used inside every group of continuous variables
# (this differs from the order of orig_stats2).
_contin_stat_order = [
    'PTS', '3P%', '3PA', '3PM', 'AST', 'BLK', 'DREB', 'FG%', 'FGA', 'FGM',
    'FT%', 'FTA', 'FTM', 'OREB', 'PF', '+/-', 'PPS', 'STL', 'TOV', 'REB',
    'POS', 'OR', 'DR', 'SOS', 'PIE', 'TIE',
]

# Group prefixes in output order: venue-specific averages first, then the
# averages over all of a team's games ("total").
_contin_prefixes = [
    'fasthome', 'slowhome', 'fastaway', 'slowaway',
    'fasthometotal', 'slowhometotal', 'fastawaytotal', 'slowawaytotal',
]

contin_vars = [
    prefix + stat
    for prefix in _contin_prefixes
    for stat in _contin_stat_order
]
# The match scores are carried along in the same list.
contin_vars += ['HomeMatchScore', 'AwayMatchScore', 'MatchScore']

## Viimeisimpien ottelutilastojen lisäys aikaisemmat ottelut sisältävään dataframeen

In [9]:
# Fetch the box-score table from the NBA statistics site and store it in its
# own dataframe (df_daily).

path_to_gecko = 'C:/Users/akibr/geckodriver.exe'
# NOTE(review): recent Selenium 4 releases reportedly no longer accept
# `executable_path` (a Service object is used instead) - confirm against the
# installed Selenium version.
browser = webdriver.Firefox(executable_path=path_to_gecko)

url = 'https://stats.nba.com/teams/boxscores/'
seasonxpath = '/html/body/main/div[2]/div/div[2]/div/div/div[1]/div[1]/div/div/label/select/option[2]'

try:
    browser.get(url)

    # The generic find_element(by, value) call is used with the locator
    # strategy strings 'xpath' and 'class name' (the values of By.XPATH and
    # By.CLASS_NAME); the find_element_by_* helpers were removed in Selenium 4.
    browser.find_element('xpath', seasonxpath).click()
    # Fixed pause to give the page time to reload the table after the click.
    time.sleep(10)

    # Optional page selection, left disabled as in the original:
    #pagexpath = '/html/body/main/div[2]/div/div[2]/div/div/nba-stat-table/div[1]/div/div/select/option[1]'
    #browser.find_element('xpath', pagexpath).click()
    #time.sleep(15)

    table = browser.find_element('class name', 'nba-stat-table__overflow')
    # Read the text before the browser is closed; the element is unusable afterwards.
    texttable = table.text
finally:
    # Always shut the browser down so no Firefox/geckodriver process is leaked,
    # even when the page layout changed and a lookup above failed.
    browser.quit()

# First line of the table text is the header, every other line is one team-game row.
header, *games = texttable.split('\n')
column_names = header.split(' ')

if len(column_names) < 4:
    raise ValueError(
        'Box-score table is empty or has an unexpected header: {!r}'.format(header)
    )

mat = [n.split(' ') for n in games]

# Splitting on spaces breaks the multi-word header cells apart; the tokens at
# positions 1-3 are renamed so they label the three match-up tokens of each
# row (team, '@' or other separator, opponent).
column_names[1] = 'TEAM1'
column_names[2] = 'ATVS'
column_names[3] = 'TEAM2'

df_daily = pd.DataFrame(data=mat, columns=column_names)

In [10]:
# Convert the game date to datetime and all remaining statistics to numeric.

df_daily.drop(labels=['W/L'], axis=1, inplace=True)

# The first four columns are the text team/match-up tokens; the first column
# after them is the date and everything that follows is a numeric statistic.
for position, column in enumerate(df_daily.columns[4:]):
    if position:
        # Values that cannot be parsed as numbers become NaN.
        df_daily[column] = pd.to_numeric(df_daily[column], errors='coerce')
    else:
        df_daily[column] = pd.to_datetime(df_daily[column])

df_daily['PPS'] = df_daily['PTS'] / df_daily['FGA']

In [11]:
# Add home/away team columns and separate statistic columns for the home and
# the away side.

# '@' in the match-up means the row's team (TEAM1) is the away team and TEAM2
# is the home team; for any other token TEAM1 is treated as the home team.
row_team_is_away = df_daily['ATVS'] == '@'
df_daily['homeTeam'] = np.where(row_team_is_away, df_daily['TEAM2'], df_daily['TEAM1'])
df_daily['awayTeam'] = np.where(row_team_is_away, df_daily['TEAM1'], df_daily['TEAM2'])

# Each row holds the statistics of the team in TEAM, so they are copied to the
# home* columns when TEAM is the home team and to the away* columns when it is
# the away team; the other side's columns stay NaN for that row.
is_home_row = df_daily['homeTeam'] == df_daily['TEAM']
is_away_row = ~is_home_row & (df_daily['awayTeam'] == df_daily['TEAM'])

# Create every home column before the first away column so the resulting
# column order matches the original layout (home block, then away block).
for stat in orig_stats1:
    df_daily['home{}'.format(stat)] = df_daily[stat].astype('float64').where(is_home_row)

for stat in orig_stats1:
    df_daily['away{}'.format(stat)] = df_daily[stat].astype('float64').where(is_away_row)

In [None]:
# Split the home and away statistics into their own tables and drop the rows
# with missing values (each scraped row fills only one side's columns).
# df_daily_core holds the basic information of each game - date, minutes and
# the home and away team - with duplicate rows removed.

df_daily.drop(labels=['TEAM','TEAM1','ATVS','TEAM2'], axis=1, inplace=True)

# Columns are selected by name instead of by position so the split does not
# silently break if the scraped column layout changes; .dropna() and
# .drop_duplicates() return new frames rather than mutating slices of df_daily.
df_daily_home = df_daily[['DATE', 'homeTeam'] + home_stats1].dropna()
df_daily_away = df_daily[['DATE', 'awayTeam'] + away_stats1].dropna()
df_daily_core = df_daily[['DATE', 'MIN', 'homeTeam', 'awayTeam']].drop_duplicates()

In [13]:
# Merge the cleaned home and away statistics back onto the game rows, add the
# score columns, put the rows in reverse order and tag them with the season.

_with_home = join_df(df_daily_core, df_daily_home, ['DATE','homeTeam'])
df_daily_complete = join_df(_with_home, df_daily_away, ['DATE', 'awayTeam'])

_home_points = df_daily_complete['homePTS']
_away_points = df_daily_complete['awayPTS']
df_daily_complete['HomeMatchScore'] = _home_points
df_daily_complete['AwayMatchScore'] = _away_points
df_daily_complete['MatchScore'] = _away_points + _home_points

# Reverse the row order and renumber the index from zero.
df_daily_complete = df_daily_complete.iloc[::-1].reset_index(drop=True)

df_daily_complete['Season'] = '2019-2020'

In [14]:
# Compute a few extra statistics that can be used in modelling (the original
# note mentions ball possession and rebounds).

def _stat(side, name):
    """Return the column ``<side><name>`` of df_daily_complete."""
    return df_daily_complete[side + name]


# POS: 0.96 * (FGA + TOV + 0.475 * FTA - OREB), for both sides first because
# the ratings below need them.
for _side in ('home', 'away'):
    df_daily_complete[_side + 'POS'] = 0.96*(_stat(_side, 'FGA')+_stat(_side, 'TOV')+(0.475*_stat(_side, 'FTA'))-_stat(_side, 'OREB'))

for _side, _opponent in (('home', 'away'), ('away', 'home')):
    # OR / DR: own and opponent points per 100 of the side's own POS value.
    df_daily_complete[_side + 'OR'] = 100*(_stat(_side, 'PTS')/_stat(_side, 'POS'))
    df_daily_complete[_side + 'DR'] = 100*(_stat(_opponent, 'PTS')/_stat(_side, 'POS'))
    # SOS: 100 times the difference between OR and DR.
    df_daily_complete[_side + 'SOS'] = 100*(_stat(_side, 'OR')-_stat(_side, 'DR'))
    df_daily_complete[_side + 'PIE'] = _stat(_side, 'PTS')+_stat(_side, 'FGM')+_stat(_side, 'FTM')-_stat(_side, 'FGA')-_stat(_side, 'FTA')+_stat(_side, 'DREB')+(0.5*_stat(_side, 'OREB'))

# TIE: each side's share of the two PIE values combined.
df_daily_complete['homeTIE'] = _stat('home', 'PIE')/(_stat('home', 'PIE')+_stat('away', 'PIE'))
df_daily_complete['awayTIE'] = _stat('away', 'PIE')/(_stat('home', 'PIE')+_stat('away', 'PIE'))

In [15]:
# Convert the game statistics to a per-minute basis.

# Every home/away statistic except the last entry of each list (homeTIE and
# awayTIE, which are already ratios) is divided by MIN / 5.
# NOTE(review): the division by 5 presumably turns the team's summed player
# minutes into game minutes - confirm.
per_minute_columns = home_stats2[:-1] + away_stats2[:-1]
game_minutes = df_daily_complete['MIN'] / 5

# One vectorised, row-aligned division replaces the former cell-by-cell loop.
df_daily_complete[per_minute_columns] = (
    df_daily_complete[per_minute_columns].div(game_minutes, axis=0)
)

df_daily_complete.drop(labels=['MIN'], axis=1, inplace=True)

In [None]:
# Add the newest games to the table of all previously collected games, drop
# duplicate rows and save the table again.

df_complete = feather.read_dataframe('{}/NBA_complete'.format(PATH))

# DataFrame.append was removed in pandas 2.0; pd.concat with
# ignore_index=True is the equivalent operation.
df_complete = pd.concat([df_complete, df_daily_complete], ignore_index=True)

# Games that were already stored are scraped again, so exact duplicates are
# removed; the index is renumbered because to_feather needs a default index.
df_complete = df_complete.drop_duplicates().reset_index(drop=True)

df_complete.to_feather('{}/NBA_complete'.format(PATH))

In [17]:
# Show the last ten rows of the combined table with all columns visible.
display_all(df_complete.tail(10))

Unnamed: 0,AwayMatchScore,DATE,HomeMatchScore,MatchScore,Season,away+/-,away3P%,away3PA,away3PM,awayAST,awayBLK,awayDR,awayDREB,awayFG%,awayFGA,awayFGM,awayFT%,awayFTA,awayFTM,awayOR,awayOREB,awayPF,awayPIE,awayPOS,awayPPS,awayPTS,awayREB,awaySOS,awaySTL,awayTIE,awayTOV,awayTeam,home+/-,home3P%,home3PA,home3PM,homeAST,homeBLK,homeDR,homeDREB,homeFG%,homeFGA,homeFGM,homeFT%,homeFTA,homeFTM,homeOR,homeOREB,homePF,homePIE,homePOS,homePPS,homePTS,homeREB,homeSOS,homeSTL,homeTIE,homeTOV,homeTeam
5898,101.0,2020-03-09,92.0,193.0,2019-2020,0.1875,0.752083,0.75,0.270833,0.458333,0.041667,2.046671,0.8125,0.904167,1.729167,0.75,1.852083,0.375,0.333333,2.246889,0.291667,0.291667,2.041667,1.951,0.025351,2.104167,1.104167,20.021784,0.1875,0.586826,0.416667,TOR,-0.1875,0.775,0.895833,0.333333,0.5,0.083333,2.315732,0.583333,0.829167,1.729167,0.6875,1.4875,0.291667,0.208333,2.10938,0.125,0.416667,1.4375,1.893,0.023092,1.916667,0.708333,-20.635235,0.25,0.413174,0.229167,UTA
5899,114.0,2020-03-10,111.0,225.0,2019-2020,0.0625,0.73125,0.770833,0.270833,0.395833,0.104167,2.351249,0.729167,0.91875,1.9375,0.854167,1.8,0.458333,0.395833,2.414796,0.229167,0.3125,2.072917,2.049,0.025538,2.375,0.958333,6.354726,0.1875,0.496259,0.208333,BOS,-0.0625,0.78125,0.5,0.1875,0.479167,0.166667,2.468404,0.6875,1.03125,1.9375,0.958333,1.89375,0.229167,0.208333,2.403446,0.208333,0.416667,2.104167,2.0045,0.024866,2.3125,0.895833,-6.495801,0.104167,0.503741,0.25,IND
5900,115.0,2020-03-10,122.0,237.0,2019-2020,-0.145833,0.79375,0.4375,0.166667,0.625,0.0625,2.48074,0.583333,1.08125,1.6875,0.875,1.545833,0.645833,0.479167,2.338402,0.166667,0.604167,2.083333,2.1345,0.029578,2.395833,0.75,-14.233752,0.208333,0.479616,0.395833,NYK,0.145833,0.833333,0.729167,0.291667,0.479167,0.083333,2.36836,0.583333,0.985417,1.541667,0.729167,1.758333,0.9375,0.791667,2.512521,0.1875,0.520833,2.260417,2.1075,0.034347,2.541667,0.770833,14.416106,0.166667,0.520384,0.395833,WAS
5901,111.0,2020-03-10,117.0,228.0,2019-2020,-0.125,0.833333,0.729167,0.291667,0.4375,0.0,2.419883,0.6875,1.004167,1.729167,0.833333,1.539583,0.479167,0.354167,2.295787,0.166667,0.416667,2.0625,2.0985,0.027861,2.3125,0.854167,-12.409658,0.1875,0.491315,0.395833,MIN,0.125,0.822917,0.791667,0.3125,0.5,0.104167,2.286525,0.583333,1.06875,1.666667,0.854167,1.602083,0.541667,0.416667,2.410121,0.104167,0.416667,2.135417,2.107,0.030469,2.4375,0.6875,12.359595,0.1875,0.508685,0.375,HOU
5902,103.0,2020-03-10,108.0,211.0,2019-2020,-0.104167,0.520833,0.5,0.125,0.604167,0.104167,2.345509,0.541667,1.041667,1.666667,0.833333,1.539583,0.479167,0.354167,2.236921,0.1875,0.3125,1.822917,1.9985,0.026823,2.145833,0.729167,-10.858839,0.1875,0.461741,0.375,CLE,0.104167,0.735417,0.708333,0.25,0.479167,0.125,2.277955,0.666667,1.041667,1.75,0.875,1.666667,0.3125,0.25,2.388535,0.291667,0.4375,2.125,1.9625,0.026786,2.25,0.958333,11.058033,0.166667,0.538259,0.4375,CHI
5903,120.0,2020-03-10,115.0,235.0,2019-2020,0.104167,0.883333,0.6875,0.291667,0.645833,0.0625,2.344443,0.604167,0.972917,1.916667,0.895833,1.89375,0.458333,0.416667,2.446375,0.25,0.541667,2.166667,2.129,0.027174,2.5,0.854167,10.193231,0.208333,0.503632,0.333333,ORL,-0.104167,0.789583,0.604167,0.229167,0.5,0.104167,2.41015,0.666667,1.041667,1.583333,0.791667,1.535417,0.791667,0.583333,2.309727,0.145833,0.458333,2.135417,2.161,0.031524,2.395833,0.8125,-10.04229,0.145833,0.496368,0.4375,MEM
5904,109.0,2020-03-10,119.0,228.0,2019-2020,-0.208333,0.804167,0.916667,0.354167,0.479167,0.0625,2.477779,0.729167,0.952083,1.916667,0.875,1.514583,0.229167,0.166667,2.269562,0.145833,0.395833,1.96875,2.0845,0.024683,2.270833,0.875,-20.821673,0.166667,0.462103,0.291667,DAL,0.208333,0.977083,0.666667,0.3125,0.666667,0.208333,2.316231,0.708333,1.077083,1.854167,0.958333,1.666667,0.3125,0.25,2.52873,0.125,0.3125,2.291667,2.0425,0.027856,2.479167,0.833333,21.24983,0.229167,0.537897,0.25,SAS
5905,105.0,2020-03-10,121.0,226.0,2019-2020,-0.333333,0.716667,0.666667,0.229167,0.541667,0.104167,2.527911,0.708333,0.8375,1.8125,0.729167,2.0,0.520833,0.5,2.193642,0.1875,0.333333,1.885417,2.0775,0.025144,2.1875,0.895833,-33.426929,0.0625,0.43932,0.291667,PHX,0.333333,1.0125,0.729167,0.354167,0.416667,0.1875,2.29992,0.770833,1.054167,1.854167,0.9375,1.716667,0.354167,0.291667,2.650384,0.1875,0.354167,2.40625,1.9815,0.028324,2.520833,0.958333,35.046401,0.1875,0.56068,0.229167,POR
5906,104.0,2020-03-10,102.0,206.0,2019-2020,0.041667,0.677083,0.833333,0.270833,0.5,0.0625,2.260446,0.6875,0.85625,1.875,0.770833,1.539583,0.479167,0.354167,2.304768,0.291667,0.375,1.770833,1.9585,0.024074,2.166667,0.979167,4.432247,0.145833,0.480226,0.229167,BKN,-0.041667,0.78125,0.833333,0.3125,0.5,0.083333,2.263736,0.791667,0.958333,1.8125,0.833333,1.214583,0.25,0.145833,2.220202,0.166667,0.458333,1.916667,1.994,0.024425,2.125,0.958333,-4.353338,0.145833,0.519774,0.3125,LAL
5907,131.0,2020-03-10,107.0,238.0,2019-2020,0.5,0.947917,0.916667,0.416667,0.583333,0.041667,2.298489,0.75,1.041667,1.916667,0.958333,2.083333,0.395833,0.395833,2.814038,0.229167,0.395833,2.635417,2.0205,0.029665,2.729167,0.979167,51.554896,0.104167,0.57631,0.229167,LAC,-0.5,0.602083,0.791667,0.229167,0.520833,0.0,2.906089,0.645833,0.975,1.645833,0.770833,1.697917,0.5625,0.458333,2.373676,0.083333,0.354167,1.9375,1.9565,0.028217,2.229167,0.729167,-53.241332,0.0625,0.42369,0.208333,GSW


## Liikkuvien keskiarvojen laskeminen tilastoista

Seuraavien funktioiden avulla lasketaan kaikista tilastoista kaksi eri liikkuvaa keskiarvoa, ja lopullisesta **df_final**-dataframesta luokitellaan kategoriamuuttujat ja jatkuvat muuttujat erikseen. Tätä lopullista taulukkodataa käytetään Deep Learning -mallin opettamiseen.

In [19]:
#df_complete = feather.read_dataframe('{}/NBA_complete'.format(PATH))

In [20]:
# Distinct seasons in order of first appearance, and distinct home teams
# sorted in place; both are read as globals by the averaging functions.
seasons = df_complete['Season'].unique()

teams = df_complete['homeTeam'].unique()
teams.sort()

In [21]:
def calculate_teamavgs(df, moving_averages):
    """Add venue-specific weighted moving averages of every team statistic.

    For each season in the module-level ``seasons`` and each team in
    ``teams``, the team's home games (columns ``home_stats2``) and its away
    games (columns ``away_stats2``) are smoothed separately with a fast and a
    slow weighted moving average (weights ``fast_weights`` / ``slow_weights``).
    Every average is shifted by one row, so the value stored on a game is
    built only from the rows that come before it in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Game table with ``Season``, ``homeTeam``, ``awayTeam`` and the
        statistic columns named above. The index is expected to be unique.
        NOTE(review): rows are assumed to be in chronological order - confirm.
    moving_averages : sequence of int
        ``[fast_window, slow_window]``. The window sizes should equal the
        lengths of ``fast_weights`` and ``slow_weights`` respectively.

    Returns
    -------
    pandas.DataFrame
        ``df.combine_first(features)``: a new frame with the additional
        ``fast<stat>`` / ``slow<stat>`` columns. ``df`` itself is returned
        unchanged when no season/team combination has any game.
    """
    fast_window, slow_window = moving_averages[0], moving_averages[1]
    # Build the two averaging functions once instead of once per column.
    fast_avg = weighted_ma(fast_weights)
    slow_avg = weighted_ma(slow_weights)

    def _smoothed(games):
        """Return the shifted fast/slow averages for every column of ``games``."""
        columns = {}
        for stat in games.columns:
            series = games[stat]
            # raw=True hands plain arrays to the weighting function.
            columns['fast{}'.format(stat)] = (
                series.rolling(window=fast_window).apply(fast_avg, raw=True).shift(1)
            )
            columns['slow{}'.format(stat)] = (
                series.rolling(window=slow_window).apply(slow_avg, raw=True).shift(1)
            )
        return pd.DataFrame(columns, index=games.index)

    home_parts = []
    away_parts = []

    for season in seasons:

        df_season = df[df['Season'] == season]

        for team in teams:

            home_games = df_season.loc[df_season['homeTeam'] == team, home_stats2]
            away_games = df_season.loc[df_season['awayTeam'] == team, away_stats2]

            if not home_games.empty:
                home_parts.append(_smoothed(home_games))
            if not away_games.empty:
                away_parts.append(_smoothed(away_games))

    # Each game has one home team and one away team, so the per-team pieces
    # cover disjoint rows and can be stacked; home and away pieces carry
    # different columns and are placed side by side.
    feature_blocks = [pd.concat(parts) for parts in (home_parts, away_parts) if parts]
    if not feature_blocks:
        return df

    features = pd.concat(feature_blocks, axis=1)

    # A single combine_first replaces the former call per (season, team),
    # each of which copied the whole frame.
    return df.combine_first(features)

In [22]:
def calculate_totalavgs(df, moving_averages):
    """Add weighted moving averages over all of a team's games.

    For each season in the module-level ``seasons`` and each team in
    ``teams``, the team's own statistics (``home<stat>`` from its home games,
    ``away<stat>`` from its away games, for every stat in ``orig_stats2``) are
    put into one sequence ordered by index. Fast and slow weighted moving
    averages of that sequence, shifted by one game, are written to
    ``fasthometotal<stat>`` / ``slowhometotal<stat>`` on the team's home rows
    and to ``fastawaytotal<stat>`` / ``slowawaytotal<stat>`` on its away rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Game table with ``Season``, ``homeTeam``, ``awayTeam`` and the
        ``home<stat>`` / ``away<stat>`` columns. The index is expected to be
        unique. NOTE(review): index order is assumed to be chronological -
        confirm.
    moving_averages : sequence of int
        ``[fast_window, slow_window]``; should equal the lengths of
        ``fast_weights`` and ``slow_weights`` respectively.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object. It is modified in place: the new columns are
        added, every row containing a NaN in any column is dropped and the
        index is reset.
    """
    fast_window, slow_window = moving_averages[0], moving_averages[1]
    fast_avg = weighted_ma(fast_weights)
    slow_avg = weighted_ma(slow_weights)

    sides = ('home', 'away')
    speeds = ('fast', 'slow')

    # Source columns per side and target columns per (speed, side), all in
    # the order of orig_stats2 so they can be matched positionally.
    source_cols = {
        side: ['{}{}'.format(side, stat) for stat in orig_stats2]
        for side in sides
    }
    target_cols = {
        (speed, side): ['{}{}total{}'.format(speed, side, stat) for stat in orig_stats2]
        for speed in speeds
        for side in sides
    }
    new_columns = [name for names in target_cols.values() for name in names]

    # Results are collected in a separate frame and copied to df at the end.
    totals = pd.DataFrame(np.nan, index=df.index, columns=new_columns)

    for season in seasons:

        in_season = df['Season'] == season

        for team in teams:

            rows = {
                'home': df.index[in_season & (df['homeTeam'] == team)],
                'away': df.index[in_season & (df['awayTeam'] == team)],
            }

            # The team's own statistics from both venues under common names.
            parts = []
            for side in sides:
                if len(rows[side]):
                    part = df.loc[rows[side], source_cols[side]]
                    part.columns = list(orig_stats2)
                    parts.append(part)

            if not parts:
                continue

            # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
            team_games = pd.concat(parts).sort_index()

            smoothed = {
                'fast': team_games.rolling(window=fast_window).apply(fast_avg, raw=True).shift(1),
                'slow': team_games.rolling(window=slow_window).apply(slow_avg, raw=True).shift(1),
            }

            # Write each game's averages to the columns of the side on which
            # the team played that game.
            for side in sides:
                idx = rows[side]
                if len(idx):
                    for speed in speeds:
                        totals.loc[idx, target_cols[(speed, side)]] = smoothed[speed].loc[idx].values

    for name in new_columns:
        df[name] = totals[name]

    # Rows without a complete set of values (e.g. the first games of a
    # season, where the windows are not yet full) are removed.
    df.dropna(inplace=True)
    df.reset_index(drop=True, inplace=True)

    return df

In [None]:
# Build the modelling table: first the moving averages computed separately
# for home and away games, then the averages over all of each team's games
# (the second step also drops rows that contain missing values).
df_final = calculate_teamavgs(df_complete, moving_averages)
df_final = calculate_totalavgs(df_final, moving_averages)

In [24]:
# Categorical variables become ordered pandas categoricals.
for v in cat_vars:
    as_category = df_final[v].astype('category')
    df_final[v] = as_category.cat.as_ordered()

# Continuous variables: missing values are replaced by 0 and the column is
# stored as 32-bit floats.
for v in contin_vars:
    without_gaps = df_final[v].fillna(0)
    df_final[v] = without_gaps.astype('float32')

# Keep only the model columns, categorical first and continuous after.
model_columns = cat_vars + contin_vars
df_final = df_final[model_columns]

In [25]:
# Save the final modelling table in Feather format as NBA_final under PATH.
df_final.to_feather('{}/NBA_final'.format(PATH))

In [26]:
# Show the last ten rows of the final table; as the last expression of the
# cell, the (rows, columns) shape is echoed by the notebook as well.
display_all(df_final.tail(10))
df_final.shape

Unnamed: 0,Season,homeTeam,awayTeam,fasthomePTS,fasthome3P%,fasthome3PA,fasthome3PM,fasthomeAST,fasthomeBLK,fasthomeDREB,fasthomeFG%,fasthomeFGA,fasthomeFGM,fasthomeFT%,fasthomeFTA,fasthomeFTM,fasthomeOREB,fasthomePF,fasthome+/-,fasthomePPS,fasthomeSTL,fasthomeTOV,fasthomeREB,fasthomePOS,fasthomeOR,fasthomeDR,fasthomeSOS,fasthomePIE,fasthomeTIE,slowhomePTS,slowhome3P%,slowhome3PA,slowhome3PM,slowhomeAST,slowhomeBLK,slowhomeDREB,slowhomeFG%,slowhomeFGA,slowhomeFGM,slowhomeFT%,slowhomeFTA,slowhomeFTM,slowhomeOREB,slowhomePF,slowhome+/-,slowhomePPS,slowhomeSTL,slowhomeTOV,slowhomeREB,slowhomePOS,slowhomeOR,slowhomeDR,slowhomeSOS,slowhomePIE,slowhomeTIE,fastawayPTS,fastaway3P%,fastaway3PA,fastaway3PM,fastawayAST,fastawayBLK,fastawayDREB,fastawayFG%,fastawayFGA,fastawayFGM,fastawayFT%,fastawayFTA,fastawayFTM,fastawayOREB,fastawayPF,fastaway+/-,fastawayPPS,fastawaySTL,fastawayTOV,fastawayREB,fastawayPOS,fastawayOR,fastawayDR,fastawaySOS,fastawayPIE,fastawayTIE,slowawayPTS,slowaway3P%,slowaway3PA,slowaway3PM,slowawayAST,slowawayBLK,slowawayDREB,slowawayFG%,slowawayFGA,slowawayFGM,slowawayFT%,slowawayFTA,slowawayFTM,slowawayOREB,slowawayPF,slowaway+/-,slowawayPPS,slowawaySTL,slowawayTOV,slowawayREB,slowawayPOS,slowawayOR,slowawayDR,slowawaySOS,slowawayPIE,slowawayTIE,fasthometotalPTS,fasthometotal3P%,fasthometotal3PA,fasthometotal3PM,fasthometotalAST,fasthometotalBLK,fasthometotalDREB,fasthometotalFG%,fasthometotalFGA,fasthometotalFGM,fasthometotalFT%,fasthometotalFTA,fasthometotalFTM,fasthometotalOREB,fasthometotalPF,fasthometotal+/-,fasthometotalPPS,fasthometotalSTL,fasthometotalTOV,fasthometotalREB,fasthometotalPOS,fasthometotalOR,fasthometotalDR,fasthometotalSOS,fasthometotalPIE,fasthometotalTIE,slowhometotalPTS,slowhometotal3P%,slowhometotal3PA,slowhometotal3PM,slowhometotalAST,slowhometotalBLK,slowhometotalDREB,slowhometotalFG%,slowhometotalFGA,slowhometotalFGM,slowhometotalFT%,slowhometotalFTA,slowhometotalFTM,slowhometotalOREB,slowhometotalPF,slowhometotal+/-,slowho
metotalPPS,slowhometotalSTL,slowhometotalTOV,slowhometotalREB,slowhometotalPOS,slowhometotalOR,slowhometotalDR,slowhometotalSOS,slowhometotalPIE,slowhometotalTIE,fastawaytotalPTS,fastawaytotal3P%,fastawaytotal3PA,fastawaytotal3PM,fastawaytotalAST,fastawaytotalBLK,fastawaytotalDREB,fastawaytotalFG%,fastawaytotalFGA,fastawaytotalFGM,fastawaytotalFT%,fastawaytotalFTA,fastawaytotalFTM,fastawaytotalOREB,fastawaytotalPF,fastawaytotal+/-,fastawaytotalPPS,fastawaytotalSTL,fastawaytotalTOV,fastawaytotalREB,fastawaytotalPOS,fastawaytotalOR,fastawaytotalDR,fastawaytotalSOS,fastawaytotalPIE,fastawaytotalTIE,slowawaytotalPTS,slowawaytotal3P%,slowawaytotal3PA,slowawaytotal3PM,slowawaytotalAST,slowawaytotalBLK,slowawaytotalDREB,slowawaytotalFG%,slowawaytotalFGA,slowawaytotalFGM,slowawaytotalFT%,slowawaytotalFTA,slowawaytotalFTM,slowawaytotalOREB,slowawaytotalPF,slowawaytotal+/-,slowawaytotalPPS,slowawaytotalSTL,slowawaytotalTOV,slowawaytotalREB,slowawaytotalPOS,slowawaytotalOR,slowawaytotalDR,slowawaytotalSOS,slowawaytotalPIE,slowawaytotalTIE,HomeMatchScore,AwayMatchScore,MatchScore
4900,2019-2020,UTA,TOR,2.43046,0.868198,0.739536,0.307268,0.399924,0.076865,0.673992,0.98848,1.756279,0.834094,1.586282,0.585712,0.455004,0.252568,0.42304,-0.071347,0.029104,0.137177,0.302511,0.92656,2.001057,2.52639,2.606246,-7.985608,2.177844,0.494162,2.391952,0.809587,0.724958,0.282875,0.411416,0.075868,0.719959,0.979488,1.767042,0.831446,1.621025,0.566032,0.446184,0.22475,0.398672,-0.07487,0.028422,0.118517,0.303605,0.944709,2.030171,2.455392,2.534904,-7.951146,2.168843,0.497282,2.492295,0.735569,0.920757,0.327911,0.57382,0.0625,0.730974,0.994463,1.726313,0.82382,1.762338,0.607782,0.516743,0.052987,0.480974,0.138128,0.030077,0.208333,0.226123,0.783961,2.100619,2.472864,2.336633,13.623025,2.256231,0.519324,2.463625,0.728487,0.884022,0.310661,0.534388,0.07025,0.727087,0.976859,1.752868,0.820808,1.757756,0.602388,0.511349,0.082069,0.483501,0.087301,0.029329,0.199441,0.2407,0.809156,2.109728,2.433031,2.347531,8.549919,2.208647,0.510609,2.237253,0.66954,0.781393,0.254947,0.436073,0.107972,0.652492,1.029243,1.642789,0.811358,1.641077,0.462804,0.359589,0.166857,0.400495,0.127664,0.028399,0.171709,0.280251,0.819349,1.896975,2.461423,2.321553,13.986992,2.038527,0.543961,2.314003,0.740236,0.791986,0.285025,0.455753,0.109478,0.680263,1.041359,1.682197,0.840781,1.64209,0.447498,0.347417,0.184666,0.395976,0.130555,0.028677,0.153001,0.282535,0.864928,1.912923,2.521216,2.380517,14.069835,2.145101,0.545354,2.492295,0.735569,0.920757,0.327911,0.57382,0.0625,0.730974,0.994463,1.726313,0.82382,1.762338,0.607782,0.516743,0.052987,0.480974,0.138128,0.030077,0.208333,0.226123,0.783961,2.100619,2.472864,2.336633,13.623025,2.256231,0.519324,2.469578,0.720272,0.884022,0.307089,0.532007,0.06906,0.721134,0.973168,1.758821,0.819617,1.748351,0.621436,0.523254,0.091593,0.489453,0.095634,0.029328,0.205394,0.231176,0.812728,2.105842,2.442972,2.349182,9.37899,2.199123,0.507889,92.0,101.0,193.0
4901,2019-2020,IND,BOS,2.355499,0.796851,0.498193,0.189307,0.595986,0.167237,0.892314,1.067637,1.829718,0.935217,1.702569,0.365963,0.295757,0.202911,0.306602,0.498573,0.026868,0.162005,0.302987,1.095224,2.019482,2.431355,1.913934,51.742062,2.384561,0.652771,2.358463,0.75506,0.518528,0.185444,0.589183,0.147631,0.851375,1.068293,1.826633,0.934609,1.672016,0.381658,0.303799,0.18311,0.344795,0.378211,0.026959,0.162783,0.291349,1.034485,2.031513,2.419615,2.027407,39.220749,2.331511,0.61779,2.374619,0.85781,0.640316,0.266457,0.380613,0.107211,0.748002,1.022089,1.796994,0.880613,1.743065,0.41914,0.346937,0.22089,0.392028,0.186263,0.027538,0.169711,0.229833,0.968893,1.924827,2.57333,2.37156,20.176952,2.244483,0.546517,2.381252,0.831165,0.646803,0.260092,0.396283,0.102084,0.761098,1.002854,1.80246,0.86691,1.712814,0.477669,0.387341,0.223028,0.437534,0.137744,0.027527,0.164973,0.252574,0.984126,1.976542,2.516189,2.367254,14.893527,2.227986,0.537732,2.250761,0.694169,0.491153,0.162005,0.481164,0.119673,0.778919,1.036977,1.788718,0.886416,1.499924,0.442256,0.315925,0.156298,0.409247,-0.020072,0.026502,0.163432,0.325723,0.935217,2.081486,2.259082,2.272218,-1.313611,2.079195,0.524016,2.25426,0.71974,0.52094,0.181005,0.475333,0.108424,0.756033,1.022611,1.791876,0.874826,1.571458,0.431202,0.323602,0.150734,0.406282,-0.046652,0.026561,0.176868,0.320184,0.906766,2.079501,2.268522,2.306067,-3.754561,2.06101,0.50989,2.137367,0.690097,0.731545,0.241533,0.410388,0.120148,0.713851,0.857354,1.902017,0.781012,1.645282,0.421804,0.333809,0.251807,0.417618,-0.01484,0.023483,0.172184,0.261511,0.965658,2.027594,2.200552,2.213754,-1.32015,1.768122,0.483364,2.160679,0.679285,0.748172,0.245971,0.417271,0.116669,0.698624,0.856224,1.896913,0.792003,1.601535,0.422239,0.330703,0.248379,0.424934,-0.019298,0.023359,0.175489,0.260005,0.947003,2.024738,2.18789,2.202057,-1.416671,1.787046,0.483069,111.0,114.0,225.0
4902,2019-2020,WAS,NYK,2.145167,0.886083,0.835331,0.350552,0.443113,0.054985,0.570776,0.831459,1.695586,0.678177,1.784218,0.50742,0.438261,0.139935,0.495053,-0.046043,0.026535,0.206526,0.304319,0.710712,2.016954,2.211365,2.263275,-5.190945,1.699344,0.441223,2.225826,0.836455,0.800025,0.322429,0.463504,0.053187,0.594703,0.85471,1.764403,0.735263,1.715291,0.518127,0.432871,0.157025,0.491294,-0.035102,0.026185,0.204887,0.30465,0.751728,2.071813,2.208026,2.246379,-3.83532,1.784646,0.445595,2.203006,0.685512,0.453862,0.154014,0.506088,0.105784,0.655632,1.023402,1.779776,0.872336,1.311739,0.481355,0.304319,0.246766,0.362348,-0.176846,0.025799,0.108257,0.255232,0.902397,1.93621,2.373324,2.562854,-18.953022,1.897546,0.477774,2.223241,0.690658,0.471759,0.165206,0.513552,0.102113,0.660003,0.993425,1.800936,0.880183,1.308512,0.460239,0.297669,0.254052,0.383237,-0.13124,0.025047,0.1251,0.264688,0.914055,1.948977,2.315813,2.454537,-13.872361,1.926949,0.484104,2.117199,0.852987,0.816686,0.331906,0.466419,0.064307,0.570776,0.81328,1.728215,0.673516,1.693788,0.530727,0.438261,0.181887,0.509037,-0.162576,0.0259,0.201865,0.299658,0.752664,2.014158,2.186624,2.35406,-16.743608,1.631754,0.414669,2.235156,0.89586,0.7955,0.336693,0.483524,0.073297,0.578114,0.867846,1.737326,0.722715,1.69759,0.548706,0.453033,0.164273,0.490305,-0.126157,0.027129,0.205898,0.292344,0.742386,2.040992,2.275909,2.404544,-12.863511,1.785122,0.430868,2.083904,0.574962,0.43379,0.116343,0.376427,0.105498,0.73906,0.942618,1.782249,0.806602,1.587966,0.468227,0.354357,0.261511,0.369292,-0.076294,0.024378,0.13575,0.254661,1.000571,1.917895,2.26403,2.335475,-7.14446,1.864203,0.528299,2.194159,0.634759,0.457284,0.138914,0.426112,0.105936,0.751475,0.973999,1.806107,0.844722,1.524141,0.506226,0.3658,0.28294,0.366547,-0.045657,0.025304,0.128242,0.27422,1.034415,1.956331,2.333444,2.376403,-4.295933,1.985294,0.531472,122.0,115.0,237.0
4903,2019-2020,HOU,MIN,2.36016,0.592941,0.817161,0.239441,0.428843,0.132515,0.659722,0.922384,1.916571,0.846461,1.645196,0.54747,0.427797,0.24648,0.501807,-0.159627,0.025684,0.172374,0.333048,0.906202,2.17266,2.261618,2.418103,-15.648451,1.953339,0.462664,2.388027,0.614062,0.851613,0.257128,0.420631,0.131835,0.67396,0.936369,1.880079,0.843254,1.624333,0.571061,0.444391,0.212515,0.485432,-0.074616,0.026577,0.182287,0.312907,0.886474,2.161656,2.300791,2.37452,-7.372876,2.004749,0.474464,2.755993,0.821442,0.83086,0.325628,0.605213,0.096271,0.614726,1.092694,1.979547,1.039098,1.588375,0.46661,0.352169,0.15839,0.505613,-0.011701,0.029002,0.245814,0.326674,0.773116,2.274692,2.525754,2.535041,-0.928611,2.395025,0.485831,2.698242,0.811481,0.836351,0.324146,0.562307,0.094424,0.625416,1.050451,1.960232,0.990215,1.585346,0.521032,0.393666,0.162156,0.479762,-0.08109,0.028677,0.2118,0.325289,0.787573,2.276021,2.47103,2.543971,-7.294087,2.307353,0.473439,2.157059,0.566904,0.799848,0.218132,0.434836,0.109874,0.61777,0.89668,1.795377,0.767218,1.551769,0.538813,0.40449,0.26113,0.501807,-0.32011,0.025179,0.183029,0.362348,0.8789,2.066429,2.180337,2.498695,-31.835806,1.742913,0.433623,2.242628,0.605667,0.875865,0.261498,0.436895,0.114616,0.62798,0.899429,1.826884,0.791083,1.603479,0.512382,0.398964,0.243072,0.482806,-0.20834,0.025464,0.187936,0.346224,0.871053,2.08648,2.223118,2.431753,-20.863543,1.842925,0.453829,2.339707,0.616039,0.76075,0.23126,0.515887,0.067161,0.640887,0.91739,1.924372,0.844749,1.597793,0.544616,0.41895,0.19882,0.428558,-0.184266,0.025465,0.205955,0.298896,0.839707,2.191815,2.237755,2.406472,-16.871731,1.874715,0.441965,2.366287,0.673066,0.778753,0.257645,0.52507,0.069129,0.662996,0.942633,1.910086,0.860881,1.537393,0.518133,0.386879,0.206012,0.413203,-0.135504,0.025985,0.194925,0.312599,0.869008,2.172275,2.27998,2.403542,-12.356203,1.95183,0.457933,117.0,111.0,228.0
4904,2019-2020,CHI,CLE,2.26484,0.590972,0.745719,0.208143,0.519216,0.109018,0.587329,0.953425,2.000856,0.915906,1.566524,0.29785,0.224886,0.302226,0.494197,-0.053082,0.023588,0.188737,0.306221,0.889555,2.060477,2.293889,2.347442,-5.355267,1.845367,0.459268,2.280122,0.647608,0.746691,0.23086,0.511393,0.10637,0.579797,0.969414,1.93137,0.89659,1.543649,0.348143,0.256082,0.282328,0.512711,-0.043169,0.024746,0.209971,0.359357,0.862125,2.086816,2.277773,2.323321,-4.554849,1.874243,0.463325,2.215278,0.767818,0.660483,0.242485,0.538527,0.060312,0.593893,0.998754,1.807363,0.862538,1.337043,0.377188,0.247717,0.214992,0.355403,-0.217371,0.025768,0.133847,0.402207,0.808885,2.086792,2.211524,2.429348,-21.782305,1.842371,0.457956,2.233427,0.793013,0.659463,0.250162,0.550967,0.065924,0.622027,1.005953,1.812174,0.871114,1.367455,0.357071,0.241036,0.208991,0.380088,-0.153302,0.025927,0.13064,0.404029,0.831019,2.089747,2.226561,2.380304,-15.374273,1.902855,0.473327,2.200533,0.664697,0.716324,0.225361,0.510464,0.0918,0.578291,0.90098,1.959285,0.847127,1.520681,0.388984,0.280917,0.202721,0.549943,-0.101123,0.023422,0.259703,0.226408,0.781012,2.08103,2.204179,2.306141,-10.196129,1.65996,0.419235,2.223201,0.689239,0.709157,0.231776,0.510173,0.090148,0.578316,0.921724,1.953089,0.863745,1.479718,0.372657,0.263934,0.208697,0.517556,-0.100807,0.023742,0.246292,0.226214,0.787013,2.061713,2.249728,2.352984,-10.325612,1.707799,0.429173,2.323889,0.774203,0.656511,0.252507,0.548965,0.057103,0.756568,0.96614,1.730804,0.831939,1.425275,0.560833,0.407504,0.253132,0.408541,0.011197,0.026762,0.115702,0.362175,1.0097,2.021993,2.290817,2.284596,0.622105,2.154829,0.518637,2.305601,0.75817,0.662774,0.25092,0.559953,0.052432,0.703674,0.967541,1.76751,0.842426,1.437282,0.50963,0.369829,0.237317,0.385519,-0.05284,0.026328,0.134088,0.330562,0.940992,2.018715,2.304539,2.36381,-5.927153,2.063049,0.493807,108.0,103.0,211.0
4905,2019-2020,MEM,ORL,2.292523,0.685816,0.660198,0.218037,0.5625,0.107021,0.844559,0.971566,1.884418,0.877188,1.470795,0.480403,0.32011,0.289193,0.339041,0.26094,0.025436,0.184836,0.293569,1.133752,2.032306,2.351225,2.080396,27.082903,2.114155,0.572623,2.263781,0.633784,0.597806,0.187472,0.566023,0.135058,0.840598,0.984802,1.884637,0.88868,1.453596,0.448236,0.298949,0.270826,0.348225,0.244637,0.025103,0.181556,0.285503,1.111424,2.027737,2.326248,2.072363,25.38851,2.094548,0.573423,2.604357,0.849762,0.555556,0.230879,0.53672,0.094654,0.742009,1.108885,1.816591,0.96328,1.660322,0.566591,0.446918,0.186929,0.360826,0.269502,0.029907,0.165906,0.27188,0.928938,2.083845,2.603658,2.340978,26.26807,2.466847,0.562619,2.571752,0.851101,0.598462,0.247643,0.550836,0.108648,0.734623,1.086387,1.832747,0.95259,1.625926,0.540521,0.418929,0.193926,0.356355,0.21763,0.029286,0.164448,0.275816,0.92855,2.084529,2.569867,2.357809,21.205807,2.401589,0.549488,2.311834,0.758134,0.718798,0.267314,0.54785,0.107021,0.853215,0.979357,1.85312,0.869197,1.436767,0.465753,0.306126,0.28653,0.341039,0.177036,0.026013,0.169521,0.344178,1.139745,2.046721,2.351074,2.175105,17.596857,2.164764,0.574302,2.328768,0.757643,0.701402,0.259587,0.55888,0.11188,0.858293,0.978825,1.873262,0.878527,1.462708,0.464636,0.312127,0.28478,0.335669,0.251916,0.025925,0.180721,0.326393,1.143074,2.050153,2.364179,2.113461,25.071743,2.182209,0.584183,2.604357,0.849762,0.555556,0.230879,0.53672,0.094654,0.742009,1.108885,1.816591,0.96328,1.660322,0.566591,0.446918,0.186929,0.360826,0.269502,0.029907,0.165906,0.27188,0.928938,2.083845,2.603658,2.340978,26.26807,2.466847,0.562619,2.553263,0.816533,0.628686,0.245724,0.560579,0.092176,0.725634,1.066617,1.845405,0.93998,1.661947,0.540643,0.427578,0.194825,0.346418,0.156186,0.028916,0.153272,0.268381,0.920459,2.088736,2.545604,2.397174,14.842974,2.35782,0.535437,115.0,120.0,235.0
4906,2019-2020,SAS,DAL,2.326484,1.019292,0.662005,0.324011,0.470129,0.097508,0.604357,1.083581,1.636796,0.851979,1.625571,0.381849,0.298516,0.067161,0.400495,-0.076199,0.029639,0.154966,0.321823,0.671518,1.989922,2.436752,2.518037,-8.128415,2.096271,0.481144,2.327626,0.931302,0.657632,0.294515,0.476811,0.095394,0.65188,1.064564,1.687866,0.860434,1.610046,0.401918,0.312243,0.09826,0.405184,-0.022165,0.028862,0.162886,0.309626,0.75014,2.006537,2.418595,2.443449,-2.485381,2.111529,0.497689,2.307078,0.65097,0.898973,0.285293,0.550323,0.108162,0.810407,0.986339,1.816591,0.85997,1.551513,0.405346,0.301846,0.144216,0.413337,0.076865,0.026529,0.152207,0.315734,0.954623,2.093422,2.300744,2.23396,6.678412,2.129471,0.539343,2.309896,0.677659,0.898778,0.2954,0.540581,0.128935,0.810383,0.973545,1.812946,0.84702,1.529833,0.437976,0.320456,0.146063,0.410446,0.084056,0.026607,0.155706,0.320229,0.956445,2.107345,2.288311,2.212732,7.557934,2.109864,0.534458,2.395264,0.687823,0.599574,0.20402,0.45532,0.108295,0.692971,0.910612,1.829858,0.833936,1.778244,0.583179,0.523371,0.092979,0.500689,-0.147707,0.026086,0.205146,0.164107,0.78595,2.090877,2.285242,2.423952,-13.871021,2.078995,0.468681,2.357119,0.740292,0.616709,0.224899,0.460487,0.10605,0.667132,0.943502,1.779506,0.828923,1.758715,0.537761,0.474374,0.092484,0.46662,-0.122009,0.02675,0.200445,0.189858,0.759615,2.047024,2.324129,2.440068,-11.593887,2.056524,0.471871,2.378798,0.750914,0.933111,0.345919,0.507165,0.162377,0.728522,0.919204,1.899339,0.856412,1.455186,0.452346,0.320055,0.197514,0.306737,0.154824,0.02552,0.16517,0.249659,0.926036,2.079694,2.336007,2.177801,15.820543,2.03086,0.498744,2.360481,0.725388,0.924392,0.330435,0.518188,0.148531,0.749436,0.93635,1.878205,0.857321,1.479788,0.440342,0.315404,0.183902,0.333962,0.134913,0.025778,0.161859,0.266535,0.933337,2.0832,2.327001,2.192144,13.485658,2.056045,0.509113,119.0,109.0,228.0
4907,2019-2020,POR,PHX,2.38242,0.717856,0.721937,0.242295,0.480974,0.120434,0.715088,1.002977,1.782249,0.856355,1.549286,0.567637,0.427416,0.179033,0.381944,-0.030251,0.027954,0.169521,0.29043,0.894121,2.076742,2.389415,2.422662,-3.324666,2.120909,0.502586,2.366409,0.756455,0.706363,0.251624,0.46455,0.124273,0.711565,0.999178,1.79221,0.858007,1.522955,0.537534,0.398771,0.183237,0.391736,-0.030203,0.027601,0.163422,0.281027,0.894801,2.059517,2.394331,2.425436,-3.110595,2.096626,0.500651,2.462804,0.874562,0.548516,0.208714,0.622241,0.050133,0.568493,1.022422,1.829148,0.896309,1.776103,0.541952,0.461473,0.26446,0.55527,0.164288,0.028114,0.217656,0.361777,0.832953,2.096537,2.455666,2.284228,17.143795,2.150209,0.507613,2.375922,0.858048,0.5482,0.209686,0.603193,0.057835,0.572016,1.00782,1.784735,0.862999,1.740325,0.525795,0.440238,0.232778,0.520333,0.021793,0.027799,0.19421,0.347224,0.804794,2.062976,2.40484,2.382355,2.248592,2.057034,0.488411,2.417713,0.709199,0.728596,0.242295,0.494292,0.119768,0.681126,1.019892,1.803558,0.881659,1.529443,0.556317,0.4121,0.18769,0.390601,-0.082858,0.028013,0.1582,0.285769,0.868817,2.079251,2.421615,2.506682,-8.506619,2.12657,0.48827,2.434113,0.730261,0.706025,0.243606,0.474127,0.111653,0.702628,1.034659,1.822557,0.904279,1.579764,0.50549,0.381949,0.188346,0.418371,-0.033367,0.027902,0.153633,0.287518,0.890974,2.075363,2.443375,2.476579,-3.320368,2.189095,0.502156,2.708904,0.900923,0.870529,0.377188,0.618341,0.092656,0.719273,1.069644,1.810788,0.928558,1.850742,0.537957,0.4746,0.203957,0.467846,0.110255,0.031213,0.144977,0.313166,0.923231,2.088505,2.706985,2.592796,11.418901,2.58457,0.527352,2.555889,0.86946,0.798249,0.336031,0.605974,0.093215,0.709919,1.044663,1.781098,0.891021,1.797988,0.506785,0.437816,0.196474,0.453147,0.027916,0.029916,0.140069,0.331436,0.906394,2.070512,2.573796,2.543399,3.039682,2.404999,0.515273,121.0,105.0,226.0
4908,2019-2020,LAL,BKN,2.424087,0.57872,0.729262,0.204243,0.473078,0.133942,0.791762,0.959465,1.913813,0.877568,1.663775,0.579243,0.464707,0.226979,0.469939,0.223649,0.026497,0.211187,0.313356,1.018741,2.184317,2.329265,2.112354,21.691175,2.178558,0.534982,2.429748,0.607357,0.718985,0.210778,0.476698,0.138024,0.788701,0.967034,1.888569,0.873875,1.59459,0.618991,0.471218,0.235191,0.470376,0.22156,0.026899,0.203947,0.309469,1.023891,2.166593,2.350255,2.134127,21.612751,2.173577,0.535537,2.41391,0.671051,0.929579,0.311066,0.529153,0.108961,0.737463,0.893135,1.863071,0.832605,1.50466,0.578435,0.437635,0.275443,0.465726,-0.049649,0.025842,0.103592,0.308113,1.012905,2.083678,2.312052,2.367512,-5.546022,2.117827,0.51024,2.362651,0.660473,0.896223,0.29561,0.49839,0.094844,0.761919,0.885819,1.856557,0.817669,1.49713,0.578242,0.431704,0.273018,0.468216,-0.005992,0.02555,0.114384,0.307412,1.034938,2.078992,2.283567,2.289132,-0.556471,2.075653,0.517332,2.377283,0.580917,0.699867,0.195681,0.45234,0.121195,0.77597,0.95391,1.834475,0.83847,1.746927,0.603311,0.504661,0.19882,0.530061,0.212804,0.02702,0.191305,0.300323,0.974791,2.133648,2.332575,2.122881,20.969393,2.15801,0.544782,2.378789,0.615548,0.70854,0.210113,0.462763,0.133416,0.739794,0.971685,1.871356,0.873699,1.741898,0.504815,0.421278,0.1883,0.517549,0.171258,0.026504,0.185571,0.296363,0.928094,2.130438,2.332352,2.166365,16.598707,2.13154,0.533594,2.340278,0.704652,0.72089,0.237158,0.482782,0.079528,0.8496,0.984465,1.667618,0.783866,1.614288,0.675704,0.535388,0.168094,0.454053,-0.026731,0.030084,0.072679,0.394406,1.017694,2.126295,2.280103,2.323465,-4.336174,2.249857,0.529468,2.359083,0.69607,0.774189,0.256034,0.494625,0.087045,0.820961,0.96114,1.717536,0.796314,1.58629,0.650862,0.510422,0.19551,0.457034,-0.032584,0.029,0.080574,0.372367,1.016471,2.115411,2.288263,2.334714,-4.645167,2.216137,0.524557,102.0,104.0,206.0
4909,2019-2020,GSW,LAC,2.401731,0.646984,0.826484,0.244102,0.670282,0.070967,0.696537,0.985084,1.870434,0.871385,1.57382,0.549372,0.414859,0.225076,0.441115,-0.052511,0.027085,0.083333,0.247717,0.921613,2.067865,2.444333,2.486173,-4.18396,2.077245,0.479629,2.304232,0.640978,0.775949,0.229646,0.637895,0.078523,0.693768,0.971048,1.840258,0.849179,1.604093,0.496286,0.376229,0.225781,0.464828,-0.159851,0.026278,0.099296,0.315964,0.919548,2.07953,2.329339,2.479068,-14.972937,1.999753,0.469575,2.342846,0.684475,0.797374,0.262177,0.438356,0.073059,0.852835,0.947489,1.75,0.796518,1.706022,0.61406,0.487633,0.176846,0.486872,0.289193,0.027891,0.145453,0.293379,1.02968,2.071884,2.35438,2.064469,28.991085,2.204195,0.575214,2.32399,0.69086,0.747776,0.251859,0.435694,0.090664,0.822055,0.921847,1.779676,0.80242,1.649636,0.58826,0.467292,0.177857,0.477409,0.1552,0.026737,0.129273,0.283785,0.999911,2.078427,2.283775,2.124576,15.919914,2.136749,0.547215,2.401731,0.646984,0.826484,0.244102,0.670282,0.070967,0.696537,0.985084,1.870434,0.871385,1.57382,0.549372,0.414859,0.225076,0.441115,-0.052511,0.027085,0.083333,0.247717,0.921613,2.067865,2.444333,2.486173,-4.18396,2.077245,0.479629,2.394151,0.678712,0.798349,0.251512,0.652643,0.079179,0.692213,0.994236,1.846891,0.871118,1.536089,0.541427,0.400403,0.225829,0.450153,-0.004989,0.02728,0.104106,0.260497,0.918042,2.053187,2.448674,2.442452,0.622173,2.082481,0.489605,2.287005,0.574049,0.729072,0.204433,0.34922,0.09218,0.806221,0.895434,1.769026,0.760274,1.609913,0.732877,0.562024,0.230594,0.516362,0.084189,0.026955,0.113489,0.305365,1.036815,2.104238,2.26341,2.181113,8.229636,2.028919,0.523565,2.379257,0.681505,0.712745,0.231086,0.406485,0.0941,0.776823,0.954623,1.753234,0.801115,1.658463,0.697186,0.54594,0.218081,0.484292,0.138491,0.028386,0.122382,0.293557,0.994905,2.073478,2.394516,2.25359,14.092593,2.161757,0.535359,107.0,131.0,238.0


(4910, 214)

## Deep Learning -mallin ennustukseen käyttämän taulukon kokoaminen

In [27]:
# Collect the upcoming games into their own table

df_matchup = pd.read_excel(f'{PATH}Matchup.xlsx', parse_dates=[1])

# Keep only the games played on the betting date. The explicit copy makes the
# result an independent frame, so the column assignments below do not raise
# pandas' SettingWithCopyWarning.
df_matchup = df_matchup[df_matchup['GameDate'] == bet_date].copy()

# Strip stray whitespace from the team names, then translate them with
# team_map (defined elsewhere in the notebook).
for side in ('homeTeam', 'awayTeam'):
    df_matchup[side] = df_matchup[side].str.strip().map(team_map)
df_matchup.reset_index(drop=True, inplace=True)

In [28]:
# Show the upcoming games that were kept for the betting date.
display(df_matchup)

Unnamed: 0,Season,GameDate,awayTeam,homeTeam
0,2019-2020,2020-03-11,CHA,MIA
1,2019-2020,2020-03-11,DET,PHI
2,2019-2020,2020-03-11,NYK,ATL
3,2019-2020,2020-03-11,UTA,OKC
4,2019-2020,2020-03-11,DEN,DAL
5,2019-2020,2020-03-11,NOP,SAC


In [29]:
def calculate_predteamavgs(df, moving_averages):
    """Fill the venue-specific weighted moving averages for the upcoming games.

    For every row of the module-level ``df_matchup``, the home team's home
    games and the away team's away games are selected from ``df``. For each
    stat column listed in ``home_stats2`` / ``away_stats2`` a fast and a slow
    weighted rolling mean are computed and shifted down by one row, so a row
    only receives values computed from the rows before it. The results are
    merged back with ``DataFrame.combine_first``, which only fills cells that
    are missing in ``df``.

    Relies on names defined elsewhere in the notebook: ``df_matchup``,
    ``home_stats2``, ``away_stats2``, ``fast_weights``, ``slow_weights`` and
    ``weighted_ma``.

    Args:
        df: Game table containing ``homeTeam``, ``awayTeam`` and the stat
            columns named in ``home_stats2`` and ``away_stats2``.
        moving_averages: Sequence whose first item is the fast rolling window
            length and whose second item is the slow one.

    Returns:
        The game table with ``fast<stat>`` / ``slow<stat>`` columns filled in.
    """
    fast_window, slow_window = moving_averages[0], moving_averages[1]
    fast_average = weighted_ma(fast_weights)
    slow_average = weighted_ma(slow_weights)

    def shifted_averages(games, stats):
        # Build the features in a fresh frame rather than adding columns to a
        # slice of ``df``; writing to the slice raised SettingWithCopyWarning.
        features = pd.DataFrame(index=games.index)
        for stat in stats:
            values = games[stat]
            features['fast{}'.format(stat)] = (
                values.rolling(window=fast_window).apply(fast_average).shift(1)
            )
            features['slow{}'.format(stat)] = (
                values.rolling(window=slow_window).apply(slow_average).shift(1)
            )
        return features

    for home_team, away_team in zip(df_matchup['homeTeam'], df_matchup['awayTeam']):
        # Both selections are taken before either merge, as in the original flow.
        home_features = shifted_averages(df[df['homeTeam'] == home_team], home_stats2)
        away_features = shifted_averages(df[df['awayTeam'] == away_team], away_stats2)

        df = df.combine_first(home_features)
        df = df.combine_first(away_features)

    return df

In [30]:
def calculate_predtotalavgs(df, moving_averages):
    """Fill the venue-independent ("total") weighted moving averages.

    For both teams of every row in the module-level ``df_matchup``, all games
    of the team in ``df`` (home and away) are gathered in index order. For
    each stat in ``orig_stats2`` the team's own value is collected into
    ``total<stat>`` (``home<stat>`` when the team was the home team,
    ``away<stat>`` when it was the away team). A fast and a slow weighted
    rolling mean are computed per column of ``total_stats`` and shifted down
    by one row. The results are written to ``fasthometotal<stat>`` /
    ``slowhometotal<stat>`` on rows where the team is the home team and to
    ``fastawaytotal<stat>`` / ``slowawaytotal<stat>`` where it is the away team.

    Relies on names defined elsewhere in the notebook: ``df_matchup``,
    ``orig_stats2``, ``total_stats``, ``fast_weights``, ``slow_weights`` and
    ``weighted_ma``.

    Args:
        df: Game table containing ``homeTeam``, ``awayTeam`` and the
            ``home<stat>`` / ``away<stat>`` columns. It is modified in place.
        moving_averages: Sequence whose first item is the fast rolling window
            length and whose second item is the slow one.

    Returns:
        The same ``df`` object, with the total-average columns written.
    """
    fast_window, slow_window = moving_averages[0], moving_averages[1]
    fast_average = weighted_ma(fast_weights)
    slow_average = weighted_ma(slow_weights)

    for home_team, away_team in zip(df_matchup['homeTeam'], df_matchup['awayTeam']):
        # Gather every game of each team before anything is written to df.
        # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
        team_games = {}
        for team in (home_team, away_team):
            played = pd.concat([df[df['homeTeam'] == team], df[df['awayTeam'] == team]])
            team_games[team] = played.sort_index()

        for team, df_team in team_games.items():
            at_home = df_team['homeTeam'] == team
            on_road = ~at_home & (df_team['awayTeam'] == team)

            # The team's own stat line per game, regardless of venue.
            totals = pd.DataFrame(np.nan, index=df_team.index, columns=total_stats)
            for stat in orig_stats2:
                home_values = df_team['home{}'.format(stat)]
                away_values = df_team['away{}'.format(stat)]
                totals['total{}'.format(stat)] = home_values.where(
                    at_home, away_values.where(on_road)
                )

            # Shift by one row so a game only receives averages of earlier rows.
            averages = pd.DataFrame(index=df_team.index)
            for total_col in total_stats:
                values = totals[total_col]
                averages['fast{}'.format(total_col)] = (
                    values.rolling(window=fast_window).apply(fast_average).shift(1)
                )
                averages['slow{}'.format(total_col)] = (
                    values.rolling(window=slow_window).apply(slow_average).shift(1)
                )

            # Write the averages to the column set matching the team's role.
            roles = (('home', df_team.index[at_home]), ('away', df_team.index[on_road]))
            for role, rows in roles:
                if rows.empty:
                    # Nothing to write for a role the team never played.
                    continue
                for stat in orig_stats2:
                    for speed in ('fast', 'slow'):
                        source = '{}total{}'.format(speed, stat)
                        target = '{}{}total{}'.format(speed, role, stat)
                        df.loc[rows, target] = averages.loc[rows, source]

    return df

Samalla tavalla kuin yllä laskettiin liikkuvat keskiarvot kaikille tilastoille, tässä ne lasketaan vain nykyisen kauden tähänastisille tilastoille, joita käytetään yllä kerättyjen tulevien otteluiden tulosten ennustamisessa. Tulevien otteluiden tulokset asetetaan aluksi nollaksi, ja ne korvataan myöhemmin DL-mallin ennustuksilla.

In [None]:
# Restrict the data to the current season and add the upcoming games as extra rows.
df_predict = df_complete[df_complete['Season'] == '2019-2020']
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
df_predict = pd.concat([df_predict, df_matchup], ignore_index=True, sort=True)

df_predict = calculate_predteamavgs(df_predict, moving_averages)
df_predict = calculate_predtotalavgs(df_predict, moving_averages)

# Keep only the rows of the betting date and drop every column that still
# contains a NaN. The explicit copy makes the result an independent frame, so
# the column assignments below do not raise SettingWithCopyWarning.
df_predict = df_predict[df_predict['GameDate'] == bet_date].dropna(axis=1).copy()
df_predict.drop(labels='GameDate', axis=1, inplace=True)

# Placeholder scores, set to zero here.
for score_col in ('HomeMatchScore', 'AwayMatchScore', 'MatchScore'):
    df_predict[score_col] = 0
df_predict.reset_index(drop=True, inplace=True)

# Categorical variables become ordered categories.
for v in cat_vars:
    df_predict[v] = df_predict[v].astype('category').cat.as_ordered()

# Continuous variables become float32, with missing values replaced by 0.
for v in contin_vars:
    df_predict[v] = df_predict[v].fillna(0).astype('float32')

# Keep only the listed variables, categorical columns first.
df_predict = df_predict[cat_vars + contin_vars]
df_predict.reset_index(drop=True, inplace=True)

In [32]:
# Show the final prediction table; display_all raises pandas' row/column display limits for this call.
display_all(df_predict)

Unnamed: 0,Season,homeTeam,awayTeam,fasthomePTS,fasthome3P%,fasthome3PA,fasthome3PM,fasthomeAST,fasthomeBLK,fasthomeDREB,fasthomeFG%,fasthomeFGA,fasthomeFGM,fasthomeFT%,fasthomeFTA,fasthomeFTM,fasthomeOREB,fasthomePF,fasthome+/-,fasthomePPS,fasthomeSTL,fasthomeTOV,fasthomeREB,fasthomePOS,fasthomeOR,fasthomeDR,fasthomeSOS,fasthomePIE,fasthomeTIE,slowhomePTS,slowhome3P%,slowhome3PA,slowhome3PM,slowhomeAST,slowhomeBLK,slowhomeDREB,slowhomeFG%,slowhomeFGA,slowhomeFGM,slowhomeFT%,slowhomeFTA,slowhomeFTM,slowhomeOREB,slowhomePF,slowhome+/-,slowhomePPS,slowhomeSTL,slowhomeTOV,slowhomeREB,slowhomePOS,slowhomeOR,slowhomeDR,slowhomeSOS,slowhomePIE,slowhomeTIE,fastawayPTS,fastaway3P%,fastaway3PA,fastaway3PM,fastawayAST,fastawayBLK,fastawayDREB,fastawayFG%,fastawayFGA,fastawayFGM,fastawayFT%,fastawayFTA,fastawayFTM,fastawayOREB,fastawayPF,fastaway+/-,fastawayPPS,fastawaySTL,fastawayTOV,fastawayREB,fastawayPOS,fastawayOR,fastawayDR,fastawaySOS,fastawayPIE,fastawayTIE,slowawayPTS,slowaway3P%,slowaway3PA,slowaway3PM,slowawayAST,slowawayBLK,slowawayDREB,slowawayFG%,slowawayFGA,slowawayFGM,slowawayFT%,slowawayFTA,slowawayFTM,slowawayOREB,slowawayPF,slowaway+/-,slowawayPPS,slowawaySTL,slowawayTOV,slowawayREB,slowawayPOS,slowawayOR,slowawayDR,slowawaySOS,slowawayPIE,slowawayTIE,fasthometotalPTS,fasthometotal3P%,fasthometotal3PA,fasthometotal3PM,fasthometotalAST,fasthometotalBLK,fasthometotalDREB,fasthometotalFG%,fasthometotalFGA,fasthometotalFGM,fasthometotalFT%,fasthometotalFTA,fasthometotalFTM,fasthometotalOREB,fasthometotalPF,fasthometotal+/-,fasthometotalPPS,fasthometotalSTL,fasthometotalTOV,fasthometotalREB,fasthometotalPOS,fasthometotalOR,fasthometotalDR,fasthometotalSOS,fasthometotalPIE,fasthometotalTIE,slowhometotalPTS,slowhometotal3P%,slowhometotal3PA,slowhometotal3PM,slowhometotalAST,slowhometotalBLK,slowhometotalDREB,slowhometotalFG%,slowhometotalFGA,slowhometotalFGM,slowhometotalFT%,slowhometotalFTA,slowhometotalFTM,slowhometotalOREB,slowhometotalPF,slowhometotal+/-,slowho
metotalPPS,slowhometotalSTL,slowhometotalTOV,slowhometotalREB,slowhometotalPOS,slowhometotalOR,slowhometotalDR,slowhometotalSOS,slowhometotalPIE,slowhometotalTIE,fastawaytotalPTS,fastawaytotal3P%,fastawaytotal3PA,fastawaytotal3PM,fastawaytotalAST,fastawaytotalBLK,fastawaytotalDREB,fastawaytotalFG%,fastawaytotalFGA,fastawaytotalFGM,fastawaytotalFT%,fastawaytotalFTA,fastawaytotalFTM,fastawaytotalOREB,fastawaytotalPF,fastawaytotal+/-,fastawaytotalPPS,fastawaytotalSTL,fastawaytotalTOV,fastawaytotalREB,fastawaytotalPOS,fastawaytotalOR,fastawaytotalDR,fastawaytotalSOS,fastawaytotalPIE,fastawaytotalTIE,slowawaytotalPTS,slowawaytotal3P%,slowawaytotal3PA,slowawaytotal3PM,slowawaytotalAST,slowawaytotalBLK,slowawaytotalDREB,slowawaytotalFG%,slowawaytotalFGA,slowawaytotalFGM,slowawaytotalFT%,slowawaytotalFTA,slowawaytotalFTM,slowawaytotalOREB,slowawaytotalPF,slowawaytotal+/-,slowawaytotalPPS,slowawaytotalSTL,slowawaytotalTOV,slowawaytotalREB,slowawaytotalPOS,slowawaytotalOR,slowawaytotalDR,slowawaytotalSOS,slowawaytotalPIE,slowawaytotalTIE,HomeMatchScore,AwayMatchScore,MatchScore
0,2019-2020,MIA,CHA,2.343417,0.970814,0.842085,0.394406,0.607211,0.092846,0.714897,0.990287,1.748383,0.82753,1.630108,0.374429,0.29395,0.143645,0.38613,0.149068,0.028041,0.121385,0.232401,0.858543,1.934393,2.526923,2.368067,15.88553,2.128805,0.523376,2.412952,0.969437,0.803892,0.376306,0.616516,0.09236,0.704207,1.020781,1.720491,0.839095,1.615733,0.463206,0.358455,0.139904,0.398059,0.14795,0.02937,0.139315,0.256235,0.844111,1.974572,2.548521,2.391494,15.702757,2.200964,0.524745,2.118597,0.741338,0.731489,0.282764,0.499764,0.11183,0.646735,0.816365,1.77168,0.756675,1.4052,0.431454,0.322482,0.196829,0.380298,-0.201179,0.022908,0.137446,0.210502,0.843565,1.910681,2.110202,2.315579,-20.537737,1.73977,0.455068,2.083958,0.723307,0.712941,0.265115,0.489183,0.097263,0.651874,0.824083,1.782955,0.749455,1.37841,0.441696,0.319933,0.207736,0.361014,-0.219212,0.022981,0.150278,0.228316,0.859611,1.932807,2.099112,2.324605,-22.549314,1.684437,0.445745,2.184551,0.85626,0.69121,0.286149,0.524068,0.159342,0.762938,0.98964,1.568779,0.741914,1.604737,0.542713,0.414574,0.142123,0.470224,0.078672,0.029311,0.177131,0.390601,0.905061,1.992043,2.292771,2.204264,8.850646,2.063546,0.553931,2.229012,0.876095,0.710865,0.300848,0.539884,0.137548,0.76527,0.979849,1.625874,0.760865,1.601263,0.531513,0.406435,0.144553,0.457008,0.112079,0.028858,0.155556,0.340139,0.909823,1.990971,2.338143,2.215992,12.215067,2.076472,0.548336,2.327691,0.807728,0.744807,0.310066,0.549041,0.063218,0.488916,0.953342,1.643826,0.809282,1.607569,0.478068,0.399061,0.144889,0.380298,0.011245,0.027501,0.150098,0.217827,0.633804,1.866092,2.388392,2.368,2.039259,1.975501,0.481432,2.259816,0.799693,0.716031,0.289963,0.538265,0.064857,0.537885,0.947586,1.678669,0.804151,1.602351,0.44222,0.36155,0.181695,0.370684,-0.005281,0.026693,0.136618,0.244314,0.71958,1.873289,2.358844,2.358305,0.053892,1.933361,0.483809,0.0,0.0,0.0
1,2019-2020,PHI,DET,2.425824,0.803621,0.587548,0.239489,0.564419,0.125215,0.729829,0.997763,1.814731,0.88816,1.716713,0.471196,0.410015,0.211011,0.401094,0.232593,0.027281,0.149712,0.196089,0.94084,1.942681,2.552778,2.310678,24.210005,2.273407,0.564425,2.415881,0.806181,0.57877,0.233135,0.570804,0.133542,0.735126,1.003915,1.832067,0.896801,1.634032,0.474155,0.389143,0.221261,0.385756,0.216387,0.02709,0.148697,0.217656,0.956387,1.971539,2.518913,2.295133,22.377998,2.241359,0.556637,1.991724,0.749848,0.790335,0.285103,0.478786,0.059646,0.616343,0.848278,1.71328,0.700723,1.442542,0.443683,0.305175,0.140791,0.435788,-0.144787,0.024165,0.181317,0.276826,0.757135,1.977662,2.098546,2.250833,-15.228724,1.527397,0.43479,2.018771,0.743791,0.796141,0.286847,0.475617,0.065323,0.607046,0.850545,1.717962,0.70799,1.533078,0.431576,0.315945,0.1624,0.438133,-0.158526,0.024302,0.171741,0.286349,0.769446,1.965033,2.129845,2.298146,-16.830149,1.581414,0.4378,2.41562,0.855498,0.756279,0.311454,0.547565,0.037005,0.631659,0.998887,1.83457,0.880137,1.720224,0.415715,0.343893,0.253805,0.398877,0.014555,0.027449,0.128425,0.26113,0.885464,1.957785,2.570106,2.562411,0.76957,2.147926,0.491243,2.42201,0.878599,0.76556,0.323917,0.55393,0.059892,0.619438,0.998748,1.857554,0.891362,1.646139,0.39691,0.315369,0.241511,0.407041,-0.005101,0.02717,0.136612,0.239653,0.86095,1.962458,2.570181,2.579647,-0.946628,2.114471,0.482966,1.997051,0.670006,0.761701,0.248478,0.485445,0.055651,0.597698,0.871718,1.734589,0.728691,1.412576,0.425704,0.291191,0.156107,0.431792,-0.186739,0.023918,0.14003,0.276161,0.753805,1.974578,2.106645,2.303748,-19.710283,1.532392,0.42629,2.038816,0.715738,0.77244,0.268716,0.491835,0.058639,0.590798,0.881043,1.735439,0.736879,1.437844,0.427502,0.296342,0.164538,0.445519,-0.170461,0.024422,0.152178,0.280558,0.755336,1.972342,2.153756,2.33421,-18.045441,1.582162,0.431427,0.0,0.0,0.0
2,2019-2020,ATL,NYK,2.313117,0.755301,0.767373,0.309164,0.551468,0.115943,0.64708,0.905106,1.806586,0.844769,1.494215,0.421659,0.314416,0.224325,0.428436,-0.164403,0.024805,0.080456,0.327734,0.871405,2.025871,2.184273,2.34562,-16.134665,2.003298,0.471626,2.401153,0.76783,0.771415,0.307999,0.571536,0.109315,0.675455,0.927352,1.857392,0.873117,1.525008,0.460327,0.34692,0.234259,0.417874,-0.07878,0.025521,0.098563,0.295538,0.909714,2.051834,2.291287,2.368754,-7.746605,2.096056,0.484499,2.270643,0.718636,0.42285,0.148021,0.574391,0.076484,0.601313,1.066533,1.719463,0.879661,1.395672,0.533961,0.363299,0.196632,0.460997,-0.15449,0.027549,0.131754,0.303938,0.797945,1.997185,2.370807,2.533284,-16.247692,1.959808,0.47802,2.265946,0.724279,0.459358,0.164764,0.547081,0.091975,0.634675,1.020834,1.757424,0.870609,1.387655,0.525902,0.359965,0.225447,0.460098,-0.145225,0.026561,0.150102,0.310808,0.860122,2.008885,2.318536,2.467547,-14.90119,1.960593,0.481255,2.320442,0.735457,0.813986,0.307832,0.529493,0.071327,0.675714,0.908369,1.789272,0.844769,1.348448,0.454955,0.323072,0.215668,0.481043,-0.101808,0.024877,0.109756,0.303095,0.891382,2.00909,2.210726,2.311361,-10.063432,2.027604,0.485258,2.328861,0.741001,0.80899,0.304311,0.538774,0.085813,0.665507,0.909359,1.831162,0.844521,1.466157,0.450764,0.335509,0.225821,0.457677,-0.12277,0.025026,0.117731,0.311207,0.891328,2.045434,2.228388,2.348604,-12.021593,2.005381,0.474744,2.213375,0.650713,0.438166,0.134703,0.473839,0.099125,0.682553,0.99628,1.740772,0.830384,1.545634,0.563927,0.417903,0.223269,0.475647,-0.093893,0.026578,0.180365,0.316591,0.905822,2.017881,2.282637,2.369185,-8.654802,1.951151,0.513522,2.262161,0.685206,0.449682,0.14724,0.487445,0.092905,0.699366,1.007257,1.768859,0.853806,1.543899,0.553941,0.40731,0.247224,0.452833,-0.074043,0.026705,0.158936,0.319871,0.94659,2.020442,2.329951,2.400053,-7.010168,2.023456,0.516646,0.0,0.0,0.0
3,2019-2020,OKC,UTA,2.250666,0.74786,0.542047,0.192161,0.415906,0.125856,0.736492,0.992532,1.735445,0.829528,1.512129,0.552321,0.399448,0.170662,0.421709,0.01446,0.026937,0.139174,0.276541,0.907154,2.01953,2.318241,2.309282,0.895921,2.013699,0.507684,2.262814,0.74975,0.551644,0.196753,0.404827,0.107974,0.708697,0.982795,1.750533,0.827609,1.570041,0.547098,0.410843,0.18558,0.425985,0.025126,0.026871,0.149962,0.270783,0.894277,2.011783,2.341653,2.318968,2.268466,2.005122,0.501288,2.237253,0.66954,0.781393,0.254947,0.436073,0.107972,0.652492,1.029243,1.642789,0.811358,1.641077,0.462804,0.359589,0.166857,0.400495,0.127664,0.028399,0.171709,0.280251,0.819349,1.896975,2.461423,2.321553,13.986992,2.038527,0.543961,2.316895,0.737089,0.786203,0.281794,0.47327,0.105567,0.685365,1.062329,1.659918,0.846563,1.658927,0.435763,0.341975,0.172591,0.4055,0.134637,0.029094,0.146028,0.299882,0.857955,1.914429,2.523475,2.376893,14.658144,2.181412,0.552739,2.369292,0.832601,0.556126,0.221651,0.441876,0.143836,0.663908,1.033457,1.740392,0.86035,1.561016,0.569159,0.426941,0.113965,0.413052,0.195301,0.028469,0.199486,0.272451,0.777873,2.082459,2.381383,2.180687,20.069525,2.067922,0.53036,2.272157,0.768219,0.572599,0.207827,0.415515,0.133971,0.664248,0.991389,1.747462,0.828158,1.526472,0.555505,0.408014,0.124704,0.410331,0.033806,0.027212,0.186294,0.276216,0.788952,2.076326,2.29,2.249411,4.058982,1.931963,0.49766,2.075818,0.731849,0.838565,0.29804,0.448725,0.097317,0.621956,0.93398,1.675228,0.748097,1.566943,0.375571,0.281583,0.1543,0.405346,-0.022355,0.025912,0.218037,0.255137,0.776256,1.876283,2.308809,2.33208,-2.327116,1.753805,0.488106,2.185636,0.756665,0.82564,0.302851,0.475694,0.102468,0.651184,0.973451,1.691288,0.788963,1.59413,0.399915,0.304858,0.160933,0.405516,0.036805,0.026961,0.187934,0.267358,0.812116,1.908166,2.386029,2.347447,3.858263,1.919904,0.504982,0.0,0.0,0.0
4,2019-2020,DAL,DEN,2.378798,0.750914,0.933111,0.345919,0.507165,0.162377,0.728522,0.919204,1.899339,0.856412,1.455186,0.452346,0.320055,0.197514,0.306737,0.154824,0.02552,0.16517,0.249659,0.926036,2.079694,2.336007,2.177801,15.820543,2.03086,0.498744,2.471732,0.761397,0.920918,0.343215,0.532061,0.143671,0.740665,0.959179,1.903011,0.889658,1.503394,0.477466,0.3492,0.190947,0.348467,0.181707,0.026654,0.151217,0.246223,0.931612,2.09768,2.417894,2.236906,18.098818,2.166251,0.512391,2.20957,0.759132,0.594083,0.218893,0.54804,0.061263,0.515982,1.044511,1.785483,0.89317,1.305651,0.283866,0.204338,0.232782,0.40449,-0.140887,0.025838,0.221937,0.327721,0.748763,1.934648,2.385505,2.530743,-14.523803,1.870101,0.458433,2.20629,0.713765,0.591289,0.204607,0.520732,0.070763,0.56037,1.039526,1.763155,0.877912,1.409559,0.324392,0.24586,0.221848,0.40743,-0.115182,0.026149,0.203691,0.327089,0.782219,1.941583,2.372073,2.49079,-11.871739,1.913809,0.471364,2.326769,0.762814,0.923992,0.338851,0.48516,0.103786,0.715849,0.941362,1.923992,0.869007,1.500999,0.350361,0.249905,0.187119,0.345224,0.001427,0.025184,0.173992,0.257705,0.902968,2.074559,2.338539,2.333359,0.517962,1.980736,0.475764,2.326086,0.746188,0.917075,0.334221,0.504815,0.122721,0.746688,0.939487,1.89641,0.864291,1.476226,0.370323,0.263283,0.174981,0.347523,0.030273,0.025266,0.169968,0.276574,0.921669,2.08695,2.298146,2.267598,3.05486,2.021106,0.49449,2.247527,0.784931,0.724315,0.276065,0.587995,0.09941,0.675894,0.998763,1.847127,0.881469,1.541571,0.260654,0.208524,0.177607,0.414669,0.129186,0.025424,0.237158,0.313642,0.853501,2.022694,2.324748,2.199622,12.512524,1.994435,0.52059,2.265287,0.748799,0.678906,0.254175,0.578519,0.095013,0.674655,1.002478,1.836243,0.879841,1.533797,0.327322,0.25143,0.20579,0.430777,0.048305,0.025796,0.209703,0.31748,0.880445,2.019275,2.342409,2.302006,4.040331,2.010543,0.509948,0.0,0.0,0.0
5,2019-2020,SAC,NOP,2.414098,0.70664,0.623193,0.209951,0.48868,0.102549,0.758752,1.060483,1.797089,0.912291,1.737605,0.450533,0.379566,0.159056,0.460997,-0.128139,0.028013,0.154776,0.294425,0.917808,2.060603,2.437607,2.570692,-13.308466,2.296613,0.504727,2.419711,0.736075,0.657061,0.233129,0.474782,0.099148,0.725296,1.036841,1.80275,0.89419,1.709016,0.483672,0.398201,0.174363,0.446614,-0.050222,0.028021,0.157132,0.276058,0.899659,2.048822,2.457595,2.509296,-5.17005,2.238158,0.5047,2.391432,0.659823,0.736228,0.244457,0.579727,0.081011,0.823503,0.941095,1.944556,0.901904,1.330232,0.501081,0.343168,0.21356,0.469379,0.057593,0.025099,0.161451,0.415482,1.037063,2.289111,2.110037,2.061944,4.809311,2.12115,0.523483,2.42538,0.726696,0.753302,0.271797,0.607641,0.079952,0.812481,0.960967,1.941976,0.912234,1.305805,0.490576,0.329115,0.227386,0.453633,0.107341,0.025685,0.160353,0.391124,1.039866,2.245189,2.205223,2.105874,9.934924,2.160351,0.526697,2.39745,0.780356,0.689783,0.267218,0.535293,0.105213,0.774068,1.013204,1.79043,0.868341,1.784418,0.45586,0.39355,0.187024,0.460331,-0.046899,0.027953,0.140126,0.288432,0.961092,2.024037,2.468832,2.515427,-4.659475,2.280632,0.519343,2.412368,0.768013,0.688836,0.258958,0.513645,0.098677,0.763232,1.009958,1.812491,0.876213,1.778009,0.466963,0.400985,0.197131,0.454136,-0.004138,0.027786,0.162624,0.280633,0.960363,2.033088,2.471075,2.474708,-0.363236,2.271909,0.520259,2.393304,0.699958,0.78019,0.269636,0.57102,0.104405,0.827812,0.928859,1.9434,0.882693,1.349559,0.527792,0.358282,0.258339,0.482371,0.146736,0.025288,0.146977,0.41013,1.086151,2.252056,2.168513,2.036641,13.187156,2.12007,0.536788,2.437221,0.67173,0.768204,0.255024,0.584467,0.091273,0.798682,0.939689,1.964683,0.899242,1.38546,0.554004,0.383714,0.267312,0.43471,0.073982,0.025555,0.161262,0.388086,1.065995,2.254664,2.217461,2.151865,6.559685,2.133829,0.519961,0.0,0.0,0.0


In [33]:
# Persist the prediction feature table in feather format under PATH.
_predict_file = '{}/NBA_predict'.format(PATH)
df_predict.to_feather(_predict_file)

## Deep Learning -malli O/U-tulosten ennustamiseen

Aikaisemmin kootusta df_final-taulukosta erotellaan erikseen koti-, vieras- ja koko ottelun tilastotaulukot. Sama tehdään myös ennustettavat ottelut sisältävälle taulukolle. Koti- ja vierasjoukkueiden pisteille sekä koko ottelun tulokselle tehdään kullekin oma DL-malli, ja malleja koulutetaan df_home-, df_away- ja df_match-taulukoissa olevalla datalla pari epochia. Kyseisillä malleilla ennustetaan predict-taulukoissa olevien otteluiden tulokset niihin laskettujen tilastojen perusteella.

In [None]:
# Training frames: one per target, each without the two score columns that
# belong to the other models.
df_home = df_final.drop(columns=['AwayMatchScore', 'MatchScore'])
df_away = df_final.drop(columns=['HomeMatchScore', 'MatchScore'])
df_match = df_final.drop(columns=['HomeMatchScore', 'AwayMatchScore'])

# Prediction frames for the upcoming matches.
# NOTE(review): unlike the training frames above, the home/away prediction
# frames drop only one score column each -- confirm this is intended.
df_predict_home = df_predict.drop(columns=['AwayMatchScore'])
df_predict_away = df_predict.drop(columns=['HomeMatchScore'])
df_predict_match = df_predict.drop(columns=['HomeMatchScore', 'AwayMatchScore'])

In [None]:
# Dependent (target) variable of each model and the settings shared by the DL models.

dep_home, dep_away, dep_match = 'HomeMatchScore', 'AwayMatchScore', 'MatchScore'

# All categorical variables are used; the last three entries of contin_vars are
# left out of the continuous inputs (presumably the three score columns -- confirm).
cat_list = cat_vars
contin_list = contin_vars[:-3]

# Preprocessing steps passed as `procs` when the tabular item lists are built.
procs = [FillMissing, Categorify, Normalize]

In [None]:
# Data for the home-score model. Row positions len(df_home)-200 .. len(df_home)-1
# are set aside as the validation split; HomeMatchScore is the label.
_home_rows = len(df_home)
_home_valid_idx = list(range(_home_rows - 200, _home_rows))
_home_items = TabularList.from_df(df_home, path=PATH, cat_names=cat_list,
                                  cont_names=contin_list, procs=procs)
home_data = (_home_items.split_by_idx(_home_valid_idx)
                        .label_from_df(cols=dep_home)
                        .databunch())

In [None]:
# Data for the away-score model. Row positions len(df_away)-200 .. len(df_away)-1
# are set aside as the validation split; AwayMatchScore is the label.
_away_rows = len(df_away)
_away_valid_idx = list(range(_away_rows - 200, _away_rows))
_away_items = TabularList.from_df(df_away, path=PATH, cat_names=cat_list,
                                  cont_names=contin_list, procs=procs)
away_data = (_away_items.split_by_idx(_away_valid_idx)
                        .label_from_df(cols=dep_away)
                        .databunch())

In [None]:
# Data for the total-score model. Row positions len(df_match)-200 .. len(df_match)-1
# are set aside as the validation split; MatchScore is the label.
_match_rows = len(df_match)
_match_valid_idx = list(range(_match_rows - 200, _match_rows))
_match_items = TabularList.from_df(df_match, path=PATH, cat_names=cat_list,
                                   cont_names=contin_list, procs=procs)
match_data = (_match_items.split_by_idx(_match_valid_idx)
                          .label_from_df(cols=dep_match)
                          .databunch())

In [None]:
def _make_learner(databunch):
    """Return a tabular learner for ``databunch`` with layers=[200, 100, 100]
    and mean_absolute_error as its metric."""
    return tabular_learner(databunch, layers=[200, 100, 100], metrics=mean_absolute_error)


# One learner per target, all with identical settings.
learn_home = _make_learner(home_data)
learn_away = _make_learner(away_data)
learn_match = _make_learner(match_data)

In [None]:
# Train the home-score model; the arguments are presumably 2 epochs and a
# learning rate of 1e-2 -- confirm against the fastai version in use.
learn_home.fit(2, 1e-2)

In [None]:
# Train the away-score model; the arguments are presumably 2 epochs and a
# learning rate of 1e-2 -- confirm against the fastai version in use.
learn_away.fit(2, 1e-2)

In [None]:
# Train the total-score model; the arguments are presumably 2 epochs and a
# learning rate of 1e-2 -- confirm against the fastai version in use.
learn_match.fit(2, 1e-2)

In [None]:
def _iter_predictions(learner, features):
    """Yield ``(index_label, predicted_score)`` for every row of ``features``.

    Args:
        learner: Trained learner; element 1 of its ``predict(row)`` result is
            converted to ``float`` and used as the predicted score.
        features: DataFrame holding one match to predict per row.

    Yields:
        Tuples of the row's index label and its predicted score as a float.
    """
    # Pass the row produced by iterrows() directly to the learner. iterrows()
    # yields index *labels*; re-fetching the row with the positional .iloc
    # returns the wrong row (or raises) unless the index is exactly 0..n-1.
    for label, row in features.iterrows():
        yield label, float(learner.predict(row)[1])


# Create the result columns as float NaN instead of "" so they are numeric
# from the start (an object/string column cannot reliably take float writes).
# The order matches the column order the cell has always produced.
for column in ('TeamScore', 'MatchScore', 'AwayScore', 'HomeScore'):
    df_matchup[column] = np.nan

# Fill one prediction column per model, written by index label so the rows of
# the prediction frames line up with the rows of df_matchup.
for column, learner, features in (
        ('AwayScore', learn_away, df_predict_away),
        ('HomeScore', learn_home, df_predict_home),
        ('MatchScore', learn_match, df_predict_match),
):
    for label, score in _iter_predictions(learner, features):
        df_matchup.at[label, column] = score

# TeamScore is the sum of the separate home and away predictions; it can be
# compared with MatchScore, the direct prediction of the total.
df_matchup['TeamScore'] = df_matchup['HomeScore'] + df_matchup['AwayScore']

display(df_matchup)

Match-malli laskee ottelun kokonaispistemäärän aikaisempien otteluiden kokonaispistemääristä ja muista tilastoista. Home- ja Away-mallit tekevät saman koti- ja vierasjoukkueen tilastoista, ja näiden kahden mallin ulostulot lasketaan yhteen kokonaispistemäärän saamiseksi. Näin saatua summaa (TeamScore) ja Match-mallin ennustetta (MatchScore) voidaan verrata toisiinsa. Tulos näyttää esimerkiksi tältä:

![Esimerkki ennustetaulukosta](https://github.com/SDCH-analytics/NBAOverUnder/blob/main/esimerkki.PNG?raw=true)