In [59]:
from bs4 import BeautifulSoup
import requests
import datetime
import pandas as pd
import numpy as np
import json
import lightgbm as lgb
import pickle

# query game date

In [60]:
def get_web_page(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Parameters
    ----------
    url : str
        Address to request.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Defaults to 10.

    Returns
    -------
    str or None
        The body decoded as UTF-8 when the status code is 200; ``None``
        (after printing the offending URL) for any other status code.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    # requests never times out on its own; without this an unresponsive
    # server would block the notebook indefinitely.
    resp = requests.get(url=url, timeout=timeout)
    if resp.status_code != 200:
        print('Invalid url:', resp.url)
        return None
    # Decode as UTF-8 regardless of the encoding requests inferred.
    resp.encoding = 'utf-8'
    return resp.text

In [61]:
# The schedule address is pinned to the NBA_2019 "january" page.
# NOTE(review): a later cell keeps only rows dated today, so this URL presumably
# has to be edited by hand for any other month or season - confirm.
schedule_url = "https://www.basketball-reference.com/leagues/NBA_2019_games-january.html"
doc = get_web_page(schedule_url)

In [62]:
# Load the team mapping from JSON. The context manager closes the file handle,
# which the previous open(...).read() one-liner left open.
with open("/Users/chienan/Pycon/github/SportLottery/nbastat/teamId_mpt") as team_map_file:
    teams = json.load(team_map_file)
# Invert the mapping (value -> key); a later cell indexes `teams` by team name
# to obtain the team id.
teams = dict((v,k) for k,v in teams.items())

In [63]:
# Parse the downloaded HTML with the lxml parser.
soup = BeautifulSoup(markup=doc, features="lxml")

In [64]:
# Locate the first <table> whose class attribute is exactly this string.
schedule_table_class = 'suppress_glossary sortable stats_table'
data = soup.find('table', attrs={'class': schedule_table_class})

In [65]:
# Despite the name, `row` holds the table's first <tbody> element (all rows).
row = data.tbody

In [66]:
# Collect the date header cells and the home / visitor team cells, selected by
# their data-stat attribute.
data_date, data_row_home, data_row_visitor = (
    row.find_all(tag_name, attrs={"data-stat": stat_name})
    for tag_name, stat_name in (
        ('th', "date_game"),
        ('td', "home_team_name"),
        ('td', "visitor_team_name"),
    )
)

In [67]:
def _as_text_column(cells):
    """Return the ``.text`` of every cell as an (n, 1) numpy array."""
    return np.array([cell.text for cell in cells]).reshape(-1, 1)


# Each name is rebound from a list of tags to a single-column array of strings.
data_date = _as_text_column(data_date)
data_row_home = _as_text_column(data_row_home)
data_row_visitor = _as_text_column(data_row_visitor)

In [68]:
# Place the three single-column arrays side by side: date, home team, away team.
schedule_cells = np.concatenate([data_date, data_row_home, data_row_visitor], axis=1)
df = pd.DataFrame(schedule_cells, columns=["GAME_DATE","HOME_TEAM","AWAY_TEAM"])

In [69]:
def _parse_game_date(text):
    """Convert a string in "%a, %b %d, %Y" form into a ``datetime.date``."""
    return datetime.datetime.strptime(text, "%a, %b %d, %Y").date()


# Replace the scraped date strings with date objects so they can be compared
# against today's date.
df["GAME_DATE"] = df["GAME_DATE"].apply(_parse_game_date)

In [70]:
def _team_id(team_name):
    """Look up ``team_name`` in ``teams``; an unknown name raises KeyError."""
    return teams[team_name]


# Translate both team-name columns into team ids.
df["TEAM_ID_HOME"] = df["HOME_TEAM"].apply(_team_id)
df["TEAM_ID_AWAY"] = df["AWAY_TEAM"].apply(_team_id)

In [71]:
# Display tomorrow's local date. Note the filter in the next cell uses today's
# date, not this value.
datetime.date.today() + datetime.timedelta(days=1)

datetime.date(2019, 1, 23)

In [72]:
# Keep only the games scheduled for today's local date and renumber the rows.
today = datetime.datetime.now().date()
df = df.loc[df["GAME_DATE"] == today].reset_index(drop=True)

In [73]:
# Show today's games with their home / away team ids.
df

Unnamed: 0,GAME_DATE,HOME_TEAM,AWAY_TEAM,TEAM_ID_HOME,TEAM_ID_AWAY
0,2019-01-22,Dallas Mavericks,Los Angeles Clippers,1610612742,1610612746
1,2019-01-22,Oklahoma City Thunder,Portland Trail Blazers,1610612760,1610612757
2,2019-01-22,Phoenix Suns,Minnesota Timberwolves,1610612756,1610612750
3,2019-01-22,Toronto Raptors,Sacramento Kings,1610612761,1610612758


# create query data

In [74]:
# Load the aggregated per-game features, then split them into a home-side and
# an away-side frame by column name.
team = pd.read_pickle("/Users/chienan/Pycon/github/SportLottery/input/FEATURE_AGG_2012TO2019.pkl")

# Home columns: any name containing "_H". Away columns: names containing "_A"
# but not "_H" (a home column can itself contain "_A"). Both lists are in
# reverse alphabetical order.
home_columns = sorted({col for col in team.columns if "_H" in col}, reverse=True)
away_columns = sorted(
    {col for col in team.columns if "_A" in col and "_H" not in col},
    reverse=True,
)

home = team[["GAME_ID","GAME_DATE"] + home_columns]
away = team.loc[:, ["GAME_ID","GAME_DATE"] + away_columns]

In [75]:
# The substring filters above also picked up the long-form id columns; drop
# them so only the short "_H" / "_A" columns remain.
home = home.drop(columns=["TEAM_ID_HOME"])
away = away.drop(columns=["TEAM_ID_AWAY"])

In [76]:
def _side_neutral_names(frame, first_side_column):
    """Return column names for ``frame`` with the two-character side suffix removed.

    The result is GAME_ID and GAME_DATE followed by every column from
    ``first_side_column`` onward with its last two characters cut off.
    """
    stripped = [name[:-2] for name in frame.loc[:, first_side_column:].columns]
    return ["GAME_ID", "GAME_DATE"] + stripped


# Give both frames identical, side-neutral column names so they can be stacked.
# NOTE(review): assumes TEAM_ID_H / TEAM_ID_A is the third column of each frame,
# otherwise the new name list will not match the column count - confirm.
home.columns = _side_neutral_names(home, "TEAM_ID_H")
away.columns = _side_neutral_names(away, "TEAM_ID_A")

In [77]:
# Stack the home rows on top of the away rows and renumber the index from 0.
team = pd.concat([home, away], axis=0, ignore_index=True)

In [78]:
# Order rows by game date so that a team's last row is its most recent game.
team_by_date = team.sort_values(by="GAME_DATE")
team = team_by_date.reset_index(drop=True)

# query data

In [79]:
# Show today's games again before the team features are joined on.
df

Unnamed: 0,GAME_DATE,HOME_TEAM,AWAY_TEAM,TEAM_ID_HOME,TEAM_ID_AWAY
0,2019-01-22,Dallas Mavericks,Los Angeles Clippers,1610612742,1610612746
1,2019-01-22,Oklahoma City Thunder,Portland Trail Blazers,1610612760,1610612757
2,2019-01-22,Phoenix Suns,Minnesota Timberwolves,1610612756,1610612750
3,2019-01-22,Toronto Raptors,Sacramento Kings,1610612761,1610612758


In [83]:
# Cast TEAM_ID to str so it can be compared with the ids held in `df`.
# NOTE(review): presumably the ids in `df` are strings because they come from
# the JSON team mapping - confirm.
team["TEAM_ID"] = team["TEAM_ID"].astype(str)

In [84]:
def _latest_team_features(team, team_ids, id_col, suffix):
    """Return the last feature row of each requested team, indexed by ``id_col``.

    Parameters
    ----------
    team : pandas.DataFrame
        Frame with GAME_ID, GAME_DATE, TEAM_ID and the feature columns that
        start at LAST_9_GAME_WL. The last row of a team is treated as its
        latest game, which relies on `team` having been sorted by GAME_DATE
        in an earlier cell.
    team_ids : iterable
        Team ids to look up, in output order.
    id_col : str
        Name given to the TEAM_ID column (and to the returned index).
    suffix : str
        Side marker appended to every feature name as ``"<name>_<suffix>"``.

    Returns
    -------
    pandas.DataFrame
        One row per id in ``team_ids``; an empty frame when ``team_ids`` is empty.

    Raises
    ------
    IndexError
        If a team id has no row in ``team``.
    """
    feature_names = ["%s_%s" % (col, suffix) for col in team.loc[:, "LAST_9_GAME_WL":].columns]
    latest_rows = []
    for t_id in team_ids:
        # Compute the boolean mask once and take the last matching index label.
        last_label = team.index[(team.TEAM_ID == t_id).values][-1]
        # Label slice keeps the result a one-row DataFrame rather than a Series.
        latest_rows.append(team.loc[last_label:last_label, :])
    if not latest_rows:
        return pd.DataFrame()
    # Concatenate once instead of growing a DataFrame inside the loop.
    latest = pd.concat(latest_rows, axis=0).drop(["GAME_ID", "GAME_DATE"], axis=1)
    latest.columns = [id_col] + feature_names
    return latest.set_index(id_col)


# Attach the home team's latest features ("_H" columns), aligned on team id.
df = df.set_index("TEAM_ID_HOME")
team_h = _latest_team_features(team, df.index, "TEAM_ID_HOME", "H")
df = pd.concat([df, team_h], axis=1).reset_index()

# Attach the away team's latest features ("_A" columns) the same way.
df = df.set_index("TEAM_ID_AWAY")
team_v = _latest_team_features(team, df.index, "TEAM_ID_AWAY", "A")
df = pd.concat([df, team_v], axis=1).reset_index()

In [85]:
# Reload the original aggregated feature table; this rebinds `team`, replacing
# the reshaped per-team frame built above.
feature_table_path = "/Users/chienan/Pycon/github/SportLottery/input/FEATURE_AGG_2012TO2019.pkl"
team = pd.read_pickle(feature_table_path)

In [86]:
# Drop, in place, every column of `team` that does not also exist in `df`.
known_columns = set(df.columns)
unused_columns = [name for name in team.columns if name not in known_columns]
team.drop(unused_columns, axis=1, inplace=True)

In [87]:
# Load the pickled column selection used to build the model input below.
# The context manager closes the file handle, which the previous
# pickle.load(open(...)) one-liner left open.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open("/Users/chienan/Pycon/github/SportLottery/model/columns.pkl","rb") as columns_file:
    column = pickle.load(columns_file)

In [88]:
# Select the model's input columns from `df`, in the order stored in `column`.
x_input = df.loc[:, column]

In [89]:
# Load the LightGBM booster from its saved text model file.
model_path = "/Users/chienan/Pycon/github/SportLottery/model/model_agg_20190114.txt"
model = lgb.Booster(model_file=model_path)

In [90]:
# Score today's games. NOTE(review): later cells multiply `pred` by home odds and
# (1 - pred) by away odds, so this is presumably the home-win probability - confirm.
pred = model.predict(data=x_input)

In [91]:
# Show the model output, one value per row of `df`.
pred

array([0.66230116, 0.61956761, 0.26134593, 0.6501226 ])

In [92]:
# Show today's games with the joined "_H" and "_A" feature columns.
df

Unnamed: 0,TEAM_ID_AWAY,TEAM_ID_HOME,GAME_DATE,HOME_TEAM,AWAY_TEAM,LAST_9_GAME_WL_H,LAST_9_GAME_TOV_H,LAST_9_GAME_STL_H,LAST_9_GAME_REB_H,LAST_9_GAME_PTS_H,...,LAST_10_GAME_FTA_A,LAST_10_GAME_FG_PCT_A,LAST_10_GAME_FGM_A,LAST_10_GAME_FGA_A,LAST_10_GAME_FG3_PCT_A,LAST_10_GAME_FG3M_A,LAST_10_GAME_FG3A_A,LAST_10_GAME_DREB_A,LAST_10_GAME_BLK_A,LAST_10_GAME_AST_A
0,1610612746,1610612742,2019-01-22,Dallas Mavericks,Los Angeles Clippers,3.0,13.444444,6.888889,47.666667,105.444444,...,27.6,0.4583,40.0,87.3,0.3763,9.3,24.6,34.3,4.8,23.4
1,1610612757,1610612760,2019-01-22,Oklahoma City Thunder,Portland Trail Blazers,4.0,12.0,9.111111,46.888889,119.222222,...,23.0,0.4769,43.4,91.2,0.3597,11.3,31.6,36.3,5.8,26.8
2,1610612750,1610612756,2019-01-22,Phoenix Suns,Minnesota Timberwolves,2.0,14.555556,10.444444,37.333333,109.222222,...,27.4,0.4461,40.6,91.0,0.3319,9.1,27.6,32.8,5.6,24.4
3,1610612758,1610612761,2019-01-22,Toronto Raptors,Sacramento Kings,7.0,13.0,10.222222,44.111111,117.555556,...,17.7,0.4603,42.5,92.9,0.4024,12.3,30.6,34.2,4.3,27.1


In [93]:
#home
#home
# One slot per predicted game, zero unless a value is entered by hand.
home_exp = np.zeros(shape=pred.shape)
# NOTE(review): 2.7 is presumably the payout odds for a home win in game
# index 2 - confirm the source of this number.
home_exp[2] = 2.7

# Element-wise product of the hand-entered values and the model output.
np.multiply(home_exp, pred)

array([0.        , 0.        , 0.70563402, 0.        ])

In [58]:
#away
#away
# NOTE(review): this cell is stale. Its execution count (58) precedes the first
# cell of the notebook and its saved output has 11 entries, whereas `pred` above
# has 4. With the current `pred`, `away_exp[7]` raises IndexError on a fresh
# Restart & Run All; the indices and odds below need re-entering for today's games.
away_exp = np.zeros(pred.shape)
# Hand-entered values per game index. NOTE(review): presumably away-win payout
# odds - confirm.
away_exp[7] = 1.1
away_exp[3] = 1.55
away_exp[2] = 2.3
away_exp[1] = 3.5

# Element-wise product of the hand-entered values and (1 - pred).
away_exp*(1-pred)

array([0.        , 1.1075751 , 1.00682761, 0.72221268, 0.        ,
       0.        , 0.        , 0.88953013, 0.        , 0.        ,
       0.        ])