# FanDuel data

This notebook takes the player list of an upcoming FanDuel competition (CSV), builds the inputs required by the ML model, runs the model, and writes the per-player predictions to a CSV for the optimization step.

In [1]:
import nbimporter
import pandas as pd
from datetime import datetime
import player_matches_data as pm_data
import games_data
import pickle
import numpy as np

Importing Jupyter notebook from player_matches_data.ipynb
Importing Jupyter notebook from games_data.ipynb


In [31]:
# Identifier of the FanDuel competition to process. It is spliced into both the
# input players-list filename and the output optimization-input filename.
competition_key = "2019-12-01-41164"

In [2]:
# Load the FanDuel players list exported for the selected competition.
players_list_path = "../data/FanDuel-NBA-" + competition_key + "-players-list.csv"
competition = pd.read_csv(players_list_path)

In [3]:
# Keep only rows whose injury indicator is not "O" (presumably "Out" - confirm
# against FanDuel's export format). Copy so later column writes act on an
# independent frame rather than a view of the raw CSV data.
not_flagged_out = competition['Injury Indicator'] != "O"
competition = competition.loc[not_flagged_out].copy()

In [4]:
# Calendar date of this run, truncated to midnight (time-of-day discarded).
# NOTE(review): this is the day the notebook is executed, not the date embedded
# in competition_key - confirm that is intended when re-running for an older file.
date = datetime(*datetime.today().timetuple()[:3])
year, month, day = date.year, date.month, date.day

In [5]:
def add_home_game(row):
    """Tell whether the row's team is the side listed after "@" in 'Game'.

    row must provide 'team_key' and a 'Game' string containing "@"; the
    segment following the first "@" is compared with 'team_key'.
    """
    side_after_at = row['Game'].split("@")[1]
    return side_after_at == row['team_key']

In [6]:
# Map FanDuel's headers onto the shorter keys used below ('name', 'team_key', ...),
# stamp every row with the run date, then flag home games row by row.
renamed_columns = {
    'Nickname': 'name',
    'Team': 'team_key',
    'Opponent': 'opponent_key',
}
competition = competition.rename(columns=renamed_columns)

# Same constant on every row; added in this order so the column layout is stable.
for column, value in (('date', date), ('year', year), ('month', month), ('day', day)):
    competition[column] = value

competition['home_game'] = competition.apply(add_home_game, axis=1)

In [10]:
# Lookup table read from CSV; add_player_id matches on its 'name' column and
# returns values from its 'player_id' column.
name_id_mapping = pd.read_csv("../data/name_id_mapping.csv")

In [11]:
def add_player_id(row, mapping=None):
    """Look up the player_id for a competition row by exact name match.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide a 'name' entry.
    mapping : pandas.DataFrame, optional
        Table with 'name' and 'player_id' columns. When omitted, the
        notebook-level ``name_id_mapping`` frame is used, so existing
        ``df.apply(add_player_id, axis=1)`` calls behave as before.

    Returns
    -------
    The first matching player_id, or None when the name has no match (a
    reminder to update the mapping is printed in that case).
    """
    if mapping is None:
        mapping = name_id_mapping

    name = row['name']
    ids = mapping.loc[mapping['name'] == name, 'player_id']

    if len(ids) > 0:
        return ids.iloc[0]

    # str() keeps the reminder from raising TypeError when the name is
    # missing (NaN is a float and cannot be concatenated to a str).
    print("Please update player_id for name " + str(name))
    return None

In [12]:
# Resolve a player_id for every row. Names with no match get None and are
# reported by add_player_id so the mapping file can be updated.
competition['player_id'] = competition.apply(add_player_id, axis = 1)

Please update player_id for name Nigel Williams-Goss
Please update player_id for name Garrison Mathews
Please update player_id for name Justin Robinson
Please update player_id for name Mfiondu Kabengele
Please update player_id for name Alen Smailagic
Please update player_id for name Oshae Brissett
Please update player_id for name Juwan Morgan
Please update player_id for name Shamorie Ponds
Please update player_id for name Amir Coffey
Please update player_id for name BJ Johnson
Please update player_id for name Dewan Hernandez
Please update player_id for name Jarrell Brantley
Please update player_id for name Melvin Frazier
Please update player_id for name Miye Oni
Please update player_id for name Justin Wright-Foreman


In [13]:
# Discard players whose player_id is missing (i.e. could not be resolved).
competition = competition.dropna(subset=['player_id'])

In [14]:
# Enrich the competition rows via helpers from the player_matches_data notebook.
# NOTE(review): judging by their names these add game ids and the season start
# year; the helpers are defined outside this notebook - confirm which columns
# they add and whether they mutate their input.
fd_complete = pm_data.add_game_ids(competition)
fd_complete = pm_data.add_season_start_year(fd_complete)

In [15]:
# Stat column names: each is created on fd_complete with a constant 0 and the
# list is then passed to pm_data.roll_with_old_data.
stats = ['secs', 'fgm', 'fga', '3pm', '3pa', 'ftm', 'fta', 'orb', 'drb', 'ast', 'stl', 'blk', 'tvr', 'pf', 'fp']

In [16]:
# Create every stat column on fd_complete with a constant 0.
# NOTE(review): presumably placeholders because these games have not been
# played yet - confirm against roll_with_old_data's expectations.
for stat in stats:
    fd_complete[stat] = 0

In [17]:
# Historical player-match rows read from CSV; passed to pm_data.roll_with_old_data.
old_data = pd.read_csv("../data/20100101_20191129_player_matches.csv")

# Window size handed to both pm_data.roll_with_old_data and
# games_data.create_games_data.
# NOTE(review): presumably the number of most recent games to aggregate
# (cf. the "_l5" column suffix in the feature frame) - confirm.
ROLLING_GAMES_WINDOW = 5

In [18]:
# Pass the upcoming rows, the historical rows, the stat names and the window
# size to the rolling helper and keep its result.
# NOTE(review): roll_with_old_data lives in player_matches_data; presumably it
# computes rolling stats over the last ROLLING_GAMES_WINDOW games - confirm.
fd_complete = pm_data.roll_with_old_data(fd_complete, old_data, stats, ROLLING_GAMES_WINDOW)

## Create games dataframe (input to model)

In [19]:
# Player-season table read from CSV and passed to games_data.create_games_data.
PLAYER_SEASONS_FILENAME = "../data/2009_2019_player_seasons_unique.csv"
player_seasons = pd.read_csv(PLAYER_SEASONS_FILENAME)

In [20]:
# Build the per-game frame whose columns are later split by position into
# player ids and model features.
# NOTE(review): the literal 7 is presumably players per team (the model file is
# named "xgboost_7_player" and the frame has 14 player slots) - confirm against
# create_games_data and consider naming it as a constant.
per_game_df = games_data.create_games_data(fd_complete, player_seasons, 7, ROLLING_GAMES_WINDOW)

In [21]:
# Display the per-game frame for a visual check.
per_game_df

Unnamed: 0,player_id_p1,player_id_p2,player_id_p3,player_id_p4,player_id_p5,player_id_p6,player_id_p7,player_id_p8,player_id_p9,player_id_p10,...,fp_p5,fp_p6,fp_p7,fp_p8,fp_p9,fp_p10,fp_p11,fp_p12,fp_p13,fp_p14
0,bealbr01,leonaka01,georgpa01,willilo02,harremo01,bertada01,bryanth01,mahinia01,mcraejo01,smithis01,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,isaacjo01,fournev01,burksal01,greendr01,bowmaky01,pascher01,spellom01,gordoaa01,chrisma01,fultzma01,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,siakapa01,vanvlfr01,bogdabo02,mitchdo01,lowryky01,goberru01,ibakase01,powelno01,conlemi01,gasolma01,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Predict with the model

In [22]:
# Select the feature columns by position: columns 14 through 391 (378 columns).
# NOTE(review): the bounds are hard-coded; columns 0-13 are separately taken as
# the player-id columns. Confirm 392 still matches per_game_df's width whenever
# the stats list or the players-per-team setting changes.
x_range = range(14,392)
X = per_game_df.iloc[:, x_range]

In [23]:
# Display the feature matrix for a visual check.
X

Unnamed: 0,fgm_seas_avg_p1,fgm_seas_avg_p2,fgm_seas_avg_p3,fgm_seas_avg_p4,fgm_seas_avg_p5,fgm_seas_avg_p6,fgm_seas_avg_p7,fgm_seas_avg_p8,fgm_seas_avg_p9,fgm_seas_avg_p10,...,pf_l5_p5,pf_l5_p6,pf_l5_p7,pf_l5_p8,pf_l5_p9,pf_l5_p10,pf_l5_p11,pf_l5_p12,pf_l5_p13,pf_l5_p14
0,9.317073,9.333333,9.181818,6.453333,6.658537,2.684211,4.291667,1.382353,2.259259,3.660714,...,2.0,3.0,2.2,2.4,2.0,0.6,4.0,3.6,2.2,3.2
1,3.493333,5.777778,3.0,2.848485,0.0,0.0,2.130435,6.025641,1.55814,3.421053,...,2.8,1.4,1.6,1.6,2.6,2.6,2.4,1.4,1.2,1.4
2,6.4875,3.84375,6.444444,8.584416,4.676923,5.876543,6.27027,3.216667,7.0,4.936709,...,2.4,3.2,2.6,2.4,1.8,3.4,2.8,1.8,3.6,3.0


In [34]:
# Load the pre-trained model from disk. The context manager guarantees the file
# handle is closed, even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only point this at model files produced by a trusted source.
with open("../models/xgboost_7_player", "rb") as model_file:
    model = pickle.load(model_file)

In [35]:
# Run the loaded model on the feature matrix.
# NOTE(review): the result is flattened and paired with the 14 player-id
# columns further down, so it is presumably one value per player slot per
# game - confirm the output shape.
predictions = model.predict(X)

In [36]:
# First 14 columns by position, copied so the slice is independent of per_game_df.
# NOTE(review): presumably the player_id_p1..player_id_p14 columns - confirm.
names_range = range(0,14)
names_df = per_game_df.iloc[:, names_range].copy()

In [37]:
# Flatten both grids row by row into 1-D arrays. ndarray.flatten is called
# directly instead of borrowing the method from the discouraged np.matrix class.
# NOTE(review): pairing the two arrays by position afterwards assumes
# predictions has the same (games, player slots) layout as names_df - confirm.
flat_names = names_df.values.flatten()
flat_preds = np.asarray(predictions).flatten()

In [38]:
# Pair each flattened name with the prediction at the same position; zip stops
# at the shorter of the two sequences, so a length mismatch is silently truncated.
prediction_df = pd.DataFrame(list(zip(flat_names, flat_preds)), columns=['player_id', 'prediction'])

In [39]:
# Left join keeps every competition row; rows whose player_id has no match in
# prediction_df get NaN in 'prediction'.
comp_with_preds = pd.merge(competition, prediction_df, how='left', on='player_id')

In [40]:
# Keep only the rows that received a prediction.
comp_with_preds = comp_with_preds.dropna(subset=['prediction'])

In [41]:
# Write the competition rows with their predictions to a CSV named after the
# competition, leaving out the DataFrame index.
optimization_input_path = "../data/optimization_input_" + competition_key + ".csv"
comp_with_preds.to_csv(optimization_input_path, index = False)