# FanDuel data

This notebook takes an upcoming FanDuel competition (CSV export), builds the inputs the ML model needs, runs the model on them, and writes the per-player predictions to `../data/optimization_input.csv`.

In [70]:
import nbimporter
import pandas as pd
from datetime import datetime
import player_matches_data as pm_data
import games_data
import pickle
import numpy as np

In [71]:
# Load the FanDuel competition export that the rest of the notebook works on.
COMPETITION_FILENAME = "../data/FanDuel_sample_competition.csv"
competition = pd.read_csv(COMPETITION_FILENAME)

In [72]:
# Drop every row whose 'Injury Indicator' is "O" (presumably "Out" -- confirm
# against FanDuel's export format). Rows with a missing indicator are kept,
# because a missing value never compares equal to "O".
# .copy() detaches the result so later column assignments act on an
# independent frame.
is_marked_o = competition['Injury Indicator'] == "O"
competition = competition.loc[~is_marked_o].copy()

In [73]:
# Today's date at midnight: the time-of-day part is discarded so the value
# carries day precision only.
date = datetime.combine(datetime.today().date(), datetime.min.time())
year, month, day = date.year, date.month, date.day

In [74]:
def add_home_game(row):
    """Return whether the row's team is the second "@"-separated part of 'Game'.

    row: mapping-like object (e.g. a DataFrame row) providing 'team_key'
        and 'Game'. 'Game' is presumably formatted "AWAY@HOME" -- confirm
        against the FanDuel export.

    Returns True when 'team_key' equals the second piece of 'Game' split
    on "@", otherwise False.

    Raises IndexError when 'Game' contains no "@".
    """
    team = row['team_key']
    game_sides = row['Game'].split("@")
    return team == game_sides[1]

In [75]:
# Stamp every row with the run date and its components, then map FanDuel's
# column names onto the names used by the rest of the notebook.
competition = (
    competition
    .assign(date=date, year=year, month=month, day=day)
    .rename(columns={'Nickname': 'name', 'Team': 'team_key', 'Opponent': 'opponent_key'})
)

# add_home_game reads 'team_key', so this has to run after the rename.
competition['home_game'] = competition.apply(add_home_game, axis=1)

In [76]:
# Lookup table read by add_player_id (its 'name' and 'player_id' columns).
NAME_ID_MAPPING_FILENAME = "../data/name_id_mapping.csv"
name_id_mapping = pd.read_csv(NAME_ID_MAPPING_FILENAME)

In [77]:
def add_player_id(row):
    """Look up the player_id for a row by exact match on its 'name'.

    Reads the module-level ``name_id_mapping`` frame, which must have
    'name' and 'player_id' columns.

    row: mapping-like object (e.g. a DataFrame row) providing 'name'.

    Returns the 'player_id' of the first mapping row whose 'name' equals
    row['name']. When nothing matches, prints a reminder naming the
    unmatched player and returns None.
    """
    name = row['name']
    # Bracket access avoids any clash between the 'name' column and a
    # same-named attribute on the frame.
    matches = name_id_mapping.loc[name_id_mapping['name'] == name, 'player_id']

    if matches.empty:
        # format() rather than "+" so a non-string name (e.g. NaN from a
        # blank cell) is reported instead of raising TypeError.
        print("Please update player_id for name {}".format(name))
        return None

    return matches.iloc[0]

In [78]:
# Resolve each row's player_id with add_player_id. Rows whose name has no
# match get None, and add_player_id prints a reminder line for each of them.
competition['player_id'] = competition.apply(add_player_id, axis = 1)

Please update player_id for name Norvel Pelle
Please update player_id for name Thanasis Antetokounmpo
Please update player_id for name Jalen McDaniels
Please update player_id for name Michael Frazier
Please update player_id for name Robert Franks
Please update player_id for name Charlie Brown
Please update player_id for name Brian Bowen
Please update player_id for name Brandon Goodwin
Please update player_id for name Marial Shayok


In [79]:
# Discard the rows for which no player_id could be found.
competition = competition.dropna(subset=['player_id'])

In [80]:
# Run the competition rows through the two player_matches_data helpers:
# add_game_ids first, then add_season_start_year on its result.
fd_complete = pm_data.add_season_start_year(pm_data.add_game_ids(competition))

In [81]:
# Names of the stat columns that are zero-filled on the upcoming rows and
# passed to pm_data.roll_with_old_data.
stats = [
    'secs',
    'fgm', 'fga',
    '3pm', '3pa',
    'ftm', 'fta',
    'orb', 'drb',
    'ast', 'stl', 'blk', 'tvr', 'pf',
    'fp',
]

In [82]:
# Give the upcoming rows every stat column, filled with a placeholder 0
# (presumably because these games have not been played yet -- confirm).
# The columns are assigned in place on fd_complete, one at a time.
for stat in stats:
    fd_complete[stat] = 0

In [83]:
# Window size passed to pm_data.roll_with_old_data and
# games_data.create_games_data (presumably the number of most recent games
# averaged over -- confirm in those modules).
ROLLING_GAMES_WINDOW = 5

# Historical player/match rows that are rolled together with the upcoming ones.
OLD_DATA_FILENAME = "../data/20100101_20191129_player_matches.csv"
old_data = pd.read_csv(OLD_DATA_FILENAME)

In [84]:
# Hand the upcoming rows, the historical rows, the stat column names and the
# window size to pm_data.roll_with_old_data. Presumably this computes rolling
# stats over the last ROLLING_GAMES_WINDOW games -- confirm in
# player_matches_data.
fd_complete = pm_data.roll_with_old_data(fd_complete, old_data, stats, ROLLING_GAMES_WINDOW)

## Create games dataframe (input to model)

In [85]:
# Player-season table passed to games_data.create_games_data (presumably one
# row per player per season, judging by the file name -- confirm).
PLAYER_SEASONS_FILENAME = "../data/2009_2019_player_seasons_unique.csv"
player_seasons = pd.read_csv(PLAYER_SEASONS_FILENAME)

In [86]:
# Third argument of games_data.create_games_data. Presumably the number of
# players per team: the resulting frame has 14 player slots (p1..p14) and the
# model file is named "xgboost_7_player" -- confirm in games_data.
N_PLAYERS = 7

per_game_df = games_data.create_games_data(
    fd_complete,
    player_seasons,
    N_PLAYERS,
    ROLLING_GAMES_WINDOW,
)

In [87]:
# Display the frame for a visual check.
per_game_df

Unnamed: 0,player_id_p1,player_id_p2,player_id_p3,player_id_p4,player_id_p5,player_id_p6,player_id_p7,player_id_p8,player_id_p9,player_id_p10,...,fp_p5,fp_p6,fp_p7,fp_p8,fp_p9,fp_p10,fp_p11,fp_p12,fp_p13,fp_p14
0,hardeja01,youngtr01,westbru01,capelca01,parkeja01,houseda01,bembrde01,parsoch01,tuckepj01,huntede01,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,antetgi01,grahade01,roziete01,bledser01,bridgmi02,middlkh01,bendedr01,reynoca01,washipj01,biyombi01,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,embiijo01,sabondo01,simmobe01,brogdma01,harrito02,richajo01,lambje01,horfoal01,turnemy01,mitrona01,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Predict with the model

In [88]:
# Positional column bounds of the model features inside per_game_df.
# Columns 0..13 are the player_id_p1..player_id_p14 columns; the selection
# shown in this notebook's output runs from fgm_seas_avg_p1 to pf_l5_p14.
# NOTE(review): these positions are hard-coded and must be kept in sync with
# the layout produced by games_data.create_games_data.
FIRST_FEATURE_POSITION = 14
END_FEATURE_POSITION = 392  # exclusive

x_range = range(FIRST_FEATURE_POSITION, END_FEATURE_POSITION)
X = per_game_df.iloc[:, x_range]

In [89]:
# Display the feature matrix for a visual check.
X

Unnamed: 0,fgm_seas_avg_p1,fgm_seas_avg_p2,fgm_seas_avg_p3,fgm_seas_avg_p4,fgm_seas_avg_p5,fgm_seas_avg_p6,fgm_seas_avg_p7,fgm_seas_avg_p8,fgm_seas_avg_p9,fgm_seas_avg_p10,...,pf_l5_p5,pf_l5_p6,pf_l5_p7,pf_l5_p8,pf_l5_p9,pf_l5_p10,pf_l5_p11,pf_l5_p12,pf_l5_p13,pf_l5_p14
0,10.807692,6.481481,8.630137,7.074627,5.765625,3.025641,3.353659,2.72,2.52439,0.0,...,3.8,2.0,2.0,2.0,2.8,3.0,1.6,2.2,1.4,4.4
1,10.013889,1.608696,3.265823,6.025641,2.9625,6.571429,1.913043,1.736842,0.0,1.648148,...,2.0,2.6,2.8,2.0,3.0,2.6,2.2,1.6,2.4,2.4
2,9.0625,5.581081,6.835443,5.90625,7.45122,5.794521,5.455696,5.691176,5.135135,0.428571,...,2.2,2.8,1.8,0.6,3.0,3.0,2.8,1.4,1.8,2.333333


In [90]:
# Load the trained model from disk. The context manager guarantees the file
# handle is closed as soon as the model has been read.
# NOTE(review): unpickling executes code stored in the file -- only load
# model files from a trusted source.
with open("../models/xgboost_7_player", "rb") as model_file:
    model = pickle.load(model_file)

In [91]:
# Run the loaded model on the feature matrix. The result is flattened
# further down, so it is presumably 2-D (one row per game) -- confirm.
predictions = model.predict(X)

In [92]:
# The first 14 columns of per_game_df are the player id columns
# (player_id_p1 .. player_id_p14). Copy them out so they can be lined up
# with the predictions.
names_range = range(14)
player_id_block = per_game_df.iloc[:, names_range]
names_df = player_id_block.copy()

In [93]:
# Flatten both arrays row by row (C order) into 1-D so that position i of
# flat_names lines up with position i of flat_preds. ndarray.flatten is
# called directly rather than through the discouraged np.matrix class.
flat_names = names_df.values.flatten()
# np.asarray is a no-op for an ndarray and also accepts other array-likes.
flat_preds = np.asarray(predictions).flatten()

In [94]:
# Pair each player id with its prediction, position by position. zip() would
# silently stop at the shorter input and mis-pair or drop players, so a
# length mismatch is reported as an error instead.
if len(flat_names) != len(flat_preds):
    raise ValueError(
        "Got {} player ids but {} predictions; cannot pair them one-to-one".format(
            len(flat_names), len(flat_preds)
        )
    )

prediction_df = pd.DataFrame(
    list(zip(flat_names, flat_preds)),
    columns=['player_id', 'prediction'],
)

In [95]:
# Attach the predictions to the competition rows by player_id. The left join
# keeps every competition row; rows without a matching prediction get NaN
# in 'prediction'.
comp_with_preds = pd.merge(competition, prediction_df, how='left', on='player_id')

In [96]:
# Keep only the rows that actually have a prediction.
comp_with_preds = comp_with_preds.dropna(subset=['prediction'])

In [97]:
# Write the result to the optimization input file; index=False leaves the
# DataFrame index out of the CSV.
comp_with_preds.to_csv("../data/optimization_input.csv", index = False)