In [1]:
import pandas as pd
from datetime import datetime
from basketball_reference_web_scraper import client
import joblib
import unicodedata
import numpy as np
import xgboost as xgb
import os
from sqlalchemy import *
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session

In [23]:
# Read the PostgreSQL connection URL from the environment so that credentials
# never live in the notebook. The password that used to be embedded here has
# been exposed in the notebook history and should be rotated.
database_url = os.environ.get('FANDUEL_DATABASE_URL')
if not database_url:
    raise RuntimeError(
        'Set the FANDUEL_DATABASE_URL environment variable to the PostgreSQL connection URL.'
    )
engine = create_engine(database_url)
# Automap base; no tables are reflected until Base.prepare(...) is called.
Base = automap_base()

[]

In [None]:
# Restore two lookup objects previously saved with joblib:
#   player_slugs_names                -- indexed by player nickname to obtain a slug
#   team_abbreviations_full_name_dict -- indexed by team abbreviation to obtain a full name
# NOTE: joblib.load unpickles its input; only load files from a trusted source.
player_slugs_names, team_abbreviations_full_name_dict = (
    joblib.load(saved_path)
    for saved_path in (
        'joblib_objects/player_slugs_names',
        'joblib_objects/team_abbreviations_full_name_dict',
    )
)

In [None]:
# Read the contest export (one row per available player), rename the contest's
# own player id column, and drop rows whose 'Injury Indicator' is exactly 'O'
# (presumably "out" -- confirm against the export format). Rows with a missing
# indicator are kept because NaN != 'O' evaluates to True.
contest_file = 'test_contest/test_contest.csv'
contest_df = (
    pd.read_csv(contest_file, index_col=0)
    .rename(columns={'Id': 'FD_player_ID'})
    .loc[lambda players: players['Injury Indicator'] != 'O', :]
)
contest_df

In [None]:
def strip_accents(s):
    """Return ``s`` with nonspacing combining marks removed.

    The string is decomposed with Unicode NFD normalization and every code
    point whose general category is ``Mn`` is dropped; all other characters
    are kept in their decomposed form.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)

# Keep only the columns used downstream. The explicit .copy() makes this an
# independent frame, so the column assignments below do not trigger
# SettingWithCopyWarning.
contest_df = contest_df[['FD_player_ID','Position','Nickname','FPPG','Salary','Game','Opponent']].copy()
# Drop periods from nicknames (literal match, not a regex) before the lookup.
contest_df['Nickname'] = contest_df['Nickname'].str.replace('.', '', regex=False)
# A nickname missing from player_slugs_names raises KeyError here.
# NOTE(review): strip_accents is defined in this cell but is not applied to
# the nicknames -- confirm whether the lookup keys contain accents.
contest_df['slug'] = contest_df['Nickname'].apply(lambda x: player_slugs_names[x])

In [None]:
# 'Game' is split on '@' and the second part is treated as the home team.
# location is 1 when the row's Opponent equals that home team and 0 otherwise.
# This must run while 'Opponent' still holds the same abbreviations used in
# 'Game'. A vectorized comparison replaces the per-row chained assignment
# (df[col].iloc[i] = ...), which is unreliable and does nothing under pandas
# Copy-on-Write; it also gives an integer column instead of object dtype.
home_teams = contest_df['Game'].str.split('@').str[1]
contest_df['location'] = (contest_df['Opponent'] == home_teams).astype(int)

In [None]:
# Replace each opponent abbreviation with the value stored for it in
# team_abbreviations_full_name_dict (KeyError if an abbreviation is missing).
# NOTE: this overwrites 'Opponent' in place, so re-running the cell looks up
# the already-translated values.
contest_df['Opponent'] = [
    team_abbreviations_full_name_dict[abbreviation]
    for abbreviation in contest_df['Opponent']
]

In [None]:
# Restore the fitted label encoders for player slugs and team names.
# NOTE: joblib.load unpickles its input; only load files from a trusted source.
player_label_encoder, team_label_encoder = (
    joblib.load(encoder_path)
    for encoder_path in (
        'joblib_objects/player_label_encoder',
        'joblib_objects/team_label_encoder',
    )
)

In [None]:
# Encode each column with a single transform() call instead of one call per
# row. The encoders already accept a list (the original passed [x]), so the
# whole column is handed over as a list.
# Presumably these are scikit-learn LabelEncoder objects -- confirm.
contest_df['Opponent_ID'] = team_label_encoder.transform(contest_df['Opponent'].tolist())
contest_df['player_ID'] = player_label_encoder.transform(contest_df['slug'].tolist())

In [None]:
# Per-game box-score columns for which rolling and cumulative averages are built.
past_7_features = [
    'points_scored',
    'seconds_played',
    'made_field_goals',
    'attempted_field_goals',
    'made_three_point_field_goals',
    'attempted_three_point_field_goals',
    'made_free_throws',
    'attempted_free_throws',
    'offensive_rebounds',
    'defensive_rebounds',
    'assists',
    'steals',
    'blocks',
    'turnovers',
    'game_score',
]

# Full base column list: target/context columns, the box-score columns above
# (same order), rest-day columns, then team-level rating columns.
base_features = [
    'FD_pts_scored', 'location', 'opponent_id',
    *past_7_features,
    'rest', 'no_rest', '1_day_rest', '2_day_rest', '3_day_rest', '4_day_rest',
    '5_day_rest', '5_plus_day_rest',
    'Simple_Rating_System', 'Offensive_Rating', 'Defensive_Rating', 'Net_Rating',
    'Pace', 'Free_Throw_Rate', '3_Pt_Rate', 'Turnover_Percentage',
    'Offensive_Rebound_Percentage', 'Opponent_EFG', 'Opponent_Turnover_Percentage',
    'Opponent_Defensive_Rebound_Percentage',
]

In [None]:
# Load the team-level advanced analytics table; the first CSV column becomes the index.
advanced_analytics_table = pd.read_csv(
    'cleaned_data/advanced_analytics/advanced_analytics_total.csv',
    index_col=0,
)
advanced_analytics_columns = advanced_analytics_table.columns
# Display every column label after the first two.
advanced_analytics_columns[2:]

In [None]:
# Use the first contest row as the test player and load that player's
# per-game CSV, which is named after the player's slug.
test_player = contest_df.iloc[0]
test_player_slug = test_player['slug']

player_csv_path = f'cleaned_data/player_data/{test_player_slug}.csv'
df = pd.read_csv(player_csv_path, index_col=0)

In [None]:
def get_historic_features(df, features=None, window=7):
    """Build rolling and cumulative average features from a player's game log.

    For every column in ``features`` two columns are produced:

    * ``<feature>_last_<window>`` -- mean over the current row and up to
      ``window - 1`` preceding rows (fewer rows are averaged near the start).
    * ``<feature>_average`` -- mean over all rows up to and including the
      current one.

    Parameters
    ----------
    df : pandas.DataFrame
        Game log in row order. Must contain every column in ``features`` and a
        ``'rest'`` column. It is not modified.
    features : sequence of str, optional
        Columns to aggregate. Defaults to the notebook-level
        ``past_7_features`` list.
    window : int, default 7
        Rolling window length.

    Returns
    -------
    pandas.DataFrame
        The remaining columns of ``df`` (without ``'rest'`` and the raw
        feature columns), followed by the rolling columns and then the
        cumulative columns, on a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If ``'rest'`` or any requested feature column is missing from ``df``.

    Notes
    -----
    Missing values are skipped inside a window (``min_periods=1``), so a
    window only yields NaN when all of its values are missing.
    """
    if features is None:
        features = past_7_features
    features = list(features)

    stats = df[features]
    # min_periods=1 averages whatever games exist so far for the first rows,
    # replacing the per-row warm-up loop.
    last_n = (
        stats.rolling(window=window, min_periods=1)
        .mean()
        .add_suffix(f'_last_{window}')
    )
    # The expanding mean is the running average over all games so far.
    averages = stats.expanding().mean().add_suffix('_average')

    # Drop the raw inputs, plus any stale derived columns already on df, so
    # the result never contains duplicate column names.
    derived_columns = [*last_n.columns, *averages.columns]
    passthrough = (
        df.drop(columns=['rest', *features])
        .drop(columns=derived_columns, errors='ignore')
    )

    df_model_features = pd.concat([passthrough, last_n, averages], axis=1)
    return df_model_features.reset_index(drop=True)

In [None]:
# Model input columns: the 7-game rolling means followed by the cumulative
# averages, each in past_7_features order (the same names that
# get_historic_features creates).
X_features = (
    [f'{feature}_last_7' for feature in past_7_features]
    + [f'{feature}_average' for feature in past_7_features]
)

# The last row of the engineered frame supplies both the date of the latest
# game and the historic feature values.
latest_row = get_historic_features(df).iloc[-1]
most_recent_game_date = latest_row['date']
most_recent_game = latest_row[X_features]

In [None]:
# NOTE: this rebinds the name `datetime` to the module, shadowing the
# `from datetime import datetime` in the imports cell.
import datetime

# Whole days between the most recent game date (parsed as midnight,
# '%Y-%m-%d') and now, minus one day.
current_date = datetime.datetime.now()
most_recent_game_date_converted = datetime.datetime.strptime(most_recent_game_date, '%Y-%m-%d')
elapsed_since_game = current_date - most_recent_game_date_converted
days_rest = (elapsed_since_game - datetime.timedelta(days=1)).days

In [None]:
# One-row frame holding the test player's location flag and encoded opponent.
location_opponent_id_df = pd.DataFrame({
    'location': [test_player['location']],
    'Opponent_ID': [test_player['Opponent_ID']],
})

# One-hot rest indicators: exactly one is 1 for days_rest >= 0, and all are 0
# when days_rest is negative.
exact_rest_columns = ['no_rest', '1_day_rest', '2_day_rest', '3_day_rest', '4_day_rest', '5_day_rest']
for exact_days, rest_column in enumerate(exact_rest_columns):
    location_opponent_id_df[rest_column] = int(days_rest == exact_days)
location_opponent_id_df['5_plus_day_rest'] = int(days_rest > 5)

# Select the opponent's analytics row for year 2019.
# NOTE(review): the year is hard-coded -- confirm it matches the contest season.
opponent_id = location_opponent_id_df['Opponent_ID'].iloc[0]
is_target_year = advanced_analytics_table['year'] == 2019
is_opponent = advanced_analytics_table['Team_ID'] == opponent_id
analytics = advanced_analytics_table.loc[is_target_year & is_opponent, advanced_analytics_columns[2:]]
analytics = analytics[[
    'Simple_Rating_System', 'Offensive_Rating', 'Defensive_Rating', 'Net_Rating',
    'Pace', 'Free_Throw_Rate', '3_Pt_Rate', 'Turnover_Percentage',
    'Offensive_Rebound_Percentage', 'Opponent_EFG', 'Opponent_Turnover_Percentage',
    'Opponent_Defensive_Rebound_Percentage',
]]

In [None]:
# Stack the three feature groups into a single Series, in this order:
# location/opponent/rest flags, opponent analytics, historic player averages.
model_input_parts = [
    location_opponent_id_df.iloc[0],
    analytics.iloc[0],
    most_recent_game,
]
testing_df = pd.concat(model_input_parts, axis=0)
# Column-vector view of the same values (not used by the visible cells).
tesing_list = np.array(testing_df).reshape(-1,1)

# Load the saved model for this player.
# NOTE: joblib.load unpickles its input; only load files from a trusted source.
model_path = f'models/{test_player_slug}_model.dat'
xgb_model = joblib.load(model_path)

In [None]:
# Reshape the feature Series into a single-row 2-D array and predict on it.
single_row_features = pd.DataFrame(testing_df).T.values
prediction = xgb_model.predict(single_row_features)

In [None]:
slug = test_player['slug']
salary = test_player['Salary']
position = test_player['Position']
# NOTE(review): 5 points per $1000 of salary and the divisor 10 are
# unexplained constants -- confirm their meaning.
pts_projection = (salary / 1000) * 5
pts_spread = (prediction - pts_projection) / 10

# Build the frame directly from the row. The original called
# DataFrame.append, whose result was discarded (predictions_df stayed empty)
# and which no longer exists in pandas 2.0. The prediction is an array, so
# its first element is stored as a plain float.
predictions_df = pd.DataFrame(
    [{
        'slug': slug,
        'pts_spread': float(np.ravel(pts_spread)[0]),
        'position': position,
        'salary': salary,
    }],
    columns=['slug', 'pts_spread', 'position', 'salary'],
)
predictions_df

In [None]:
import random

# Placeholder outcome table: each player gets a uniform random pts_spread in
# [0, 1). The .copy() makes test_outcome independent of contest_df, so adding
# a column does not trigger SettingWithCopyWarning. The values are drawn in
# one vectorized call.
# NOTE: no seed is set, so the values change on every run.
test_outcome = contest_df[['slug','Position','Salary']].copy()
test_outcome['pts_spread'] = np.random.uniform(0, 1, size=len(test_outcome))

In [None]:
def _five_lowest_spread(position):
    """Return, as a NumPy array, the five rows of test_outcome with the smallest pts_spread at ``position``."""
    at_position = test_outcome.loc[test_outcome['Position'] == position]
    return at_position.sort_values('pts_spread', ascending=True).iloc[:5].to_numpy()


# Despite the *_df names these are NumPy arrays (rows of slug, Position,
# Salary, pts_spread).
# NOTE(review): ascending=True keeps the smallest spreads -- confirm that is intended.
sorted_predictions_pg_df = _five_lowest_spread('PG')
sorted_predictions_sg_df = _five_lowest_spread('SG')
sorted_predictions_sf_df = _five_lowest_spread('SF')
sorted_predictions_pf_df = _five_lowest_spread('PF')
sorted_predictions_c_df = _five_lowest_spread('C')

In [None]:
import random

# Placeholder player values: one uniform draw in [0, 1) per contest row.
# NOTE: no seed is set, so the values change on every run.
contest_df['values'] = 1
contest_df['values'] = [
    placeholder * np.random.uniform(0, 1) for placeholder in contest_df['values']
]

# Inputs for the lineup optimisation: the salary cap and per-player arrays.
max_salary = 60_000
salaries = contest_df['Salary'].values
values = contest_df['values'].values

# List of FD positions.
FD_POSITION_LIST = ['PG', 'SG', 'PF', 'SF', 'C']

In [None]:
# NOTE: `cvxpy` is not imported in any visible cell; add `import cvxpy` to the
# imports cell so this runs on a fresh kernel.

# One binary decision per player (1 = in the lineup). A continuous unbounded
# variable would make the maximisation unbounded.
selection = cvxpy.Variable(len(salaries), boolean=True)

# `@` is the matrix/dot product, matching how total_value is built below.
salary_constraint = salaries @ selection <= max_salary
lineup_constraint = np.ones(len(salaries)) @ selection == 9
constraints = [salary_constraint, lineup_constraint]

# Minimum number of players per position, aligned with FD_POSITION_LIST.
position_min = [2, 2, 2, 2, 1]
# 0/1 membership vector per position, in the same row order as salaries and
# values. The original left this dict empty, so every lookup raised KeyError.
pos_limits = {
    pos: (contest_df['Position'] == pos).to_numpy().astype(float)
    for pos in FD_POSITION_LIST
}

for pos, minimum in zip(FD_POSITION_LIST, position_min):
    constraints.append(pos_limits[pos] @ selection >= minimum)

total_value = values @ selection

In [None]:
# Maximise the summed value of the selected players subject to the constraints
# collected above. The problem is only constructed here, not solved.
objective = cvxpy.Maximize(total_value)
knapsack_problem = cvxpy.Problem(objective, constraints)

In [None]:
# Small scratch frame: two candidate rows with two PG slots and two SG slots.
test = pd.DataFrame(
    [
        [1, 1, 1, 1],
        [2, 3, 2, 3],
    ],
    columns=['PG1', 'PG2', 'SG1', 'SG2'],
)

In [None]:
# Keep rows whose two PG slots differ and whose two SG slots differ. Each
# comparison is parenthesised because `&` binds tighter than `!=`; without the
# parentheses the expression becomes a chained comparison on Series and raises
# ValueError.
test.loc[(test['PG1'] != test['PG2']) & (test['SG1'] != test['SG2']), :]

In [7]:
# List the saved per-player model files. The original line read
# `modelsos.listdir(...)`, which is a NameError; it looks like a stray
# "models" label fused onto the os.listdir call.
# NOTE(review): the model is loaded from 'models/...' elsewhere in this
# notebook but listed from '../models' here -- confirm the working directory.
os.listdir('../models')

['abrinal01_model.dat',
 'acyqu01_model.dat',
 'adamsst01_model.dat',
 'adebaba01_model.dat',
 'afflaar01_model.dat',
 'ajincal01_model.dat',
 'aldrico01_model.dat',
 'aldrila01_model.dat',
 'allenja01_model.dat',
 'allenka01_model.dat',
 'allenla01_model.dat',
 'allento01_model.dat',
 'aminual01_model.dat',
 'anderal01_model.dat',
 'anderch01_model.dat',
 'anderju01_model.dat',
 'anderky01_model.dat',
 'anderry01_model.dat',
 'anigbik01_model.dat',
 'antetgi01_model.dat',
 'anthoca01_model.dat',
 'anthojo01_model.dat',
 'anunoog01_model.dat',
 'arcidry01_model.dat',
 'arizatr01_model.dat',
 'artesro01_model.dat',
 'arthuda01_model.dat',
 'artisja01_model.dat',
 'asikom01_model.dat',
 'augusdj01_model.dat',
 'babbilu01_model.dat',
 'bacondw01_model.dat',
 'bakerro01_model.dat',
 'baldwwa01_model.dat',
 'balllo01_model.dat',
 'barbole01_model.dat',
 'bareajo01_model.dat',
 'barneha02_model.dat',
 'barnema02_model.dat',
 'bartowi01_model.dat',
 'bassbr01_model.dat',
 'batumni01_model.dat