# Fantasy Hockey Draft

This notebook simulates a fantasy hockey draft using Markowitz optimization strategies, and also supports the possibility of a human player.

In [None]:
import numpy as np
import pandas as pd
import cvxpy as cp
import importlib
import sys

sys.path.insert(1, '../')
import scripts.hockey_bots as hockey

# need to keep reloading for development work because 
# I apparently like Jupyter too much 
importlib.reload(hockey)

## Importing Data Created in `Data Wrangling.ipynb`

In [None]:
def _points_by_player(frame):
    """Collapse `frame` to one row per player_id holding the list of that player's `points`."""
    per_player = frame[['player_id', 'points']].groupby('player_id')
    return per_player.agg(lambda x: list(x)).reset_index()


# NOTE(review): both frames are read from the same CSV, so "next year" is
# currently identical to this year -- confirm whether another season file
# was intended here.
player_data = pd.read_csv("../data/fixed_data_2018.csv")
next_year = pd.read_csv("../data/fixed_data_2018.csv")

scores = _points_by_player(player_data)
scores_next = _points_by_player(next_year)

# Keep only the players that appear in both frames.
in_both = scores_next.player_id.isin(scores.player_id.unique())
scores_next = scores_next[in_both]
scores = scores[scores.player_id.isin(scores_next.player_id)].reset_index(drop = True)
# This only renumbers the rows; it does not reorder them. Both frames come
# out of a groupby on player_id, which sorts its keys by default.
scores_next = scores_next.set_index(scores.player_id).reset_index(drop=True)

player_data = player_data[player_data.player_id.isin(scores.player_id)]

# One column per player_id, one row per entry of that player's points list.
# np.transpose needs every player to have the same number of entries.
all_points = pd.DataFrame(np.transpose(scores.points.tolist()), columns = scores.player_id)
all_points_next = pd.DataFrame(np.transpose(scores_next.points.tolist()), columns = scores_next.player_id)

# player_ids ordered from highest to lowest mean points.
idx = list(all_points.mean().sort_values(ascending=False).index)


In [None]:
# Display the player table (pandas truncates the rendering of long frames).
player_data

In [None]:

# Min-max scale every score into [0, 1] using the global extremes of the table.
# The offset must be the global minimum (min of the per-column minima); the
# previous version subtracted the *largest* per-column minimum, which shifted
# the data by the wrong amount and made the cell change its output on re-runs.
lowest = all_points.min().min()
highest = all_points.max().max()
all_points = (all_points - lowest) / (highest - lowest)
all_points

## Finding Indexes
As we're only interested in aggregate indexes of average player returns, we need to find each player's index according to the aggregation, as well as the indexes of the players in each position, which is done below.

In [None]:
# Finding index in an aggregate score for each position
# player_ids in the column order of `all_points`.
pointies = list(all_points.mean().index)


def _indexes_for(position):
    """Call hockey.position_indexes for the given primary-position code."""
    return hockey.position_indexes(pointies, all_points, player_data, idx, position)


# One lookup per position code, in the same order as before.
defence = _indexes_for("D")
center = _indexes_for("C")
goalie = _indexes_for("G")
right_wingers = _indexes_for("RW")
left_wingers = _indexes_for("LW")

In [None]:
def exclude_retired(players, names, points=None):
    """Return the column positions, in the points table, of the named players.

    Parameters
    ----------
    players : pandas.DataFrame
        Must provide `player_id`, `firstName` and `lastName` columns.
    names : iterable of str
        Full names written as "First Last". The first whitespace-separated
        token is matched against `firstName`; everything after it is matched
        against `lastName`. Matching is a case-insensitive literal substring
        test.
    points : pandas.DataFrame, optional
        Table whose column labels are player ids. Defaults to the
        notebook-level `all_points`.

    Returns
    -------
    list of int
        For every resolvable name, the position of the first matching
        player's id among the columns of `points`, in the order of `names`.
        Names that cannot be resolved are reported with a printed
        "<name> is bad" and skipped.
    """
    if points is None:
        points = all_points

    # Built once instead of once per name: first position of each player id.
    position_of = {}
    for position, player_id in enumerate(points.columns):
        position_of.setdefault(player_id, position)

    retired = []
    for name in names:
        parts = name.split()
        if len(parts) < 2:
            # No separate first/last name to match on.
            print(name, "is bad")
            continue
        first, last = parts[0], " ".join(parts[1:])

        # regex=False: dots or brackets in a name are matched literally.
        # na=False: rows with a missing name simply do not match.
        matches = players[
            players.firstName.str.contains(first, case=False, regex=False, na=False)
            & players.lastName.str.contains(last, case=False, regex=False, na=False)
        ]

        ids = matches['player_id'].unique()
        if len(ids) == 0 or ids[0] not in position_of:
            print(name, "is bad")
            continue
        retired.append(position_of[ids[0]])

    return retired

# Players treated as retired. Spellings must match the data set, otherwise
# the name is reported as "is bad" and silently kept in the draft pool
# ('Chrus Butler' and 'Dan Giardi' were typos).
ret_names = ['Brooks Orpik', 'Matt Hendricks', "Roberto Luongo", 'Chris Butler', 'Matt Cullen', 
             'Chris Kunitz', 'Wade Megan', 'Stephen Gionta', 'Mike McKenna', 'Cam Ward', 
             'Ben Lovejoy', 'Niklas Kronwall', 'Dan Girardi', 'Eric Gryba', 'Lee Stempniak', 
             'Scott Eansor', 'Michael Leighton', 'Chris Thorburn', 'Dennis Seidenberg']



## Running the Draft

Here we run the actual draft. Note that `order` defines the order of the draft and automatically reverses after each round. Also note that the teams will be returned in the order that they appear in the `functions` list in the final result.

In [None]:
# One strategy per team: six optimizer-driven teams followed by two human ones.
functions = [hockey.optim_player] * 6 + [hockey.human] * 2

# A separate, initially empty list for each position.
greedy_selections = {
    position: []
    for position in ('goalie', 'defence', 'center', 'right_winger', 'left_winger')
}

# Pick order for the draft, expressed as indexes into `functions`.
order = [3,0,6,7,2,4,5,1]

print(len(order), len(functions))

team_names = [
    name.upper()
    for name in ("Fighting Squirrels", "Snorky Speak Man", "Sad Skaters",
                 "Burning Ice", "destructus", "frozen hope",
                 "Byron", "Big G")
]

# Keyword arguments forwarded to hockey.draft. The list-valued entries hold
# one value per team, in the same order as `functions` (None for the two
# human teams).
args = {
    'scores': all_points,
    'gammaa': [0.01, 0.01, 0.03, 0.8, 0, 0.89, None, None],
    'greedy_selections': greedy_selections,
    'df': player_data,
    'defence': defence,
    'goalie': goalie,
    'center': center,
    'left_wingers': left_wingers,
    'right_wingers': right_wingers,
    'selection': ['max', 'optim', 'optim', 'max', 'max', 'optim', None, None],
    'sub_gamma': [None, 0.3, 0.8, None, None, .02, None, None],
}

# NOTE(review): `taken` is computed here but never handed to hockey.draft --
# confirm whether the retired players are meant to be excluded from the draft.
taken = exclude_retired(player_data, ret_names)
all_players, teams = hockey.draft(functions, order, pause= True, team_names = team_names, team_size=16, **args)





# Seeing Results

Below the distributions of points (from the previous season) are shown based on the teams constructed in `teams` 

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Season used to score the drafted teams.
next_year = pd.read_csv("../data/scores2.csv_2017")
team_names =["Fighting Squirrels", "Snorky Talk Man", "Sad Skaters", 
            "Burning Ice", "destructus", "frozen hope",
            "Byron", "Graham"]
order = [3,0,6,7,2,4,5,1]
team_names = [x.upper() for x in team_names]
fig, ax = plt.subplots(3, 3)
fig.set_size_inches(12, 10.5)
fig.tight_layout(pad =6)
cols = []
sns.set(font_scale = 1)
# Per-team summary rows, turned into a DataFrame in the next cell.
for_frame = {'Team Name': [], 'Average Player Return': [], "Standard Deviation":[],
             "Maximum Possible Points (2017)":[]}
for i in range(len(teams)):
    team = order[i]
    panel = ax[i//3, i%3]

    # list(<DataFrame>) yields its column labels, i.e. the player_ids of the
    # columns selected by teams[team]. Filter once and reuse the result: the
    # original re-ran this same filter for every statistic.
    ohboy = next_year[next_year.player_id.isin(list(all_points.iloc[:,teams[team]]))]
    if i == 1:
        # order[1] == 0, so this is team_names[0]; later cells simulate it.
        ohboy_squirrel = ohboy
    playerids = ohboy.groupby('player_id').count().index

    # numeric_only=True: without it, pandas >= 2.0 raises on non-numeric columns.
    lineup = ohboy.groupby(['firstName', 'lastName', 'primaryPosition']).mean(numeric_only=True)

    team_points = ohboy['points']
    mean_score = team_points.mean()
    sum_score = team_points.sum()
    variance_score = team_points.std()  # a standard deviation, despite the name
    for_frame['Team Name'].append(team_names[team])
    for_frame['Average Player Return'].append(mean_score)
    for_frame["Standard Deviation"].append(variance_score)
    for_frame["Maximum Possible Points (2017)"].append(sum_score)

    team_points.hist(ax=panel, bins=30)
    title =  team_names[team] + '\n' + " Maximum Possible Points = " + str(round(sum_score,0)) 
    cols.append(title)
    panel.set_title(title)
    panel.set_ylim([0,600])
    panel.set_xlim([-25,25])
    panel.set_ylabel("Count")
    panel.set_xlabel("Bin Value")

# The grid has nine panels; hide the ones that no team was drawn into.
for unused in ax.flat[len(teams):]:
    unused.set_visible(False)

In [None]:
# Per-team summary, best average player return first, rounded for display.
(
    pd.DataFrame(for_frame)
    .sort_values("Average Player Return", ascending=False)
    .round(2)
)

# Uncertainty Estimation

Below we simulate one of our teams via Monte Carlo to establish upper and lower bounds of what we may expect for performance of the teams we have generated in the draft.

In [None]:
import seaborn as sns

# Total of the selected team's points, shown in the plot title.
sum_score = ohboy_squirrel.points.sum()
title_text = "Fantasy Team Score 2018 - 2019\n" + "Total Points = " + str(round(sum_score,1))

plt.figure(figsize=(12,10))
ax = sns.distplot(ohboy_squirrel.points, kde=True, bins=15)
# Set after the plot is drawn, exactly as before.
sns.set(font_scale = 2)
ax.set_title(title_text, size=22  )
ax.set_xlabel("Points", size = 20)
ax.set_ylabel("Density", size = 20)
ax.set_xlim([-15,35])

In [None]:
import numpy as np
import scipy.stats  # `import scipy` alone does not guarantee scipy.stats is loaded

N_SIMULATIONS = 10000    # number of simulated seasons
GAMES_PER_SEASON = 82    # points drawn per player per simulated season

# Make the simulation repeatable.
# NOTE(review): assumes gaussian_kde.resample() draws from NumPy's global
# RandomState when no seed is passed -- confirm for the installed SciPy.
np.random.seed(2018)

simulated_score = []                      # one frame of simulated points per player
team_totals = np.zeros(N_SIMULATIONS)     # team total for each simulated season

# Group by player_id rather than lastName so two players who share a last
# name are not merged into a single distribution.
for player_id, games in ohboy_squirrel.groupby('player_id'):
    # Fit the density once per player (it does not change between seasons)
    # and draw every simulated season for that player in a single call.
    sampler = scipy.stats.gaussian_kde(games.points)
    draws = sampler.resample(N_SIMULATIONS * GAMES_PER_SEASON)
    seasons = draws.reshape(N_SIMULATIONS, GAMES_PER_SEASON)

    team_totals += seasons.sum(axis=1)

    player_sim = pd.DataFrame({'points': seasons.ravel()})
    player_sim['lastName'] = games.lastName.iloc[0]
    simulated_score.append(player_sim)

# One entry per simulated season: the whole team's total. Previously `arr`
# held per-player totals, so its standard deviation was not the uncertainty
# of the team total that is reported alongside it.
arr = team_totals.tolist()



In [None]:
# Stack every simulated frame into one long table of points.
simulated_score2 = pd.concat(simulated_score, ignore_index=True)

# Average total per simulated season; 10000 is the number of Monte Carlo
# iterations run by the simulation cell.
total_simulated_points = simulated_score2.points.sum()
sum_score = total_simulated_points / 10000

# Spread of the totals collected in `arr`.
spread = np.std(np.array(arr))
print(sum_score, spread)



In [None]:
plt.figure(figsize=(12,10))
sns.set(font_scale = 2)

# Density of the simulated points. The plot never modifies the frame, so the
# full copy that used to be taken here is not needed.
ax = sns.distplot(simulated_score2.points, kde=True, hist=False, label="Simulated")
ax.set_ylabel("Density", size = 20)
ax.set_xlabel("Points", size = 20)
# Raw strings keep the LaTeX backslashes without invalid-escape warnings;
# the rendered text is unchanged.
ax.set_title("Simulated Fantasy Team Score 2019 - 2020\n" + "Total Points = " + str(round(sum_score,1)) +
             r" $\pm$ " + str(round(2 * np.std(np.array(arr)),1)) + r" (2$\sigma$)"
             , size=22  )

# Overlay the observed distribution of the team that was simulated
# (`ohboy_squirrel`). `ohboy` is whichever team the results loop handled
# last, so the curves were being compared across different teams.
sns.distplot(ohboy_squirrel.points, kde=True, hist=False, label="2018", ax=ax)
ax.legend()
ax.set_xlim([-15,35])
ax.set_ylim([0,0.14])