In [None]:
# All imports live in one block: stdlib first, then third-party.
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import display, HTML
from kedro.framework.context import load_context

# --- Kedro project context ---------------------------------------------------
# Assumes the kernel's working directory is the project's 'notebooks/' folder,
# so the project root is its parent.
current_dir = Path.cwd()
proj_path = current_dir.parent
context = load_context(proj_path)
catalog = context.catalog

# --- Notebook display configuration -------------------------------------------
# Show the value of every top-level expression in a cell, not just the last one.
InteractiveShell.ast_node_interactivity = "all"

# Silence every warning for the rest of the session.  A single catch-all
# 'ignore' filter is sufficient: filterwarnings('ignore') with no other
# arguments installs the same filter as simplefilter('ignore').
warnings.filterwarnings('ignore')

pd.set_option('display.multi_sparse', False)
pd.set_option('display.max_rows', 500)
# Render floats right-aligned in 8 characters with two decimals.
pd.set_option('display.float_format', lambda x: '%8.2f' % x)

# Setting up the positional datasets
Here I'm going to load up a few datasets and then separate them out by position.

In [None]:
from phantasyfootballer.common import Stats, PLAYER_NAME, TEAM, POSITION

# Which catalog entry to read ('scoring.<mode>') and a short alias for the
# fantasy-points stat name.
SCORING_MODE = 'ppr'
FP = Stats.FANTASY_POINTS

# Load the dataset for the chosen scoring mode and index it by PLAYER_NAME.
df_all = catalog.load(f'scoring.{SCORING_MODE}').set_index(PLAYER_NAME)

# One frame per skill position, filtered on the `position` column.
df_qb, df_wr, df_rb, df_te = (
    df_all.query(f'position=="{pos}"') for pos in ('QB', 'WR', 'RB', 'TE')
)

# Show which columns are available on the positional frames.
df_qb.columns




In [None]:
# Group the players by POSITION and select two columns.  Column selection on a
# GroupBy is lazy, so this cell displays only the GroupBy object's repr, not rows.
# NOTE(review): the disabled `.sort_values(...)` suggests a per-position ranking
# by Stats.POS_VALUE_REM was intended — confirm, then finish or drop this cell.
df_all.groupby(POSITION)[[Stats.POS_VALUE_REM, POSITION]]#.sort_values(Stats.POS_VALUE_REM, ascending=False)


In [None]:
# List the attribute names available under pandas' `display` options namespace.
# NOTE(review): looks like a leftover exploration aid for the pd.set_option calls — confirm it is still wanted.
dir(pd.options.display)

In [None]:
# `.groups` maps each distinct POSITION value to the index labels of the rows in
# that group, so this shows which df_all index entries fall under each position.
df_all.groupby(POSITION).groups

# Defining plotting functions
Here it is helpful to wrap the plotting code in functions, so I don't have to repeat everything in order to plot the graphs for each position.

In [None]:
# Panel order of the 2x2 grid, read row by row.
_POSITION_LABELS = ('QB', 'RB', 'WR', 'TE')


def _plot_position_grid(draw, data, title):
    '''
    Draw one panel per skill position on a fresh 2x2 figure.

    Parameters
    ----------
    draw : callable
        Called as ``draw(frame, ax)`` once per panel.  It must return an
        object with a ``set_title`` method (e.g. a matplotlib Axes).

    data : list or None
        Four dataframes ordered QB, RB, WR, TE.  ``None`` falls back to the
        notebook-level ``df_qb``, ``df_rb``, ``df_wr`` and ``df_te``.

    title : str
        Prefix of every panel title; a space and the position label are
        appended.
    '''
    frames = [df_qb, df_rb, df_wr, df_te] if data is None else data
    fig, axes = plt.subplots(2, 2, figsize=(20, 12))
    for idx, label in enumerate(_POSITION_LABELS):
        # idx 0..3 -> (row, col) = (0,0), (0,1), (1,0), (1,1)
        panel = draw(frames[idx], axes[idx // 2][idx % 2])
        panel.set_title(f'{title} {label}')


def plot_4(plot, data, title='', **kwargs):
    '''
    Make a 2x2 plot with one panel for each skill position.

    Each panel is drawn with ``plot(data=<frame>, **kwargs, ax=<axes>)`` and
    titled ``'<title> <position>'``.

    Parameters
    ----------
    plot : callable
        The plotting function to call.  It must accept ``data`` and ``ax``
        keyword arguments and return an object with ``set_title``.

    data : list or None
        A list with the four dataframes, QB, RB, WR, TE.  Pass ``None`` to
        use the notebook-level ``df_qb``, ``df_rb``, ``df_wr``, ``df_te``.

    title : str
        The title prefix of each panel.

    **kwargs : dict
        Any further arguments forwarded unchanged to ``plot``
        (e.g. ``x=...``, ``y=...``).
    '''
    _plot_position_grid(
        lambda frame, ax: plot(data=frame, **kwargs, ax=ax), data, title
    )


def plot_4_xonly(plot, data, stat, title, **kwargs):
    '''
    Make a 2x2 plot of a single column for each skill position.

    Each panel is drawn with ``plot(<frame>[stat], **kwargs, ax=<axes>)`` and
    titled ``'<title> <position>'``.

    Parameters
    ----------
    plot : callable
        The plotting function to call.  It receives the selected column as
        its first positional argument plus an ``ax`` keyword argument, and
        must return an object with ``set_title``.

    data : list or None
        A list with the four dataframes, QB, RB, WR, TE.  Pass ``None`` to
        use the notebook-level ``df_qb``, ``df_rb``, ``df_wr``, ``df_te``.

    stat : hashable
        Key used to select the column to plot from each dataframe.

    title : str
        The title prefix of each panel.

    **kwargs : dict
        Any further arguments forwarded unchanged to ``plot``.
    '''
    _plot_position_grid(
        lambda frame, ax: plot(frame[stat], **kwargs, ax=ax), data, title
    )

Now, just getting a sense of how many players are better than average (the average player will have a value of 1)

In [None]:
# Per-position distribution of Stats.PCT_MEAN_OVR, then the same stat as box
# plots split by Stats.TOP_PLAYER.  Passing None uses the notebook-level frames.
# NOTE(review): sns.distplot is deprecated in seaborn >= 0.11 (histplot/displot
# replace it) — confirm the pinned seaborn version before upgrading.
plot_4_xonly(
    sns.distplot,
    None,
    stat=Stats.PCT_MEAN_OVR,
    title='Distribution of `%` avg position',
);
plot_4(
    sns.boxplot,
    None,
    title='Distribution of `%` avg position',
    x=Stats.TOP_PLAYER,
    y=Stats.PCT_MEAN_OVR,
);


Looking at all the players, ranking them by their overall draft value based on the number of points expected over the average player, and the median player.

In [None]:
# Top 50 players by Stats.POS_VALUE_REM, highest first.
# PLAYER_NAME became the index of df_all when it was loaded (set_index), so it
# can no longer be selected as a column (doing so raises KeyError); it is still
# shown as the row label of the result.
(
    df_all[[Stats.POS_VALUE, Stats.POS_VALUE_REM]]
    .sort_values(Stats.POS_VALUE_REM, ascending=False)
    .head(50)
)

## Player Value
Alright, let's assume a player's value is based on the value they bring relative to the worst player at their position that we are willing to consider (this is the TOP_PLAYER filter). Let's look at all the players and consider value relative to that player.

In [None]:
# Select the df_all columns named by Stats.VALUE_STATS.
# NOTE(review): presumably a list of the value-related stat columns; it is
# defined in phantasyfootballer.common and not visible here — confirm.
df_all[Stats.VALUE_STATS]