In [None]:
# Notebook bootstrap: load the Kedro project context so datasets can be read
# through the project's data catalog.
from pathlib import Path
from kedro.framework.context import load_context

current_dir = Path.cwd()  # working directory; expected to be the 'notebooks/' folder
proj_path = current_dir.parent  # point back to the root of the project
context = load_context(proj_path)
catalog = context.catalog

# Silence all Python warnings for the rest of the session. The filters are
# installed before the data/plotting libraries below are imported.
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

# Show the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Setting up the positional datasets
Here I'm going to load up a few datasets and then separate them out by position.

In [None]:
from phantasyfootballer.common import Stats, PLAYER_NAME, TEAM, POSITION


# Scoring system whose projections are loaded, and a short alias for the
# fantasy-points column used throughout the notebook.
SCORING_MODE = 'ppr'
FP = Stats.FANTASY_POINTS


# Load the scoring dataset once, then carve out one frame per skill position.
df_all = catalog.load(f'scoring.{SCORING_MODE}')
df_qb, df_wr, df_rb, df_te = (
    df_all.query(position_filter)
    for position_filter in (
        'position=="QB"',
        'position=="WR"',
        'position=="RB"',
        'position=="TE"',
    )
)


# Quick sanity check: row count per position, plus the available columns.
len(df_qb)
len(df_rb)
len(df_te)
len(df_wr)
df_qb.columns


Now, let's get a sense of how many players are better than average (the average player will have a value of 1).

In [None]:
# One distribution panel per skill position, laid out on a 2x2 grid.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 12))
position_panels = (
    ('QB', df_qb, ax1),
    ('RB', df_rb, ax2),
    ('WR', df_wr, ax3),
    ('TE', df_te, ax4),
)
for position_label, position_df, panel_ax in position_panels:
    panel = sns.distplot(position_df.percent_average_position, ax=panel_ax)
    panel.set_title(f'Distribution of `%` avg position {position_label}')

# Defining plotting functions
Here it is helpful to wrap the plotting calls in a function, so I don't have to repeat everything in order to plot the graphs for each position.

In [None]:
def plot_4(plot, data, title, **kwargs):
    '''
    Draw the same plot for each skill position on a 2x2 grid of axes.

    Parameters
    ----------
    plot : callable
        Plotting function. It is called once per panel as
        ``plot(data=..., ax=..., **kwargs)`` and must return an object
        that has a ``set_title`` method.

    data : list
        The four datasets to plot, read by index 0-3 and labelled
        QB, RB, WR, TE in that order.

    title : str
        Title prefix; the position label is appended to it on each panel.

    **kwargs : dict
        Extra keyword arguments forwarded unchanged to every ``plot`` call.
    '''
    _, grid = plt.subplots(2, 2, figsize=(20, 12))
    labels = ('QB', 'RB', 'WR', 'TE')
    # grid.flat walks the axes row by row: top-left, top-right, bottom-left, bottom-right.
    for slot, (label, axis) in enumerate(zip(labels, grid.flat)):
        plot(data=data[slot], **kwargs, ax=axis).set_title(f'{title} {label}')

def box_4(data, **kwargs):
    '''
    Draw a seaborn distribution plot for every entry of ``data``.

    Parameters
    ----------
    data : iterable
        The datasets to plot; ``sns.distplot`` is called once per entry.

    **kwargs : dict
        Extra keyword arguments forwarded to every ``sns.distplot`` call.
        (They used to be accepted but silently ignored.)

    Notes
    -----
    This function does not choose an axes itself; unless ``ax`` is passed
    through ``kwargs``, the target axes is whatever seaborn selects.
    '''
    for pos in data:
        sns.distplot(pos, **kwargs)

In [None]:
# Box plots of the overall percent-of-mean stat, split by the top-player flag,
# with one panel per position.
position_frames = [df_qb, df_rb, df_wr, df_te]
plot_4(
    sns.boxplot,
    position_frames,
    'Distribution of `%` avg position',
    x=Stats.TOP_PLAYER,
    y=Stats.PCT_MEAN_OVR,
);


Looking at all the players, ranking them by their overall draft value based on the number of points expected over the average player, and the median player.

In [None]:
# Keep only rows selected by the top-player filter, order them by the overall
# percent-of-mean stat (best first), and show the identifying/ranking columns.
overall_columns = [
    PLAYER_NAME,
    POSITION,
    Stats.RANK,
    Stats.POS_RANK,
    Stats.PCT_MEAN_OVR,
    Stats.PCT_MEDIAN_OVR,
]
ranked_top = df_all.query(Stats.TOP_PLAYER).sort_values(by=Stats.PCT_MEAN_OVR, ascending=False)
df_overall = ranked_top[overall_columns]
df_overall

## Player Value
Alright, let's assume a player's value is based on the value they bring over the worst player at the position that we are willing to consider (this is the TOP_PLAYER filter). Let's look at all the players and consider their value relative to that player.

In [None]:
# Rows flagged as top players, highest fantasy points first.
top_player_mask = df_all[Stats.TOP_PLAYER]
top_players = df_all[top_player_mask].sort_values(by=Stats.FANTASY_POINTS, ascending=False)

# Total fantasy points contributed by the top players of each position.
top_players.groupby(POSITION)[Stats.FANTASY_POINTS].sum()

In [None]:
# Share of the position's total fantasy points contributed by each player.
# transform('sum') broadcasts the per-position total back onto every row, which
# avoids running a Python lambda once per group.
position_totals = df_all.groupby(POSITION)[FP].transform('sum')
df_all['position_value'] = df_all[FP] / position_totals

# Walk each position from its most to least valuable player, accumulating the
# shares. Selecting the 'position_value' column after the groupby yields a
# Series, so the assignment below no longer relies on pandas coercing a
# one-column DataFrame into a single column.
cumulative_share = (
    df_all[[POSITION, 'position_value']]
    .sort_values('position_value', ascending=False)
    .groupby(POSITION)['position_value']
    .cumsum()
)

# 1 minus the running total is the share of the position's points still left
# after this player and everyone ranked above them. The assignment aligns on
# the index, so the sorted order above does not reorder df_all.
df_all['value_remaining'] = 1 - cumulative_share

In [None]:
# Every player, ordered from the largest to the smallest 'value_remaining'.
df_all.sort_values(by='value_remaining', ascending=False)