In [3]:
import pickle
import plotly.express as px
from plotly.graph_objs import Figure


In [4]:
def create_figure(df, x_column, y_column) -> Figure:
    x_data = df[x_column]
    y_data = df[y_column]

    min_value = y_data.min()
    max_value = y_data.max()
    season_value = y_data.iloc[-1]
    
    fig = px.line(x=x_data, y=y_data, markers=True)
    fig.update_layout(
        xaxis_title=x_column,
        yaxis_title=y_column,
        title=f'{df['Name'].iloc[0]} {y_column} over time'
    )
    fig.add_hline(y=min_value, line_dash="dot", line_color="black", annotation_text=f'Min: {min_value}', annotation_position="bottom right")
    fig.add_hline(y=max_value, line_dash="dot", line_color="black", annotation_text=f'Max: {max_value}', annotation_position="top right")
    
    fig.add_annotation(text=f"End of Season Value {season_value}", x=x_data.iloc[-12], y=season_value-season_value*0.05, showarrow=False)
    
    fig.update_yaxes(nticks=20)
    return fig

In [5]:
player_dfs_2023 = pickle.load(open('2023_player_dfs.pkl', 'rb'))

In [6]:
player_dfs_2023.keys()

dict_keys([682928, 547989, 660670, 642715, 645277, 624413, 641313, 606115, 571448, 668227, 650333, 595879, 605137, 641355, 643217, 605141, 666182, 593428, 664761, 608324, 600869, 592192, 682998, 592206, 656305, 661388, 621043, 641487, 630105, 605204, 650559, 646240, 553869, 650490, 592273, 669242, 642731, 664034, 518692, 670770, 666969, 665926, 502671, 663757, 665489, 666971, 664023, 671739, 663647, 669720, 683002, 571771, 606192, 663538, 663697, 673962, 673490, 680757, 518934, 596019, 663993, 592518, 606466, 656716, 641857, 643446, 669004, 608841, 593160, 571970, 607043, 663457, 660271, 621566, 681546, 542303, 670623, 665161, 521692, 663728, 608070, 592663, 592669, 668804, 663586, 673357, 677594, 667670, 642708, 660688, 668939, 467793, 623993, 656941, 608369, 543760, 642086, 669257, 624585, 665742, 543807, 668715, 663886, 681082, 664702, 553993, 669261, 621020, 657041, 679529, 650402, 678662, 663656, 457759, 607208, 662139, 683734, 657077, 663837, 683011, 664774, 572233, 677951, 59288

In [7]:
player_dfs_2023[641355]

Unnamed: 0,Name,Age,#days,Lev,Date,Tm,G,PA,AB,R,...,OBP,SLG,OPS,mlbID,Day,wOBA,K%,BB%,ISO,wRC+
24,Cody Bellinger,27,311,Maj-NL,2023-03-30,Chicago,1,4,3,0,...,0.250,0.000,0.250,641355,0,0.172500,0.250000,0.250000,0.000000,-1.800000
28,Cody Bellinger,27,309,Maj-NL,2023-04-01,Chicago,2,8,7,0,...,0.125,0.000,0.125,641355,2,0.086250,0.375000,0.125000,0.000000,-8.700000
29,Cody Bellinger,27,308,Maj-NL,2023-04-02,Chicago,3,12,11,0,...,0.083,0.000,0.083,641355,3,0.057500,0.333333,0.083333,0.000000,-11.000000
29,Cody Bellinger,27,307,Maj-NL,2023-04-03,Chicago,4,17,15,1,...,0.177,0.267,0.443,641355,4,0.257059,0.235294,0.117647,0.200000,4.964706
30,Cody Bellinger,27,306,Maj-NL,2023-04-04,Chicago,5,23,20,2,...,0.304,0.350,0.654,641355,5,0.336087,0.173913,0.130435,0.150000,11.286957
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55,Cody Bellinger,27,131,Maj-NL,2023-09-26,Chicago,126,538,482,93,...,0.359,0.533,0.892,641355,180,0.472579,0.156134,0.072491,0.224066,22.206355
55,Cody Bellinger,27,130,Maj-NL,2023-09-27,Chicago,127,543,487,94,...,0.359,0.532,0.891,641355,181,0.471500,0.156538,0.071823,0.221766,22.120000
55,Cody Bellinger,27,129,Maj-NL,2023-09-28,Chicago,128,547,491,94,...,0.358,0.530,0.888,641355,182,0.469669,0.157221,0.071298,0.219959,21.973529
55,Cody Bellinger,27,128,Maj-NL,2023-09-29,Chicago,129,551,495,94,...,0.356,0.525,0.881,641355,183,0.466241,0.157895,0.070780,0.218182,21.699270


In [8]:
fig = create_figure(player_dfs_2023[641355], x_column='G', y_column='OPS')
fig.show()
type(fig)

plotly.graph_objs._figure.Figure

In [9]:
type(player_dfs_2023[641355]['G'])

pandas.core.series.Series

In [38]:
fig = create_figure(player_dfs_2023[641355], x_column='G', y_column='OPS')
fig.show()

In [30]:
len(player_dfs_2023[643217])

149

In [46]:
def sliding_window_stability(dfs:dict) -> dict:
    """
    Works by computing the standard deviation for a given stat over a sliding window of games
    so that we can see how a player's performance stabilizes over time
    """
    stability = {}
    i=0
    window_size = 10 # games
    std_threshold = 0.05
    mean_threshold = 0.05
    stat_columns = ['BA','OBP','SLG','OPS','wOBA','K%','BB%','ISO','wRC+']

    for player_id, df in dfs.items():
        stability[player_id] = {} 
        window_stats = df.iloc[i:i+window_size][stat_columns]

        for column in stat_columns:
            stability[player_id][f'Season {column}'] = df[column].iloc[-1]

        while i+window_size < len(df):
            window_std = window_stats.std()
            window_mean = window_stats.mean()
            for column in stat_columns:
                if window_std[column] < std_threshold and window_mean[column] > mean_threshold:
                    stability[player_id][f'Game Threshold {column}'] = i+window_size
                    break
            i+=1

    return stability

In [45]:
stability = sliding_window_stability(player_dfs_2023)

In [44]:
stability[641355]

{'Season BA': 0.307,
 'Season OBP': 0.356,
 'Season SLG': 0.525,
 'Season OPS': 0.881,
 'Season wOBA': 0.46717902350813745,
 'Season K%': 0.1564748201438849,
 'Season BB%': 0.07194244604316546,
 'Season ISO': 0.218436873747495,
 'Season wRC+': 21.774321880650994}

In [None]:
def compute_stability(dfs:dict, statistic:str) -> dict:
    """
    Computes the stability using different statisitcal techniques for all players
    """
    for key, df in dfs.items():
        print(f"Computing stability for {df['Name'].iloc[0]}")
        


    return