In [1]:
# Need to run this to import the api_data package: the repo folder is appended
# to sys.path so the imports in the following cell can be resolved.
# NOTE(review): absolute, machine-specific path -- will not work on another machine as-is.
import sys
sys.path.append('/home/cdelong/Python-Projects/FF-Sim/Repo-Work/FF-Sim')

In [2]:
import pandas as pd
import numpy as np

from api_data import ApiData
from pull_initial_data.compile_league_data import compile_league_data

# Display settings: show every column, and at most 100 rows, of a DataFrame
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# Folder handed to compile_league_data
output_folder = '/home/cdelong/Python-Projects/FF-Sim/Simulation-Data/league_data'

# Build the dict of tables and list the table names it contains
df_dict = compile_league_data(output_folder)
df_dict.keys()

dict_keys(['teams', 'team_player_scores', 'settings', 'weeks', 'team_scores', 'divisions'])

In [None]:
'''
THINGS TO CHECK:
    - Number of team/weeks is complete for all regular season matchups
    - Way to identify which teams actually made the playoffs
    - Actual points from team_player_scores align with team_scores
    - Any leagues with multiple weeks per matchup?
    - Validate playoff teams flag
    
THINGS TO ADD:
    - Starting positions for each league
        - Need to update player scores to reflect this
    - Actual standings by week?
        - Do by division
    - Need to account for regular season bye weeks
        - These should not count towards w/l but what about score?
    - team_scores is actually at the matchup/team level rather than the week/team level
        - Need to fix this, but keep the standings update based on the matchup/team level
'''

In [3]:
# Display the 'settings' table from the dict returned by compile_league_data
df_dict['settings']

Unnamed: 0.1,Unnamed: 0,playoff_seeding_rule,playoff_seeding_rule_by,num_playoff_teams,first_scoring_period,final_scoring_period,matchup_period_count,playoff_week_start,scoring_type,reg_season_matchup_tiebreaker,playoff_matchup_tiebreaker,home_team_bonus,league_id,season_id
0,0,TOTAL_POINTS_SCORED,0,4,1,17,13,14,H2H_POINTS,NONE,NONE,0,24775634,2019
0,0,TOTAL_POINTS_SCORED,0,4,1,17,13,14,H2H_POINTS,NONE,NONE,0,24765771,2019
0,0,TOTAL_POINTS_SCORED,0,4,1,17,13,14,H2H_POINTS,NONE,NONE,0,24722759,2019
0,0,TOTAL_POINTS_SCORED,0,6,1,16,13,14,H2H_POINTS,NONE,NONE,0,24756669,2019
0,0,TOTAL_POINTS_AGAINST,0,6,1,16,13,14,H2H_POINTS,NONE,NONE,0,24779438,2019
0,0,TOTAL_POINTS_SCORED,0,6,1,16,13,14,H2H_POINTS,SLOT_POINTS,NONE,0,24757340,2019
0,0,TOTAL_POINTS_SCORED,0,6,1,17,14,15,H2H_POINTS,NONE,NONE,0,24712271,2019
0,0,TOTAL_POINTS_SCORED,0,4,1,17,13,14,H2H_POINTS,NONE,NONE,0,24737914,2019
0,0,TOTAL_POINTS_SCORED,0,6,1,16,13,14,H2H_POINTS,SLOT_POINTS,NONE,0,24710723,2020
0,0,TOTAL_POINTS_SCORED,0,4,1,17,13,14,H2H_POINTS,NONE,NONE,0,24749365,2019


In [5]:
import api_data.scores as scores

# NOTE(review): presumably maps a standings range ('1-12') to
# [sort columns, ascending flags] -- confirm against ApiData.
standings_metrics = {'1-12': [['cum_total_wins', 'cum_score'], [False, False]]}

# espn_data = ApiData(2019, 24693394, standings_metrics=standings_metrics)
# NOTE(review): positional args look like (season, league id) -- confirm against ApiData.
espn_data = ApiData(2021, 48347143, standings_metrics=standings_metrics)

# Pull the raw settings payload through ApiData's private helper and display
# its 'status' entry (the other sub-sections are kept below for quick switching).
settings = espn_data._pull_raw_settings()
# settings['settings']['scheduleSettings']
# settings['settings']['scoringSettings']
settings['status']

{'activatedDate': 1624123798733,
 'createdAsLeagueType': 3,
 'currentLeagueType': 0,
 'currentMatchupPeriod': 17,
 'finalScoringPeriod': 17,
 'firstScoringPeriod': 1,
 'isActive': True,
 'isExpired': False,
 'isFull': True,
 'isPlayoffMatchupEdited': False,
 'isToBeDeleted': False,
 'isViewable': False,
 'isWaiverOrderEdited': False,
 'latestScoringPeriod': 19,
 'previousSeasons': [2019, 2020],
 'standingsUpdateDate': 1641809676978,
 'teamsJoined': 12,
 'transactionScoringPeriod': 19,
 'waiverLastExecutionDate': 1642148891659,
 'waiverProcessStatus': {'2021-09-11T07:42:58.721+0000': 3,
  '2021-09-16T07:23:39.526+0000': 6,
  '2021-09-23T08:07:01.276+0000': 4,
  '2021-09-24T10:11:38.560+0000': 1,
  '2021-09-25T08:17:32.462+0000': 1,
  '2021-09-30T07:55:28.062+0000': 4,
  '2021-10-02T08:00:00.290+0000': 1,
  '2021-10-07T07:23:04.765+0000': 8,
  '2021-10-14T07:52:13.887+0000': 3,
  '2021-10-21T07:20:09.233+0000': 5,
  '2021-10-24T08:21:29.016+0000': 1,
  '2021-10-28T08:16:49.093+0000': 4,


In [25]:
def find_leagues_w_non_missing_team_weeks(df_player_score: pd.DataFrame, df_settings: pd.DataFrame) -> pd.DataFrame:
    """ Returns df which flags Season/Leagues missing at least one week of data.

    Args:
        df_player_score: Player-level scores; must contain 'season_id', 'league_id',
            'team_id' and 'week_number'.
        df_settings: League settings; must contain 'season_id', 'league_id',
            'first_scoring_period' and 'final_scoring_period'.

    Returns:
        One row per Season/League with the columns 'season_id', 'league_id' and
        'missing_week_ind_final'. The indicator is 1 when at least one team in the
        league has fewer distinct weeks of data inside
        [first_scoring_period, final_scoring_period] than the league has scoring
        periods, and 0 otherwise.
    """

    season_lg_vars = ['season_id', 'league_id']
    season_lg_team_vars = season_lg_vars + ['team_id']
    agg_vars = season_lg_team_vars + ['week_number']

    keep_vars = season_lg_vars + ['first_scoring_period', 'final_scoring_period']
    # One settings row per Season/League so the merge below cannot duplicate team/weeks
    df_settings = df_settings[keep_vars].drop_duplicates(subset=season_lg_vars).copy()
    df_settings['num_weeks'] = df_settings['final_scoring_period'] - df_settings['first_scoring_period'] + 1

    # Agg to the Season/League/Team/Week level to ensure counts are done properly
    team_weeks_df = df_player_score.groupby(agg_vars, as_index=False).size()

    # Only weeks inside the league's scoring window count as "present"; weeks past
    # final_scoring_period would otherwise hide a genuinely missing week
    team_weeks_df = pd.merge(team_weeks_df, df_settings, on=season_lg_vars)
    team_weeks_df['in_window_week'] = team_weeks_df['week_number'].between(
        team_weeks_df['first_scoring_period'], team_weeks_df['final_scoring_period']).astype(int)

    # Get the number of in-window weeks with data by Season/League/Team
    num_lg_team_weeks_df = team_weeks_df.groupby(season_lg_team_vars + ['num_weeks'],
                                                 as_index=False)['in_window_week'].sum()

    # 1 = this team is missing at least one week, 0 = complete
    num_lg_team_weeks_df['missing_week_ind'] = (
        num_lg_team_weeks_df['in_window_week'] < num_lg_team_weeks_df['num_weeks']).astype(int)

    # A Season/League is flagged as soon as any one of its teams is flagged
    id_missing_weeks_df = num_lg_team_weeks_df.groupby(season_lg_vars, as_index=False)['missing_week_ind'].max()
    id_missing_weeks_df = id_missing_weeks_df.rename(columns={'missing_week_ind': 'missing_week_ind_final'})

    final_df = id_missing_weeks_df[['season_id', 'league_id', 'missing_week_ind_final']]

    return final_df


##########################################################################################################
################# Season/Leagues with missing weeks in the team_player_scores data #######################
##########################################################################################################

# Build the Season/League indicator table from the player scores and settings,
# then display only the rows whose indicator equals 1.
check_missing_weeks = find_leagues_w_non_missing_team_weeks(df_dict['team_player_scores'], df_dict['settings'])
check_missing_weeks.loc[check_missing_weeks['missing_week_ind_final'] == 1]

Unnamed: 0,season_id,league_id,missing_week_ind_final


In [5]:
def _agg_player_scores(df_player_score: pd.DataFrame) -> pd.DataFrame:
    """ Returns the Projected Points data aggregated to the Season/League/Week level """

    df_player_score = df_player_score.copy()

    # Including the starter indiciator in order to include starter and bench points
    groupby_vars = ['season_id', 'league_id', 'week_number', 'team_id', 'starter_flag']
    sum_vars = ['projected_points', 'actual_points']
    df_player_score = df_player_score.groupby(groupby_vars, as_index=False)[sum_vars].sum()

    id_vars = ['season_id', 'league_id', 'week_number', 'team_id']
    df_player_score = df_player_score.pivot_table(index=id_vars, columns='starter_flag'
                                                  , aggfunc=sum, fill_value=0)

    # Pivot table causes multi-dimensional column names and id_vars become the index
    df_player_score.columns = ['{}_{}'.format(x[0], 'starter') if x[1] == 1
                               else '{}_{}'.format(x[0], 'bench') for x in df_player_score.columns]
    df_player_score = df_player_score.reset_index().rename_axis(None, axis=1)

    final_df = df_player_score

    return final_df


def _merge_player_scores_w_team_scores(df_players_scores: pd.DataFrame, df_scores: pd.DataFrame) -> pd.DataFrame:
    """ Returns the aggregated player scores outer-merged with the team scores.

    Args:
        df_players_scores: Player-level scores, in the shape expected by
            _agg_player_scores.
        df_scores: Team-level scores; must contain 'season_id', 'league_id',
            'week_number', 'team_id' and 'score'.

    Returns:
        One row per Season/League/Week/Team found in either input, with
        'actual_points_starter' and 'player_score_flag' (1 when the row exists in
        the aggregated player scores) plus 'score' and 'score_flag' (1 when the
        row exists in the team scores). A side with no match is left as NaN, so
        a null flag identifies rows that are present in only one source.
    """

    by_vars = ['season_id', 'league_id', 'week_number', 'team_id']

    # .copy() so the flag columns are added to independent frames rather than to
    # column-subset slices (avoids SettingWithCopyWarning and touching the inputs)
    df_agg_player_scores = _agg_player_scores(df_players_scores)[by_vars + ['actual_points_starter']].copy()
    df_agg_player_scores['player_score_flag'] = 1

    df_scores = df_scores[by_vars + ['score']].copy()
    df_scores['score_flag'] = 1

    # Outer merge keeps team/weeks that appear in only one of the two sources
    df = pd.merge(df_agg_player_scores, df_scores, on=by_vars, how='outer')

    return df

# _agg_player_scores(df_dict['team_player_scores'])
df = _merge_player_scores_w_team_scores(df_dict['team_player_scores'], df_dict['team_scores'])

# Team/weeks that have player-level scores but no matching team_scores row.
# The mask must be built from `df` itself: `check` is not defined at this point
# on a fresh kernel and is bound to an unrelated dict in a later cell.
df.loc[(df['player_score_flag'] == 1) & (df['score_flag'].isnull())]
# Reverse check (team_scores rows with no player-level scores):
# df.loc[(df['player_score_flag'].isnull()) & (df['score_flag'] == 1)]

Unnamed: 0,season_id,league_id,week_number,team_id,actual_points_starter,player_score_flag,score,score_flag
117,2019,24693394,14.0,1.0,89.86,1.0,,
118,2019,24693394,14.0,2.0,110.40,1.0,,
119,2019,24693394,14.0,3.0,148.16,1.0,,
120,2019,24693394,14.0,4.0,147.62,1.0,,
121,2019,24693394,14.0,5.0,184.80,1.0,,
...,...,...,...,...,...,...,...,...
12216,2020,24786712,16.0,16.0,60.00,1.0,,
12217,2020,24786712,16.0,17.0,41.00,1.0,,
12218,2020,24786712,16.0,18.0,84.00,1.0,,
12219,2020,24786712,16.0,19.0,5.00,1.0,,


In [3]:
import api_data.scores as scores

# Same two sort metrics for each of the three keys '1-4', '5-6' and '7-12'.
# NOTE(review): the keys look like standings ranges and the second list like
# ascending flags -- confirm against ApiData.
standings_metrics = {'1-4': [['cum_total_wins', 'cum_score'], [False, False]],
                     '5-6': [['cum_total_wins', 'cum_score'], [False, False]],
                     '7-12': [['cum_total_wins', 'cum_score'], [False, False]]}

# Rebinds `espn_data`, replacing the object created in the earlier cell.
espn_data = ApiData(2019, 24693394, standings_metrics=standings_metrics)

# espn_data.pull_all_data()

In [10]:
import api_data.scores as scores

# Raises the display row limit (set to 100 in the setup cell) for the rest of the session
pd.options.display.max_rows = 500

# Raw matchup payload pulled through ApiData's private helper
matchup = espn_data._pull_raw_matchup()
# matchup.keys()

# for period in matchup['schedule']:
#     print(period['matchupPeriodId'])

# NOTE(review): playoff_week_start=14 is hard-coded here -- confirm it matches this league's settings.
df_matchup_data = scores.create_matchup_df(matchup, playoff_week_start=14)
df_expanded = scores.expand_matchup_data(df_matchup_data)

# check = scores.pull_standings(matchup, 14)
# Displays the number of rows in the matchup DataFrame
len(df_matchup_data)
# len(df_expanded)

85

In [68]:
# for period in matchup['schedule']:
#     print(period.keys())

# Show which fields the 'away' side of the first schedule entry carries
print(matchup['schedule'][0]['away'].keys())

# for i, match in enumerate(matchup['schedule']):
#     try:
#         team_id = match['away']['teamId']
#         points_by = list(match['away']['pointsByScoringPeriod'].values())[0]
#         total_points = match['away']['totalPoints']
#         print(team_id, 'Points:', points_by, 'Total Points:', total_points)
#         print(match['away']['pointsByScoringPeriod'], i)
#     except:
#         pass

# print(matchup['schedule'][0]['away']['pointsByScoringPeriod'].values())

# NOTE: `check` is bound to the 'pointsByScoringPeriod' mapping here (not a DataFrame)
check = matchup['schedule'][0]['away']['pointsByScoringPeriod']

# Print each key/value pair of that mapping
for key, val in check.items():
    print(key, val)

dict_keys(['adjustment', 'cumulativeScore', 'gamesPlayed', 'pointsByScoringPeriod', 'teamId', 'tiebreak', 'totalPoints'])
1 131.66


In [24]:
# Display the `weeks` attribute of the ApiData object
espn_data.weeks

Unnamed: 0,week_number,matchup_period,reg_season_flag,season_id,league_id
0,1,1,1,2019,24693394
1,2,2,1,2019,24693394
2,3,3,1,2019,24693394
3,4,4,1,2019,24693394
4,5,5,1,2019,24693394
5,6,6,1,2019,24693394
6,7,7,1,2019,24693394
7,8,8,1,2019,24693394
8,9,9,1,2019,24693394
9,10,10,1,2019,24693394


In [51]:
# Convenience handles on the aggregated player scores and the two raw tables
df_agg_player_scores = _agg_player_scores(df_dict['team_player_scores'])
df_player_scores = df_dict['team_player_scores']
df_scores = df_dict['team_scores']

# NOTE: rebinds the global `df` to the team_scores table (an alias, not a copy),
# replacing whatever `df` held from earlier cells.
df = df_scores

# Display the team_scores rows for season 2020 / league 24786712; the
# commented-out line would narrow this further to a single week and team.
df.loc[(df['season_id'] == 2020) & (df['league_id'] == 24786712)
#                     & (df['week_number'] == 16) & (df['team_id'] == 16)
                    ]

Unnamed: 0.1,Unnamed: 0,week_number,matchup_period,team_id_opp,score_opp,team_id,score,week_type,home_or_away,win_ind,loss_ind,tie_ind,wlt_points,all_play_wlt_points,all_play_wins,all_play_ties,all_play_losses,cum_score,cum_wins,cum_all_play_wlt_points,cum_ties,cum_losses,cum_wlt_points,cum_all_play_wins,cum_all_play_losses,cum_all_play_ties,cum_score_opp,record,all_play_record,cum_score_per_week,cum_score_opp_per_week,cum_all_play_wlt_points_per_week,standings,level_0,index,season_id,league_id
0,0,1.0,1.0,19.0,74.0,13.0,127.0,Regular,away,1.0,0.0,0.0,1.0,19.0,19.0,0.0,0.0,127.0,1.0,19.0,0.0,0.0,1.0,19.0,0.0,0.0,74.0,1-0-0,19-0-0,127.00,74.00,19.0,1,0.0,13.0,2020,24786712
1,1,1.0,1.0,16.0,100.0,4.0,107.0,Regular,home,1.0,0.0,0.0,1.0,18.0,18.0,0.0,1.0,107.0,1.0,18.0,0.0,0.0,1.0,18.0,1.0,0.0,100.0,1-0-0,18-1-0,107.00,100.00,18.0,2,1.0,3.0,2020,24786712
2,2,1.0,1.0,9.0,71.0,3.0,106.0,Regular,away,1.0,0.0,0.0,1.0,17.0,17.0,0.0,2.0,106.0,1.0,17.0,0.0,0.0,1.0,17.0,2.0,0.0,71.0,1-0-0,17-2-0,106.00,71.00,17.0,3,2.0,2.0,2020,24786712
3,3,1.0,1.0,1.0,49.0,11.0,105.0,Regular,home,1.0,0.0,0.0,1.0,16.0,16.0,0.0,3.0,105.0,1.0,16.0,0.0,0.0,1.0,16.0,3.0,0.0,49.0,1-0-0,16-3-0,105.00,49.00,16.0,4,3.0,11.0,2020,24786712
4,4,1.0,1.0,4.0,107.0,16.0,100.0,Regular,away,0.0,1.0,0.0,0.0,15.0,15.0,0.0,4.0,100.0,0.0,15.0,0.0,1.0,0.0,15.0,4.0,0.0,107.0,0-1-0,15-4-0,100.00,107.00,15.0,5,0.0,16.0,2020,24786712
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151,151,13.0,13.0,10.0,96.0,3.0,58.0,Regular,home,0.0,1.0,0.0,0.0,6.0,6.0,0.0,13.0,997.0,8.0,145.5,0.0,5.0,8.0,143.0,99.0,5.0,910.0,8-5-0,143-99-5,76.69,70.00,11.2,8,1.0,242.0,2020,24786712
152,152,13.0,13.0,11.0,72.0,8.0,63.0,Regular,away,0.0,1.0,0.0,0.0,9.0,9.0,0.0,10.0,963.0,8.0,142.0,0.0,5.0,8.0,141.0,104.0,2.0,913.0,8-5-0,141-104-2,74.08,70.23,10.9,9,2.0,247.0,2020,24786712
153,153,13.0,13.0,15.0,29.0,6.0,76.0,Regular,away,1.0,0.0,0.0,1.0,12.0,12.0,0.0,7.0,940.0,7.0,136.0,1.0,5.0,7.5,135.0,110.0,2.0,891.0,7-5-1,135-110-2,72.31,68.54,10.5,10,3.0,245.0,2020,24786712
154,154,13.0,13.0,3.0,58.0,10.0,96.0,Regular,away,1.0,0.0,0.0,1.0,18.0,18.0,0.0,1.0,1002.0,6.0,152.5,1.0,6.0,6.5,151.0,93.0,3.0,864.0,6-6-1,151-93-3,77.08,66.46,11.7,11,4.0,249.0,2020,24786712
