In [1]:
import pandas as pd
import os

In [2]:
# Directory prefix (with trailing slash) that the per-season CSVs are read
# from and the merged CSVs are written to.
loc = "./stats/"

# First and last season to process; both ends are included by the loop below.
seasons = [
    "2018-19",
    "2021-22",
]

# NOTE(review): not referenced in any of the visible cells; presumably a
# minimum matchup-minutes threshold used elsewhere -- TODO confirm.
MIN_MATCHUP_MINS = 8

In [3]:
def getIO(season, data_dir=None):
    """Build row-aligned input and output frames for one season.

    Reads ``<season>_def_stats.csv``, ``<season>_off_stats.csv`` and
    ``<season>_h2h_stats.csv`` from ``data_dir``. Defensive columns are
    prefixed with ``DEF_`` and offensive columns with ``OFF_`` (the season
    column stays ``SEASON_ID``), then both are joined onto the head-to-head
    rows so that every returned input row corresponds to the head-to-head
    row at the same position.

    Parameters
    ----------
    season : str
        Season label used as the CSV filename prefix, e.g. ``'2018-19'``.
    data_dir : str, optional
        Directory containing the CSV files. Defaults to the module-level
        ``loc`` so existing ``getIO(season)`` calls behave as before.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(combine, h2h_df)``: the merged defensive + offensive stats, and
        the head-to-head rows, both sorted by
        ``['DEF_PLAYER_ID', 'OFF_PLAYER_ID']``.

    Raises
    ------
    ValueError
        If the two frames end up with different row counts, which would mean
        inputs and outputs can no longer be paired row-for-row.
    """
    if data_dir is None:
        data_dir = loc
    prefix = os.path.join(data_dir, season)

    def_keys = ['DEF_PLAYER_ID', 'SEASON_ID']
    off_keys = ['OFF_PLAYER_ID', 'SEASON_ID']

    # Inputs (missing stats are treated as 0)
    def_data = pd.read_csv(prefix + '_def_stats.csv').fillna(0)
    off_data = pd.read_csv(prefix + '_off_stats.csv').fillna(0)
    # Output
    h2h_df = pd.read_csv(prefix + '_h2h_stats.csv')

    # Prefix defensive columns; undo the double prefix on columns that already
    # started with DEF_ and keep the season key unprefixed for merging.
    def_data = def_data.add_prefix('DEF_').rename(
        columns={'DEF_SEASON_ID': 'SEASON_ID',
                 'DEF_DEF_RATING': 'DEF_RATING',
                 'DEF_DEF_BOXOUTS': 'DEF_BOXOUTS'})

    # Same treatment for offensive columns.
    off_data = off_data.add_prefix('OFF_').rename(
        columns={'OFF_SEASON_ID': 'SEASON_ID'})

    # Keep only defenders present in both sources: one defensive-stat row per
    # head-to-head row, and only head-to-head rows with defensive stats.
    def_df = pd.merge(def_data, h2h_df[def_keys], how='inner', on=def_keys)
    h2h_df = pd.merge(h2h_df, def_data[def_keys], how='inner', on=def_keys)

    def_df = def_df.sort_values(['DEF_PLAYER_ID'])
    h2h_df = h2h_df.sort_values(['DEF_PLAYER_ID'])

    # Add offensive player to defensive dataframe (helps merging offensive
    # stats). Assigned by position, relying on both frames being grouped by
    # defender in the same order.
    def_df['OFF_PLAYER_ID'] = h2h_df['OFF_PLAYER_ID'].to_numpy()
    def_df['OFF_PLAYER_NAME'] = h2h_df['OFF_PLAYER_NAME'].to_numpy()

    def_df = def_df.reset_index(drop=True)
    h2h_df = h2h_df.reset_index(drop=True)

    # Attach offensive stats. This inner join drops matchups whose offensive
    # player has no offensive-stat row ...
    combine = pd.merge(def_df, off_data, how='inner', on=off_keys)
    # ... so apply the same filter to the outputs; otherwise the two frames
    # would differ in length and no longer line up row-for-row.
    h2h_df = pd.merge(h2h_df, off_data[off_keys], how='inner', on=off_keys)

    # Get correct sort
    sort_cols = ['DEF_PLAYER_ID', 'OFF_PLAYER_ID']
    combine = combine.sort_values(sort_cols)
    h2h_df = h2h_df.sort_values(sort_cols)

    if len(combine) != len(h2h_df):
        raise ValueError(
            'inputs and outputs are misaligned for season {}: {} vs {} rows'
            .format(season, len(combine), len(h2h_df)))

    return combine, h2h_df

In [4]:
# Ensure directory exists (exist_ok avoids the check-then-create race)
os.makedirs(loc, exist_ok=True)


def _season_label(start_year):
    """Format a season label such as '2018-19' from its starting year.

    The second part is the following year's last two digits, zero-padded and
    wrapped at the century (1999 -> '1999-00', 2008 -> '2008-09').
    """
    return '{}-{:02d}'.format(start_year, (start_year + 1) % 100)


# Seasons to iterate over: both ends of `seasons` are included
first_year = int(seasons[0][0:4])
last_year = int(seasons[1][0:4])
if first_year > last_year:
    raise ValueError(
        'seasons must be ordered [first, last], got {!r}'.format(seasons))

# Collect per-season frames and concatenate once at the end instead of
# growing a DataFrame inside the loop.
season_inputs = []
season_outputs = []
for start_year in range(first_year, last_year + 1):
    season = _season_label(start_year)
    inputs, outputs = getIO(season)
    season_inputs.append(inputs)
    season_outputs.append(outputs)

# Most recent season first, with a fresh 0..n-1 index
merged_inputs = pd.concat(season_inputs[::-1], ignore_index=True)
merged_outputs = pd.concat(season_outputs[::-1], ignore_index=True)

In [5]:
# Write the merged inputs and outputs under `loc`, without the index column
for frame, filename in ((merged_inputs, 'inputs.csv'),
                        (merged_outputs, 'outputs.csv')):
    frame.to_csv(loc + filename, index=False)