In [1]:
import pandas as pd 
import numpy as np
from tqdm import tqdm
pd.options.display.max_columns = None
pd.options.display.max_rows = 200
pd.options.mode.chained_assignment = None 

### Number of Games You Want to Go Back For Averages

In [2]:
num_games = 2

In [3]:
season_2020 = pd.read_csv('data/Season(2019-20).csv')
season_2019 = pd.read_csv('data/Season(2018-19).csv')
roto_2020 = pd.read_csv('data/RotoGuru(19-20).csv', delimiter = ':')
roto_2019 = pd.read_csv('data/RotoGuru(18-19).csv', delimiter = ':')

In [4]:
season18_20 = season_2020.append(season_2019, ignore_index = True)
roto18_20 = roto_2020.append(roto_2019, ignore_index = True)

In [5]:
roto_2020.head(1)

Unnamed: 0,GID,"Last, First",First Last,Date,Team,Opp,H/A,GameID,GTime(ET),Team pts,Opp pts,Start,Minutes,GP,active,FDP,DKP,DDP,YHP,Stats,DoubleD,TripleD,FD Sal,FD Change,DK Sal,DK Change,DD Sal,DD Change,YH Sal,YH Change,FD pos,DK pos,DD pos,YH pos,ADI,VMI,Unnamed: 36
0,5679,"Eubanks, Drew",Drew Eubanks,20200308,sas,cle,A,20200308-sas-cle,19.5,,,,,,,,,,,,,,3500.0,0.0,3000.0,0.0,,,10.0,0.0,5.0,5.0,,5.0,*,*,


### Merging Fantasy and Roto

In [6]:
def organize_roto(r_df): 
    '''This organizes the rotoguru dataframe so that the columns match the season dataframe.  Also gets rid of whitespaces
    and reformats date'''
    r_df = r_df[['GameID', 'First  Last', 'Date', 'Team','FDP', 'FD Sal', 'FD Change', 'FD pos']]
    r_df['Name'] = np.array(r_df['First  Last'])
    r_df['FDS'] = np.array(r_df['FD Sal'])
    r_df['FD_change'] = np.array(r_df['FD Change'])
    r_df['FD_pos'] = np.array(r_df['FD pos'])
    new_df = r_df.drop(['First  Last', 'FD Sal', 'FD Change', 'FD pos'], axis =1)[['GameID', 'Name','Date', 'Team','FDP', 'FDS', 'FD_change', 'FD_pos']]
    return new_df

def organize_stat(stat_df): 
    new_df = stat_df
    new_df['Team'] = new_df.Team.map(lambda x: x.lower())
    new_df['OPP'] = new_df.OPP.map(lambda x: x.lower())
    new_df['GameID'] = new_df.GameID.map(lambda x: x.lower())
    return new_df
def merge_tables(stat_df, roto_df, season):
    '''This function will merge the dataframes'''
    roto_df = organize_roto(roto_df)
    stat_df = organize_stat(stat_df)
    merge_df = pd.merge(roto_df, stat_df, on = ['GameID', 'Name', 'Team', 'Date'], how = 'inner')
    merge_df = merge_df.sort_values(by = ['GameID', 'Team'])    
    merge_df.to_csv(f'NewData/Merge({season})[{num_games}].csv', index = False)
    return merge_df
merge_2020 = merge_tables(season_2020, roto_2020, '19-20')
merge_2019 = merge_tables(season_2019, roto_2019, '18-19')

### Getting Averages of Merged DF

In [7]:
def get_averages(df, season): 
    '''This function will get the average for specified amount of previous games.  It will also format 
    the predictive value (next fdp scored) for the NN model'''
    col = ['MP', 'FG', 'FGA', 'FG%', '3P', '3PA',
                  '3P%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
                  'TOV', 'PF', 'PTS', '+-', 'FDP','FDS']
    add_col = [f'{num_games}_{i}' for i in col]
    new_df = pd.DataFrame(columns = add_col)
    player_list = np.array(df.Name.value_counts().index)
    for player in tqdm(player_list, desc = f'Season {season}'):
        player_df = df[df.Name == player]
        player_df = player_df.sort_values(by = 'Date')
       
        for c in col: 
            avg = []
            for row in range(len(player_df)):
                if row < num_games: 
                    avg.append(None)
                    continue 
                last = player_df.iloc[row-num_games:row][f'{c}'].mean()
                avg.append(last)
            player_df[f'{num_games}_{c}'] = avg

        new_df = new_df.append(player_df,ignore_index = True)
        
    new_df = new_df[[i for i in new_df.columns.to_list() if i not in add_col] + [i for i in new_df.columns.to_list() if i in add_col]]    
    new_df.sort_values(by = ['GameID', 'Team'], inplace = True)
    new_df.to_csv(f'NewData/AVG({season})[{num_games}].csv')
    return new_df


merge_2020 = get_averages(merge_2020, season = '19-20')
merge_2019 = get_averages(merge_2019, season = '18-19')

Season 19-20: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 471/471 [05:04<00:00,  1.55it/s]
Season 18-19: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 488/488 [06:55<00:00,  1.18it/s]


In [15]:
merge_2020.append(merge_2019, ignore_index = True).to_csv(f'CompleteMerge(2018-2020)[{num_games}].csv')