In [15]:
import pandas as pd
from nba_api.stats.static import teams
from nba_api.stats.endpoints import playergamelogs
import numpy as np
from typing import List

In [4]:
df = pd.DataFrame()
start_year = 17

while start_year < 25:
  year_string = f"20{start_year}-{start_year+1}"
  p_df = playergamelogs.PlayerGameLogs(season_nullable=year_string).get_data_frames()[0]
  df = pd.concat([df, p_df])
  start_year += 1

print(df.columns.to_list())

['SEASON_YEAR', 'PLAYER_ID', 'PLAYER_NAME', 'NICKNAME', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID', 'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'NBA_FANTASY_PTS', 'DD2', 'TD3', 'WNBA_FANTASY_PTS', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK', 'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK', 'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK', 'REB_RANK', 'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK', 'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK', 'NBA_FANTASY_PTS_RANK', 'DD2_RANK', 'TD3_RANK', 'WNBA_FANTASY_PTS_RANK', 'AVAILABLE_FLAG', 'MIN_SEC']


In [18]:
def create_avg_over_season_columns(source: pd.DataFrame, names: List[str]) -> pd.DataFrame:
    """
    @parameter source: A dataframe containing atleast 1 season of data from the playergamelogs endpoint in the api
    @parameter names: a list of columns from the dataframe you wish to average
    
    Will create a new dataframe with player_id, game_id (unique identifiers) and the averages of the stats you request
    """
    # Sort the DataFrame (optional depending on your needs)
    source = source.sort_values(by=['PLAYER_ID', 'SEASON_YEAR'])

    new_column_names = []
    for name in names:
        season_avg = source.groupby(['PLAYER_ID', 'SEASON_YEAR'])[name].mean()
        column_name = f"{name}_SEASON_AVG"
        new_column_names.append(column_name)
    
        # Map the calculated averages back to the original DataFrame
        source[column_name] = source.set_index(['PLAYER_ID', 'SEASON_YEAR']).index.map(season_avg)

    new_column_names.append('PLAYER_ID')
    new_column_names.append('GAME_ID')
    # Return the desired DataFrame with PLAYER_ID, GAME_ID, and the new column
    return source[new_column_names]

In [19]:
cols_to_avg = ['PTS', 'FG_PCT', 'FG3_PCT', 'FT_PCT', 'REB', 'AST', 'STL', 'BLK']

new_df = create_avg_over_season_columns(source=df, names=cols_to_avg)
new_df.head()

Unnamed: 0,PTS_SEASON_AVG,FG_PCT_SEASON_AVG,FG3_PCT_SEASON_AVG,FT_PCT_SEASON_AVG,REB_SEASON_AVG,AST_SEASON_AVG,STL_SEASON_AVG,BLK_SEASON_AVG,PLAYER_ID,GAME_ID
189,5.396552,0.356897,0.283638,0.220121,2.551724,1.189655,0.724138,0.448276,1713,21701230
534,5.396552,0.356897,0.283638,0.220121,2.551724,1.189655,0.724138,0.448276,1713,21701210
1011,5.396552,0.356897,0.283638,0.220121,2.551724,1.189655,0.724138,0.448276,1713,21701188
1496,5.396552,0.356897,0.283638,0.220121,2.551724,1.189655,0.724138,0.448276,1713,21701167
1831,5.396552,0.356897,0.283638,0.220121,2.551724,1.189655,0.724138,0.448276,1713,21701155


In [20]:
def calculate_weighted_average_column(source: pd.DataFrame, num_of_games: int, names: List[str]) -> pd.DataFrame:
    source = source.sort_values(by=['PLAYER_ID', 'SEASON_YEAR', 'GAME_DATE'])

    weights = np.arange(num_of_games, 0, -1)
    def weighted_average(series):
        if len(series) < len(weights):
            current_weights = weights[:len(series)]
        else:
            current_weights = weights
        return np.average(series, weights=current_weights)
    
    wma_names = []
    for name in names:
        wma_col_name = f"WMA_{name}_LAST_{num_of_games}"
        wma_names.append(wma_col_name)

        source[wma_col_name] = (
            source.groupby(['PLAYER_ID', 'SEASON_YEAR'])[name]
            .rolling(window=num_of_games, min_periods=1)
            .apply(weighted_average, raw=True)
            .reset_index(level=[0, 1], drop=True)
        )
    
    wma_names.extend(['PLAYER_ID', 'GAME_ID'])

    return source[wma_names]

In [21]:
wma_names = ['PTS', 'FG_PCT', 'FG3_PCT', 'FT_PCT']
third_df = calculate_weighted_average_column(source=df, num_of_games=5, names=wma_names)
third_df.head()

TypeError: incompatible index of inserted column with frame index

In [4]:
df.rename(columns={'TEAM_ABBREVIATION': 'P_TEAM_ABBR'}, inplace=True)
df['A_TEAM_ABBR'] = df['MATCHUP'].str.split(' ').str[-1]
teams_df = teams.get_teams()
teams_df = pd.DataFrame(teams_df)
df = df.merge(teams_df[['id', 'abbreviation']], left_on='A_TEAM_ABBR', right_on='abbreviation', how='left')
df.rename(columns={'id': 'A_TEAM_ID'}, inplace=True)
df.drop(columns=['abbreviation'], inplace=True)


In [5]:
df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'])
df = df.sort_values(by=['PLAYER_ID', 'A_TEAM_ID', 'GAME_DATE'])

historic_vs_team = []
for index, row in df.iterrows():
    # Filter the dataframe to get all games before the current game's date for the same player and team
    past_games = df[
        (df['PLAYER_ID'] == row['PLAYER_ID']) & 
        (df['A_TEAM_ID'] == row['A_TEAM_ID']) & 
        (df['GAME_DATE'] < row['GAME_DATE'])
    ]
    
    if not past_games.empty:
        # Calculate the average points from the past games
        average_points = past_games['PTS'].mean()
    else:
        # If no past games, use the current game's points
        average_points = row['PTS']
    
    # Append the calculated value to the list
    historic_vs_team.append(average_points)
  
df['HISTORIC_VS_TEAM'] = historic_vs_team

In [6]:
df = df.sort_values(by=['PLAYER_ID', 'SEASON_YEAR', 'GAME_DATE'])

weighted_moving_avg = []
weights = np.arange(10, 0, -1)

for index, row in df.iterrows():
    # Filter the dataframe to get the last 10 games of the same player in the same season
    past_games = df[
        (df['PLAYER_ID'] == row['PLAYER_ID']) &
        (df['SEASON_YEAR'] == row['SEASON_YEAR']) &
        (df['GAME_DATE'] < row['GAME_DATE'])
    ].head(10)  # Get the last 10 games
    
    if not past_games.empty:
        # Use weights for the weighted moving average
        game_count = len(past_games)
        current_weights = weights[:game_count]  # Use only the available weights
        weighted_avg = np.average(past_games['PTS'], weights=current_weights)
    else:
        # If no past games, set to NaN
        weighted_avg = np.nan
    
    # Append the calculated value to the list
    weighted_moving_avg.append(weighted_avg)

df['WMA_PPG_LAST_10'] = weighted_moving_avg

KeyboardInterrupt: 