In [1]:
import pandas as pd
from nba_api.stats.static import teams
from nba_api.stats.endpoints import playergamelogs
import numpy as np

In [2]:
# Download one player-game-log table per season and stack them into `df`.
# Seasons are addressed by the two-digit start year: 17 -> "2017-18", ...,
# 24 -> "2024-25" (the end bound of the range is exclusive).
season_frames = []

for start_year in range(17, 25):
    # Zero-pad both halves so the label stays well-formed for any
    # two-digit start year (e.g. 9 -> "2009-10").
    year_string = f"20{start_year:02d}-{start_year + 1:02d}"
    p_df = playergamelogs.PlayerGameLogs(season_nullable=year_string).get_data_frames()[0]
    season_frames.append(p_df)

# Concatenate once instead of inside the loop: growing a DataFrame with
# repeated pd.concat re-copies everything accumulated so far on every pass.
# ignore_index=True gives the combined frame a unique 0..n-1 index rather
# than repeating each season's own row labels.
df = pd.concat(season_frames, ignore_index=True)

print(df.shape)

(186595, 69)


In [3]:
# Lift the column cap so wide frames are displayed without truncation.
pd.options.display.max_columns = None

In [4]:
# Rename the existing team abbreviation column to P_TEAM_ABBR so it is
# distinguishable from the A_TEAM_* columns added below.
df = df.rename(columns={'TEAM_ABBREVIATION': 'P_TEAM_ABBR'})

# The last space-separated token of MATCHUP is taken as the other team's
# abbreviation (presumably strings like "AAA vs. BBB" / "AAA @ BBB" — verify
# against the API output).
df['A_TEAM_ABBR'] = df['MATCHUP'].str.rsplit(' ', n=1).str[-1]

# Static team table from nba_api; only the id/abbreviation pair is needed
# to translate the abbreviation into a team id.
teams_df = pd.DataFrame(teams.get_teams())
opponent_lookup = teams_df[['id', 'abbreviation']]

# Left join keeps every game-log row; abbreviations with no match in the
# team table end up with a missing A_TEAM_ID.
df = (
    df.merge(opponent_lookup, how='left', left_on='A_TEAM_ABBR', right_on='abbreviation')
      .drop(columns=['abbreviation'])
      .rename(columns={'id': 'A_TEAM_ID'})
)


In [5]:
def _mean_of_earlier_games(dates, points):
    """Per-row mean of `points` over rows with a strictly earlier date.

    Parameters
    ----------
    dates : 1-D numpy array, sorted ascending.
    points : 1-D float numpy array aligned with `dates`.

    Returns
    -------
    1-D float array. For each row: the NaN-skipping mean of `points` over
    all rows whose date is strictly earlier; the row's own `points` value
    when no earlier row exists.
    """
    # side='left' counts only strictly earlier dates, so rows sharing a
    # date never count each other as history.
    n_earlier = np.searchsorted(dates, dates, side='left')

    # Prefix sums with a leading zero: element k covers the first k rows.
    running_sum = np.concatenate(([0.0], np.nancumsum(points)))
    running_cnt = np.concatenate(([0], np.cumsum(~np.isnan(points))))

    prior_cnt = running_cnt[n_earlier]
    prior_mean = np.full(len(points), np.nan)
    # Stays NaN when earlier rows exist but all of their points are missing,
    # matching what Series.mean() returns for an all-NaN selection.
    np.divide(running_sum[n_earlier], prior_cnt, out=prior_mean, where=prior_cnt > 0)

    return np.where(n_earlier > 0, prior_mean, points)


df['GAME_DATE'] = pd.to_datetime(df['GAME_DATE'])
df = df.sort_values(by=['PLAYER_ID', 'A_TEAM_ID', 'GAME_DATE'])

game_dates = df['GAME_DATE'].to_numpy()
points = df['PTS'].to_numpy(dtype=float)

# Start from the fallback value: a row with no earlier game against the same
# opponent gets its own PTS.
# NOTE(review): if PTS is later used as a prediction target, this fallback
# puts the target into the feature — confirm this is intended.
historic_vs_team = points.copy()

# Rows whose opponent id is missing can never match another row, so they are
# left at the fallback value and excluded from grouping.
has_opponent = df['A_TEAM_ID'].notna().to_numpy()
opponent_rows = np.flatnonzero(has_opponent)

# One pass per (player, opponent) pair instead of re-filtering the whole
# frame for every row, which was quadratic in the number of rows.
matchups = df[has_opponent].groupby(['PLAYER_ID', 'A_TEAM_ID'], sort=False).indices
for local_rows in matchups.values():
    # Positions are relative to the filtered frame; map them back to df.
    # Ascending position means ascending GAME_DATE because of the sort above.
    rows = opponent_rows[np.sort(local_rows)]
    historic_vs_team[rows] = _mean_of_earlier_games(game_dates[rows], points[rows])

df['HISTORIC_VS_TEAM'] = historic_vs_team

In [6]:
def _weighted_avg_of_recent_games(dates, points, weights):
    """Weighted average of the most recent earlier games, per row.

    Parameters
    ----------
    dates : 1-D numpy array, sorted ascending.
    points : 1-D float numpy array aligned with `dates`.
    weights : 1-D array; weights[0] is applied to the most recent earlier
        game, weights[1] to the one before it, and so on. Its length is the
        maximum window size.

    Returns
    -------
    1-D float array. For each row: the weighted average over up to
    len(weights) rows with a strictly earlier date; NaN when there are none.
    """
    # side='left' counts only strictly earlier dates.
    n_earlier = np.searchsorted(dates, dates, side='left')
    window_size = len(weights)

    out = np.full(len(points), np.nan)
    for i, end in enumerate(n_earlier):
        if end == 0:
            continue
        # Trailing window, reversed so the most recent game comes first and
        # therefore lines up with the largest weight.
        recent_first = points[max(0, end - window_size):end][::-1]
        out[i] = np.average(recent_first, weights=weights[:len(recent_first)])
    return out


df = df.sort_values(by=['PLAYER_ID', 'SEASON_YEAR', 'GAME_DATE'])

# Linearly decaying weights: 10 for the most recent game down to 1.
weights = np.arange(10, 0, -1)

game_dates = df['GAME_DATE'].to_numpy()
points = df['PTS'].to_numpy(dtype=float)

# NaN marks rows with no earlier game for that player in that season.
weighted_moving_avg = np.full(len(df), np.nan)

# Previously this filtered the whole frame per row and took .head(10) of a
# date-ascending selection, which picked the season's FIRST ten games and
# gave the oldest of them the largest weight. The helper takes the LAST ten
# and weights the newest highest, in one pass per player-season.
player_seasons = df.groupby(['PLAYER_ID', 'SEASON_YEAR'], sort=False).indices
for rows in player_seasons.values():
    # Ascending position means ascending GAME_DATE because of the sort above.
    rows = np.sort(rows)
    weighted_moving_avg[rows] = _weighted_avg_of_recent_games(
        game_dates[rows], points[rows], weights
    )

df['WMA_PPG_LAST_10'] = weighted_moving_avg

KeyboardInterrupt: 