In [1]:
# Objective: Regression model to predict the number of points/assists/rebounds/steals/blocks/turnovers per game for a given player in a given game

# Features: Depending on training time, I'm hoping to includle as many features, and as many interaction terms as possible. NBA endpoints contain a lot of data, so I'm hoping to use as much as possible
# Target: Points/Assists/Rebounds/Steals/Blocks/Turnovers per game

# https://github.com/swar/nba_api/blob/master/docs/examples/Home%20Team%20Win-Loss%20Modeling/Home%20Team%20Win-Loss%20Data%20Prep.ipynb
# Lots of code was taken from this example. I'm hoping to use this as a starting point for my own model

In [None]:
import requests
import time
from nba_api.stats.static import players, teams
from nba_api.stats.endpoints import CommonTeamRoster, playercareerstats, leagueseasonmatchups, leaguegamefinder
import pandas as pd
import numpy as np
# import cupy as np
import json
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

: 

In [3]:
# Use decorator @retry when making requests to the API. 
def retry(func, retries=3):
    def retry_wrapper(*args, **kwargs):
        attempts = 0
        while attempts < retries:
            try:
                return func(*args, **kwargs)
            except requests.exceptions.RequestException as e:
                print(e)
                time.sleep(30)
                attempts += 1

    return retry_wrapper

In [4]:
@retry
def get_roster(teamID, season):
    team = CommonTeamRoster(team_id = teamID, season = season).get_normalized_json()
    team = pd.DataFrame(json.loads(team)['CommonTeamRoster'])
    return team

In [5]:

def predict(playerName: str, stat: str, opponent: str, date: str):
    xcols = ['OREB', 'DREB']
    ycol = stat
    playerID = players.find_players_by_full_name(
        playerName)[0]['id']  # regex pattern matching
    oppID = teams.find_teams_by_full_name(
        opponent)[0]['id']  # regex pattern matching

    # Temporarily, only train on the players full box score stats for the season.
    # Later, we can actually create a model with real data.

    # Get the players box score stats for the season
    player = pd.DataFrame(json.loads(playercareerstats.PlayerCareerStats(
        player_id=playerID).get_normalized_json())['SeasonTotalsRegularSeason'])
    # player = player[player['SEASON_ID'] == '2019-20']

    m1 = LinearRegression()
    print(player)
    m1.fit(m1, player[xcols], player[ycol])
    print(m1.score())
  
    return (player)


predict('Trae Young', 'PTS', 'HAWKS', '2020-01-01')


   PLAYER_ID SEASON_ID LEAGUE_ID     TEAM_ID TEAM_ABBREVIATION  PLAYER_AGE  \
0    1629027   2018-19        00  1610612737               ATL        20.0   
1    1629027   2019-20        00  1610612737               ATL        21.0   
2    1629027   2020-21        00  1610612737               ATL        22.0   
3    1629027   2021-22        00  1610612737               ATL        23.0   
4    1629027   2022-23        00  1610612737               ATL        24.0   

   GP  GS     MIN  FGM  ...  FT_PCT  OREB  DREB  REB  AST  STL  BLK  TOV   PF  \
0  81  81  2503.0  525  ...   0.829    64   237  301  653   72   15  308  140   
1  60  60  2120.0  546  ...   0.860    32   223  255  560   65    8  289  104   
2  63  63  2125.0  487  ...   0.886    38   207  245  594   53   12  261  111   
3  76  76  2652.0  711  ...   0.904    50   234  284  737   72    7  303  128   
4  73  73  2541.0  597  ...   0.886    56   161  217  741   80    9  300  104   

    PTS  
0  1549  
1  1778  
2  1594  
3  2

ValueError: Expected 2D array, got scalar array instead:
array=LinearRegression().
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [19]:
@retry
def get_all_games(season, player_id):
    """Returns all games played by a given player"""
    games = leaguegamefinder.LeagueGameFinder(player_or_team_abbreviation='P', season_nullable = season, player_id_nullable = player_id).get_data_frames()[0]
    return games

games = get_all_games('2019-20', player_id = '203076')
games


Unnamed: 0,SEASON_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,42019,203076,Anthony Davis,1610612747,LAL,Los Angeles Lakers,0041900406,2020-10-11,LAL @ MIA,W,...,0.714,4,11,15,3,1,2,3,4,18
1,42019,203076,Anthony Davis,1610612747,LAL,Los Angeles Lakers,0041900405,2020-10-09,LAL vs. MIA,L,...,1.000,3,9,12,3,3,3,2,3,7
2,42019,203076,Anthony Davis,1610612747,LAL,Los Angeles Lakers,0041900404,2020-10-06,LAL @ MIA,W,...,1.000,0,9,9,4,1,4,2,2,17
3,42019,203076,Anthony Davis,1610612747,LAL,Los Angeles Lakers,0041900403,2020-10-04,LAL @ MIA,L,...,1.000,2,3,5,3,2,0,5,4,-26
4,42019,203076,Anthony Davis,1610612747,LAL,Los Angeles Lakers,0041900402,2020-10-02,LAL vs. MIA,W,...,1.000,8,6,14,1,1,0,3,4,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,22019,203076,Anthony Davis,1610612747,LAL,Los Angeles Lakers,0021900002,2019-10-22,LAL @ LAC,L,...,0.643,3,6,9,5,1,2,3,3,3
86,12019,203076,Anthony Davis,1610612747,LAL,Los Angeles Lakers,0011900068,2019-10-16,LAL vs. GSW,W,...,0.667,1,9,10,8,1,2,3,2,19
87,12019,203076,Anthony Davis,1610612747,LAL,Los Angeles Lakers,0011900044,2019-10-12,LAL @ BKN,L,...,1.000,0,3,3,0,1,2,0,0,-1
88,12019,203076,Anthony Davis,1610612747,LAL,Los Angeles Lakers,0011900031,2019-10-10,LAL vs. BKN,L,...,0.889,0,2,2,5,1,2,1,2,5


In [61]:
# Get the opposing team's box score stats for the season prior to the game. 
@retry
def get_team_avg(date, team_id, season):
    games = leaguegamefinder.LeagueGameFinder(player_or_team_abbreviation = 'T', season_nullable = season, date_to_nullable = date, team_id_nullable = team_id).get_data_frames()[0]

    # Average team box score stats for this time-period
    numeric_cols = ['FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']
    avg = games[numeric_cols]
    avg = avg.mean()
    
    return avg

In [62]:
def make_df(player_id):
    '''Makes a dataframe containing a players boxscore stats + season stats for the opposing team'''
    player_df = get_all_games('2019-20', player_id).dropna().head()
    # Can't get leaguegamefinder to work with game_id_nullable/game_id, so we use a workaround. 

    opponent_df = pd.DataFrame()
    # Iterate over this players games
    for game in player_df.itertuples():
        dates = game.GAME_DATE.split('-')
        date = dates[1] + '/' + dates[2] + '/' + dates[0]
        team_id = teams.find_team_by_appreviation(game.MATCHUP.split(' ')[2])
        print(team_id)
        opponent_stats = get_team_avg(date, team_id, '2019-20')
        opponent_df = opponent_df.append(opponent_stats, ignore_index = True)
    
    return opponent_df

make_df('203076')

KeyboardInterrupt: 