In [2]:
import numpy as np
import pandas as pd

In [3]:
# Found this partway through the analysis: https://sites.northwestern.edu/nusportsanalytics/2019/06/21/the-rise-of-triple-doubles-in-the-nba-examining-the-statistical-causes/
# Basically exactly what I want to do.

In [4]:
# Import high-level information
from nba_api.stats.static import teams
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog

In [5]:
# R to python reference
# https://gist.github.com/conormm/fd8b1980c28dd21cfaf6975c86c74d07

In [6]:
# Collect the full name of every player that the static nba_api player list flags as active.
nbaPlayers = players.get_players()
activePlayers = [
    playerRecord['full_name']
    for playerRecord in nbaPlayers
    if playerRecord['is_active'] == True
]

In [7]:
# Pings the nba_api for a player across one or more seasons.
#   - playerName (string): player's full name in the nba_api db.
class PlayerStats():
    """Container for the game log of one player.

    Attributes:
        playerName:    full name used to look the player up.
        playerLogData: DataFrame of game-log rows; empty until
                       GetPlayerGameLog has been called.
        seasons:       list of the seasons that playerLogData covers.
    """

    def __init__( self, playerName ):
        self.playerName = playerName
        self.playerLogData = pd.DataFrame()
        self.seasons = list()

    # Get the game log for a certain player, based on their full name, across certain seasons.
    #   - seasons (list of integers): seasons to pull game log data on.
    #   - players (nba_api "players" endpoint)
    def GetPlayerGameLog( self, seasons, players ):
        """Download the player's game log for each season and store the result.

        Relies on `playergamelog` (nba_api.stats.endpoints) being imported in
        the notebook. One request is issued per season.

        Args:
            seasons: iterable of seasons to request, e.g. [2020].
            players: object exposing get_players(), which returns dicts with
                     at least the keys 'id' and 'full_name'.

        Raises:
            IndexError: if no entry of players.get_players() has a
                        'full_name' equal to self.playerName.

        Side effects:
            Sets self.playerLogData (all seasons concatenated, plus a
            Player_Name column) and self.seasons.
        """
        matches = [player for player in players.get_players()
                   if player['full_name'] == self.playerName]
        if not matches:
            # IndexError is what the bare `matches[0]` lookup used to raise;
            # keep the type but say which name failed, and fail before any
            # network request is made.
            raise IndexError(
                'No player with full_name {!r} found in players.get_players()'.format(self.playerName)
            )
        curPlayer = matches[0]

        # Copy so later changes to the caller's list do not alter self.seasons.
        seasons = list(seasons)

        # Ping API separately for each season and extract response dataframe.
        seasonLogs = [
            playergamelog.PlayerGameLog( player_id = curPlayer['id'], season = curSeason ).get_data_frames()[0]
            for curSeason in seasons
        ]

        # Combine all seasons. pd.concat refuses an empty list, so an empty
        # season selection yields an empty log instead of an exception.
        playerLog = pd.concat(seasonLogs) if seasonLogs else pd.DataFrame()
        # Add player name.
        playerLog = playerLog.assign( Player_Name = curPlayer['full_name'] )
        self.playerLogData = playerLog
        self.seasons = seasons


In [8]:
# Create the stats holder for one player; the constructor only stores the name and empty placeholders.
LukaDoncicStats = PlayerStats(playerName = 'Luka Doncic' )

In [9]:
LukaDoncicStats.playerName

'Luka Doncic'

In [10]:
# Fetch the game log for the listed season(s); the result is stored on the object (playerLogData, seasons).
LukaDoncicStats.GetPlayerGameLog(seasons=[2020],players=players)

In [11]:
LukaDoncicStats.seasons

[2020]

In [12]:
LukaDoncicStats.playerLogData

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,Player_Name
0,22020,1629029,0022001071,"MAY 16, 2021",DAL @ MIN,L,21,6,13,0.462,...,6,6,2,0,4,1,18,-15,1,Luka Doncic
1,22020,1629029,0022001057,"MAY 14, 2021",DAL vs. TOR,W,34,7,19,0.368,...,10,11,0,1,1,4,20,-5,1,Luka Doncic
2,22020,1629029,0022000154,"MAY 12, 2021",DAL vs. NOP,W,31,12,24,0.500,...,8,8,0,0,4,1,33,24,1,Luka Doncic
3,22020,1629029,0022001037,"MAY 11, 2021",DAL @ MEM,L,26,4,16,0.250,...,5,5,0,0,5,1,12,-23,1,Luka Doncic
4,22020,1629029,0022001021,"MAY 09, 2021",DAL @ CLE,W,22,5,11,0.455,...,5,5,2,0,1,2,15,4,1,Luka Doncic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,22020,1629029,0022000071,"JAN 01, 2021",DAL vs. MIA,W,36,9,22,0.409,...,15,7,0,1,5,2,27,12,1,Luka Doncic
62,22020,1629029,0022000059,"DEC 30, 2020",DAL vs. CHA,L,24,4,10,0.400,...,2,5,2,0,3,3,12,-27,1,Luka Doncic
63,22020,1629029,0022000031,"DEC 27, 2020",DAL @ LAC,W,26,8,18,0.444,...,9,8,1,0,2,0,24,29,1,Luka Doncic
64,22020,1629029,0022000008,"DEC 25, 2020",DAL @ LAL,L,34,9,19,0.474,...,4,7,0,1,3,0,27,-14,1,Luka Doncic


In [13]:
# Theory is that players hunt for double doubles and triple doubles. Plot distribution of
# rebounds when 10 points or more
# assists when 10 points or more
# assists when 10 points or more and 10 rebounds or more.
# rebounds when 10 points or more and 10 assists or more.
# - playerStatsData is a game-log DataFrame (e.g. PlayerStats.playerLogData) with PTS, REB and AST columns.
# - playerName (string) is used as the prefix of every plot title.
class PlotGameLogStats():
    """Histograms of one stat over the games in which other stats reached 10 or more."""

    def __init__( self, playerStatsData, playerName ):
        self.playerStatsData = playerStatsData
        self.playerName      = playerName

    # Keep only the games where every column in thresholdColumns is >= threshold,
    # and return the statColumn values of those games as a one-column DataFrame.
    def _SelectStat( self, statColumn, thresholdColumns, threshold = 10 ):
        selected = self.playerStatsData
        for column in thresholdColumns:
            selected = selected[selected[column] >= threshold]
        return selected[[statColumn]]

    # Integer bin edges 0, 1, ..., upper + 1 so that every value from 0 to upper
    # gets its own bar. `upper` is the largest observed value, but never less than
    # minUpper, so sparse data still gets a stable x-range. The earlier fixed edges
    # (1..19) dropped games with 0 or 20+ of the stat and merged 18 and 19.
    @staticmethod
    def _HistogramBins( values, minUpper = 19 ):
        observedMax = values.max()
        # max() of an empty selection is NaN; fall back to the default range.
        upper = minUpper if pd.isna(observedMax) else max(minUpper, int(observedMax))
        return list(range(0, upper + 2))

    # Shared implementation of the four public plots: filter, draw, label.
    def _PlotStatHistogram( self, statColumn, thresholdColumns, xLabel, condition ):
        plotData = self._SelectStat( statColumn, thresholdColumns )

        # Plot
        plot = plotData.plot.hist(bins = self._HistogramBins( plotData[statColumn] ))
        plot.set_xlabel(xLabel)
        plot.set_title(self.playerName + ': ' + condition)
        plot.locator_params(integer=True)  # integer tick positions for count data
        return plot

    # Plot rebounds when 10 points or more.
    def ReboundsWhen10Points( self ):
        """Return the histogram axes of REB over games with PTS >= 10."""
        return self._PlotStatHistogram( 'REB', ['PTS'], 'Rebounds',
                                        'Rebounds when 10 points or more' )

    # Plot assists when 10 points or more.
    def AssistsWhen10Points( self ):
        """Return the histogram axes of AST over games with PTS >= 10."""
        return self._PlotStatHistogram( 'AST', ['PTS'], 'Assists',
                                        'Assists when 10 points or more' )

    # Plot rebounds when 10 points or more and 10 assists or more.
    def Rebounds10PointsAndAssists( self ):
        """Return the histogram axes of REB over games with PTS >= 10 and AST >= 10."""
        return self._PlotStatHistogram( 'REB', ['PTS', 'AST'], 'Rebounds',
                                        'Rebounds when 10 points or more and 10 assists or more' )

    # Plot assists when 10 points or more and 10 rebounds or more.
    def Assists10PointsAndRebounds( self ):
        """Return the histogram axes of AST over games with PTS >= 10 and REB >= 10."""
        return self._PlotStatHistogram( 'AST', ['PTS', 'REB'], 'Assists',
                                        'Assists when 10 points or more and 10 rebounds or more' )
        

In [14]:
# Note: playerStatsData receives the game-log DataFrame itself, not the PlayerStats object.
PlotLukaStats = PlotGameLogStats( playerStatsData = LukaDoncicStats.playerLogData, playerName = LukaDoncicStats.playerName )

In [15]:
PlotLukaStats.playerStatsData

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,Player_Name
0,22020,1629029,0022001071,"MAY 16, 2021",DAL @ MIN,L,21,6,13,0.462,...,6,6,2,0,4,1,18,-15,1,Luka Doncic
1,22020,1629029,0022001057,"MAY 14, 2021",DAL vs. TOR,W,34,7,19,0.368,...,10,11,0,1,1,4,20,-5,1,Luka Doncic
2,22020,1629029,0022000154,"MAY 12, 2021",DAL vs. NOP,W,31,12,24,0.500,...,8,8,0,0,4,1,33,24,1,Luka Doncic
3,22020,1629029,0022001037,"MAY 11, 2021",DAL @ MEM,L,26,4,16,0.250,...,5,5,0,0,5,1,12,-23,1,Luka Doncic
4,22020,1629029,0022001021,"MAY 09, 2021",DAL @ CLE,W,22,5,11,0.455,...,5,5,2,0,1,2,15,4,1,Luka Doncic
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,22020,1629029,0022000071,"JAN 01, 2021",DAL vs. MIA,W,36,9,22,0.409,...,15,7,0,1,5,2,27,12,1,Luka Doncic
62,22020,1629029,0022000059,"DEC 30, 2020",DAL vs. CHA,L,24,4,10,0.400,...,2,5,2,0,3,3,12,-27,1,Luka Doncic
63,22020,1629029,0022000031,"DEC 27, 2020",DAL @ LAC,W,26,8,18,0.444,...,9,8,1,0,2,0,24,29,1,Luka Doncic
64,22020,1629029,0022000008,"DEC 25, 2020",DAL @ LAL,L,34,9,19,0.474,...,4,7,0,1,3,0,27,-14,1,Luka Doncic


In [17]:
# Build an empirical probability distribution for each player/stat combination, i.e. the relative frequency with which each value occurs.
# Use conditional probability, e.g., given that a player has 10 or more points, what is the chance that they also have 10 or more assists?

In [18]:
class StatDistributions():
    """Conditional relative-frequency distributions of a player's stats.

    For each entry of StatFilters, the games where every 'filterVars' column
    reaches the threshold are kept, and the relative frequency of each value of
    the 'selectVar' column is computed over those games.

    Attributes:
        PlayerStatsClass: object providing playerName and playerLogData
                          (a DataFrame containing the columns named in StatFilters).
        playerName:       copied from PlayerStatsClass.
        StatFilters:      array of {'selectVar': [...], 'filterVars': [...]} dicts.
        Distributions:    dict mapping a result name to a Series;
                          empty until IterOverStats has been called.
    """

    def __init__( self, PlayerStatsClass ):
        self.PlayerStatsClass = PlayerStatsClass
        self.Distributions = dict()
        self.playerName = PlayerStatsClass.playerName
        self.StatFilters = np.array([
            {'selectVar': ['AST'], 'filterVars': ['PTS']},
            {'selectVar': ['AST'], 'filterVars': ['PTS','REB']},
            {'selectVar': ['REB'], 'filterVars': ['PTS']},
            {'selectVar': ['REB'], 'filterVars': ['PTS','AST']}
        ])

    def IterOverStats( self, threshold = 10 ):
        """Compute the distribution for every entry of StatFilters.

        Args:
            threshold: minimum value each filter column must reach (default 10).

        Returns:
            0. The results are stored in self.Distributions, replacing any
            previous contents.
        """
        allPlayerDistributions = dict()
        for curFilter in self.StatFilters:
            allPlayerDistributions.update(
                self.CalcDistribution( self.PlayerStatsClass, curFilter, threshold )
            )

        self.Distributions = allPlayerDistributions
        return 0

    def CalcDistribution( self, curPlayerStatClass, CurFilter, threshold = 10 ):
        """Relative frequency of one stat over the games that pass a filter.

        Args:
            curPlayerStatClass: object whose playerLogData DataFrame is analysed.
            CurFilter: dict with 'selectVar' (one-element list naming the column
                       to tabulate) and 'filterVars' (columns that must each be
                       >= threshold).
            threshold: integer cut-off applied to every filter column (default 10).

        Returns:
            A one-entry dict. The key has the form
            '<select>_when_<filter1>_<filter2>_gt_<threshold - 1>'; the value is a
            Series of relative frequencies indexed by stat value, sorted by index.

        Raises:
            KeyError: if playerLogData lacks a required column, for example
                      because the game log has not been loaded yet.
        """
        selectVar = CurFilter['selectVar'][0]
        filterVars = list(CurFilter['filterVars'])
        allStats = list(CurFilter['selectVar']) + filterVars

        curPlayerData = curPlayerStatClass.playerLogData
        missing = [col for col in allStats if col not in curPlayerData.columns]
        if missing:
            raise KeyError(
                'playerLogData is missing column(s) {}; has the game log been loaded?'.format(missing)
            )
        curPlayerData = curPlayerData[allStats] # Keep only the required columns.

        # Filter down to games where every filter variable reaches the threshold.
        for var in filterVars:
            curPlayerData = curPlayerData[curPlayerData[var] >= threshold]

        # Relative frequency of each observed value, ordered by the value itself.
        statDistribution = curPlayerData[selectVar].value_counts(normalize = True).sort_index()

        # ">= threshold" is labelled "gt threshold - 1", e.g. "_gt_9" for the default of 10.
        curStatName = selectVar + '_when_' + '_'.join(filterVars) + '_gt_' + str(threshold - 1)

        # Return results in a dictionary.
        return { curStatName: statDistribution }

In [19]:
# StatDistributions takes the PlayerStats object itself and reads its playerName and playerLogData.
LukaStatDist = StatDistributions(LukaDoncicStats)

In [20]:
# Fills LukaStatDist.Distributions; the 0 displayed below is only the method's return value.
LukaStatDist.IterOverStats()

0

In [21]:
LukaStatDist.playerName

'Luka Doncic'

In [22]:
### Gather two datasets ###
# Stats for all current active players
# Distributions for all current active players 

In [23]:
activePlayers

['Steven Adams',
 'Bam Adebayo',
 'LaMarcus Aldridge',
 'Kyle Alexander',
 'Nickeil Alexander-Walker',
 'Grayson Allen',
 'Jarrett Allen',
 'Kadeem Allen',
 'Al-Farouq Aminu',
 'Justin Anderson',
 'Kyle Anderson',
 'Ryan Anderson',
 'Giannis Antetokounmpo',
 'Kostas Antetokounmpo',
 'Thanasis Antetokounmpo',
 'Carmelo Anthony',
 'OG Anunoby',
 'Ryan Arcidiacono',
 'Trevor Ariza',
 'D.J. Augustin',
 'Deandre Ayton',
 'Dwayne Bacon',
 'Marvin Bagley III',
 'Lonzo Ball',
 'Mo Bamba',
 'J.J. Barea',
 'Harrison Barnes',
 'RJ Barrett',
 'Will Barton',
 'Keita Bates-Diop',
 'Nicolas Batum',
 'Aron Baynes',
 'Kent Bazemore',
 'Darius Bazley',
 'Bradley Beal',
 'Malik Beasley',
 'Marco Belinelli',
 'Jordan Bell',
 "DeAndre' Bembry",
 'Dragan Bender',
 'Davis Bertans',
 'Patrick Beverley',
 'Khem Birch',
 'Goga Bitadze',
 'Bismack Biyombo',
 'Nemanja Bjelica',
 'Eric Bledsoe',
 'Bogdan Bogdanovic',
 'Bojan Bogdanovic',
 'Bol Bol',
 'Jonah Bolden',
 'Jordan Bone',
 'Isaac Bonga',
 'Devin Booker',

In [24]:
# Use the first entry of activePlayers ('Steven Adams' in the listing above) to try the pipeline on one player.
curPlayerName = activePlayers[0]

In [25]:
# Same construct-then-fetch pattern as used for Luka Doncic earlier in the notebook.
CurPlayerStats = PlayerStats( playerName = curPlayerName )

In [26]:
# Fetch this player's game log for the listed season(s); the result is stored in CurPlayerStats.playerLogData.
CurPlayerStats.GetPlayerGameLog(seasons=[2020],players=players)

In [27]:
CurPlayerStats.playerLogData

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,Player_Name
0,22020,203500,22000987,"MAY 04, 2021",NOP vs. GSW,W,24,0,3,0.0,...,8,2,0,1,0,0,1,5,1,Steven Adams
1,22020,203500,22000940,"APR 28, 2021",NOP @ DEN,L,13,1,2,0.5,...,1,2,1,0,0,1,2,-5,1,Steven Adams
2,22020,203500,22000264,"APR 24, 2021",NOP vs. SAS,L,32,2,3,0.667,...,10,1,3,0,3,2,4,-2,1,Steven Adams
3,22020,203500,22000893,"APR 22, 2021",NOP @ ORL,W,16,5,8,0.625,...,9,0,0,0,0,2,12,15,1,Steven Adams
4,22020,203500,22000878,"APR 20, 2021",NOP vs. BKN,L,29,3,5,0.6,...,5,1,0,2,0,3,7,0,1,Steven Adams
5,22020,203500,22000862,"APR 18, 2021",NOP @ NYK,L,36,5,7,0.714,...,14,3,1,0,2,2,10,-1,1,Steven Adams
6,22020,203500,22000847,"APR 16, 2021",NOP @ WAS,L,30,1,1,1.0,...,12,4,0,2,0,1,6,7,1,Steven Adams
7,22020,203500,22000835,"APR 14, 2021",NOP vs. NYK,L,31,5,7,0.714,...,10,3,0,0,0,1,10,17,1,Steven Adams
8,22020,203500,22000819,"APR 12, 2021",NOP vs. SAC,W,27,2,4,0.5,...,16,0,1,0,0,1,6,-2,1,Steven Adams
9,22020,203500,22000807,"APR 11, 2021",NOP @ CLE,W,27,0,1,0.0,...,8,3,1,0,2,1,0,-7,1,Steven Adams
