In [73]:
import pandas as pd
import numpy as np

In [106]:
pbp_df = pd.concat([
    pd.read_csv('data/archive/2017_pbp.csv'),
    pd.read_csv('data/archive/2018_pbp.csv'),
    pd.read_csv('data/archive/2019_pbp.csv')])

In [107]:
pbp_df

Unnamed: 0,cl,de,epid,etype,evt,game_id,hs,locX,locY,mtype,oftid,opid,opt1,opt2,ord,pid,tid,vs
0,12:00,Start Period,,12,2,21700001,0,0,-80,0,0,,0,0,2000.0,0,0,0
1,11:56,Jump Ball Horford vs Love (Irving gains posses...,202681.0,10,4,21700001,0,0,-80,0,1610612738,201567.0,0,0,6000.0,201143,1610612738,0
2,11:44,[BOS 2-0] Irving Driving Floating Jump Shot: M...,201143.0,1,7,21700001,0,-1,100,101,1610612738,,2,0,7000.0,202681,1610612738,2
3,11:27,[CLE] Rose Layup Shot: Missed,,2,9,21700001,0,-18,4,5,1610612739,201143.0,2,0,9000.0,201565,1610612739,2
4,11:23,[BOS] Horford Rebound (Off:0 Def:1),,4,11,21700001,0,-18,4,0,1610612739,,0,0,11000.0,201143,1610612738,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469205,00:18.9,[CHA 109-98] Bridges Free Throw 2 of 2 (16 PTS),,3,612,21900970,98,0,-80,12,1610612766,,1,0,6120000.0,1628970,1610612766,109
469206,00:10.3,[MIA] Crowder 3pt Shot: Missed,,2,613,21900970,98,-204,182,1,1610612748,,3,0,6130000.0,203109,1610612748,109
469207,00:06.0,[CHA] Martin Rebound (Off:1 Def:6),,4,614,21900970,98,-204,182,0,1610612748,,0,0,6140000.0,1628997,1610612766,109
469208,00:00.0,End Period,,13,616,21900970,98,0,-80,0,1610612766,,0,0,6150000.0,0,0,109


In [108]:
event_types = {
    1: 'made shot',
    2: 'missed shot',
    3: 'free throw',
    4: 'rebound',
    5: 'turnover',
    6: 'personal foul',
    8: 'substitution',
    9: 'timeout',
    10: 'jump ball',
    12: 'start period',
    13: 'end period',
    18: 'instant replay',
    20: 'stoppage'
}

- `hs`: Home score
- `vs`: Away score
- `pid`: Player in possession
- `locX`, `locY`: location of the event
- `opid`: opponent player ID (So, for turnover it would be the defender with steal)
- `oftid`: Offensive team id
- `epid`: Related player ID. (For jumpball, person who picked it up. For goals, assist. for substitution, player replacing pid)

In [109]:
pbp_df.head()

Unnamed: 0,cl,de,epid,etype,evt,game_id,hs,locX,locY,mtype,oftid,opid,opt1,opt2,ord,pid,tid,vs
0,12:00,Start Period,,12,2,21700001,0,0,-80,0,0,,0,0,2000.0,0,0,0
1,11:56,Jump Ball Horford vs Love (Irving gains posses...,202681.0,10,4,21700001,0,0,-80,0,1610612738,201567.0,0,0,6000.0,201143,1610612738,0
2,11:44,[BOS 2-0] Irving Driving Floating Jump Shot: M...,201143.0,1,7,21700001,0,-1,100,101,1610612738,,2,0,7000.0,202681,1610612738,2
3,11:27,[CLE] Rose Layup Shot: Missed,,2,9,21700001,0,-18,4,5,1610612739,201143.0,2,0,9000.0,201565,1610612739,2
4,11:23,[BOS] Horford Rebound (Off:0 Def:1),,4,11,21700001,0,-18,4,0,1610612739,,0,0,11000.0,201143,1610612738,2


In [110]:
pbp_df[~pbp_df['epid'].isnull()].drop_duplicates(['etype'])

Unnamed: 0,cl,de,epid,etype,evt,game_id,hs,locX,locY,mtype,oftid,opid,opt1,opt2,ord,pid,tid,vs
1,11:56,Jump Ball Horford vs Love (Irving gains posses...,202681.0,10,4,21700001,0,0,-80,0,1610612738,201567.0,0,0,6000.0,201143,1610612738,0
2,11:44,[BOS 2-0] Irving Driving Floating Jump Shot: M...,201143.0,1,7,21700001,0,-1,100,101,1610612738,,2,0,7000.0,202681,1610612738,2
40,07:22,[CLE] Love Substitution replaced by Thompson,202684.0,8,60,21700001,9,0,-80,0,1610612738,,0,0,59000.0,201567,1610612739,10


In [111]:
pairings = pbp_df.drop_duplicates(['etype', 'mtype']).sort_values(['etype', 'mtype'])

In [112]:
pairings['de'].values

array(['Game End',
       '[BOS 4-0] Horford Jump Shot: Made (2 PTS) Assist: Irving (1 AST)',
       '[GSW 22-9] Thompson 3pt Shot: Made (11 PTS) Assist: Durant (3 AST)',
       '[BOS 21-29] Horford Hook Shot: Made (4 PTS)',
       '[BOS 12-9] Brown Layup Shot: Made (4 PTS)',
       '[CLE 13-14] Rose Driving Layup Shot: Made (4 PTS)',
       '[HOU 103-109] Capela Dunk Shot: Made (8 PTS)',
       '[HOU 26-29] Gordon Driving Dunk Shot: Made (7 PTS)',
       '[CLE 7-4] Rose Running Layup Shot: Made (2 PTS) Assist: James (2 AST)',
       '[CLE 17-17] Thompson Alley Oop Layup shot: Made (2 PTS) Assist: Smith (1 AST)',
       '[CLE 48-30] Crowder Reverse Layup Shot: Made (9 PTS) Assist: James (6 AST)',
       '[CLE 57-42] Wade Turnaround Jump Shot: Made (7 PTS)',
       '[BOS 10-9] Brown Running Dunk Shot: Made (2 PTS) Assist: Tatum (1 AST)',
       '[DEN 24-16] Barton Reverse Dunk Shot: Made (7 PTS) Assist: Jokic (3 AST)',
       '[BOS 42-54] Tatum Alley Oop Dunk Shot: Made (4 PTS) Assist: 

In [113]:
pairings['de'] = pairings['de'].str.replace(r'\[.*?\]', '', regex=True).str.strip()
pairings['de'] = pairings['de'].str.split(' \(').apply(lambda x: x[0])
pairings['de'] = pairings['de'].str.split(' ').apply(lambda x: ' '.join(x[1:]))

In [114]:
pairings['de'].values

array(['End', 'Jump Shot: Made', '3pt Shot: Made', 'Hook Shot: Made',
       'Layup Shot: Made', 'Driving Layup Shot: Made', 'Dunk Shot: Made',
       'Driving Dunk Shot: Made', 'Running Layup Shot: Made',
       'Alley Oop Layup shot: Made', 'Reverse Layup Shot: Made',
       'Turnaround Jump Shot: Made', 'Running Dunk Shot: Made',
       'Reverse Dunk Shot: Made', 'Alley Oop Dunk Shot: Made',
       'Driving Hook Shot: Made', 'Turnaround Hook Shot: Made',
       'Fadeaway Jump Shot: Made', 'Jump Bank Shot: Made',
       'Hook Bank Shot: Made', 'Finger Roll Layup Shot: Made',
       'Putback Layup Shot: Made',
       'a Moute Driving Reverse Layup Shot: Made',
       'Running Reverse Layup Shot: Made',
       'Driving Finger Roll Layup Shot: Made',
       'Running Finger Roll Layup Shot: Made', 'Floating Jump shot: Made',
       'Pullup Jump shot: Made', 'Step Back Jump shot: Made',
       'Turnaround Fadeaway shot: Made', 'Putback Dunk Shot: Made',
       'Driving Bank Hook Shot: Mad

In [115]:
legend = pairings.set_index(['etype', 'mtype']).to_dict()['de']

In [116]:
legend[(8,0)] = 'Substitution'

In [117]:
legend

{(0, 0): 'End',
 (1, 1): 'Jump Shot: Made',
 (1, 2): '3pt Shot: Made',
 (1, 3): 'Hook Shot: Made',
 (1, 5): 'Layup Shot: Made',
 (1, 6): 'Driving Layup Shot: Made',
 (1, 7): 'Dunk Shot: Made',
 (1, 9): 'Driving Dunk Shot: Made',
 (1, 41): 'Running Layup Shot: Made',
 (1, 43): 'Alley Oop Layup shot: Made',
 (1, 44): 'Reverse Layup Shot: Made',
 (1, 47): 'Turnaround Jump Shot: Made',
 (1, 50): 'Running Dunk Shot: Made',
 (1, 51): 'Reverse Dunk Shot: Made',
 (1, 52): 'Alley Oop Dunk Shot: Made',
 (1, 57): 'Driving Hook Shot: Made',
 (1, 58): 'Turnaround Hook Shot: Made',
 (1, 63): 'Fadeaway Jump Shot: Made',
 (1, 66): 'Jump Bank Shot: Made',
 (1, 67): 'Hook Bank Shot: Made',
 (1, 71): 'Finger Roll Layup Shot: Made',
 (1, 72): 'Putback Layup Shot: Made',
 (1, 73): 'a Moute Driving Reverse Layup Shot: Made',
 (1, 74): 'Running Reverse Layup Shot: Made',
 (1, 75): 'Driving Finger Roll Layup Shot: Made',
 (1, 76): 'Running Finger Roll Layup Shot: Made',
 (1, 78): 'Floating Jump shot: Made',
 

I think we can pretty much understand what is going on using this legend (Have some info cut out, but conveys good enough info for us)

For player ID, we have full name in FinalGameLogs.csv so we can map later.

In [118]:
opt1 = pbp_df.drop_duplicates(['opt1', 'etype']).sort_values('etype')

In [119]:
opt1

Unnamed: 0,cl,de,epid,etype,evt,game_id,hs,locX,locY,mtype,oftid,opid,opt1,opt2,ord,pid,tid,vs
485,00:00.0,Game End,,0,675,21700001,102,0,-80,0,0,,0,0,655000.0,0,0,99
2,11:44,[BOS 2-0] Irving Driving Floating Jump Shot: M...,201143.0,1,7,21700001,0,-1,100,101,1610612738,,2,0,7000.0,202681,1610612738,2
84,03:21,[CLE 20-17] Smith 3pt Shot: Made (3 PTS) Assis...,2544.0,1,112,21700001,20,232,28,1,1610612739,,3,0,111000.0,2747,1610612739,17
3,11:27,[CLE] Rose Layup Shot: Missed,,2,9,21700001,0,-18,4,5,1610612739,201143.0,2,0,9000.0,201565,1610612739,2
5,11:21,[BOS] Hayward 3pt Shot: Missed,,2,12,21700001,0,-214,138,1,1610612738,,3,0,12000.0,202330,1610612738,2
127,11:10,[CLE] Rose Free Throw 2 of 2 Missed,,3,182,21700001,30,0,-80,12,1610612739,,2,0,178000.0,201565,1610612739,21
14,10:32,[CLE 1-4] Love Free Throw Technical (1 PTS),,3,23,21700001,1,0,-80,16,1610612739,,1,0,23000.0,201567,1610612739,4
4,11:23,[BOS] Horford Rebound (Off:0 Def:1),,4,11,21700001,0,-18,4,0,1610612739,,0,0,11000.0,201143,1610612738,2
10,10:51,[BOS] Team Rebound,,4,18,21700001,0,10,24,0,1610612738,,1,0,18000.0,0,1610612738,2
16,10:05,[BOS] Brown Turnover : Palming Turnover (1 TO),,5,26,21700001,3,18,173,21,1610612738,,0,0,26000.0,1627759,1610612738,4


By looking at the above, I can infer that `opt1` is:
- for shots, it's the point attempted by the shot
- for fouls, it's whether it's a shooting foul(1) or not(0)
- for rebounds, it's whether it's an offensive reb(1) or def reb(0)

Grabbing rebounds per shot distance

In [120]:
test_df = pbp_df[pbp_df['game_id'] == 21900001]

In [121]:
test_df[(test_df['etype'] == 2) | (test_df['etype'] == 4)].to_csv('check.csv')

Rebound actually has the previous shooting information (is associated already), so we just have to extract defensive rebounds.

In [122]:
test_df[(test_df['etype']==4) & (test_df['opt1'] == 0)]

Unnamed: 0,cl,de,epid,etype,evt,game_id,hs,locX,locY,mtype,oftid,opid,opt1,opt2,ord,pid,tid,vs
6,11:25,[NOP] Redick Rebound (Off:0 Def:1),,4,11,21900001,0,15,29,0,1610612761,,0,0,110000.0,200755,1610612740,2
8,11:15,[TOR] VanVleet Rebound (Off:0 Def:1),,4,13,21900001,0,81,-1,0,1610612740,,0,0,130000.0,1627832,1610612761,2
10,11:08,[NOP] Favors Rebound (Off:1 Def:1),,4,15,21900001,0,178,177,0,1610612761,,0,0,150000.0,202324,1610612740,2
12,10:56,[TOR] Lowry Rebound (Off:0 Def:1),,4,17,21900001,0,18,119,0,1610612740,,0,0,170000.0,200768,1610612761,2
16,10:51,[NOP] Favors Rebound (Off:1 Def:2),,4,22,21900001,1,0,-80,0,1610612761,,0,0,210001.0,202324,1610612740,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
556,03:21,[NOP] Ball Rebound (Off:0 Def:5),,4,762,21900001,119,-145,199,0,1610612761,,0,0,7590000.0,1628366,1610612740,120
569,01:44,[TOR] Powell Rebound (Off:1 Def:7),,4,781,21900001,124,-24,108,0,1610612740,,0,0,7780000.0,1626181,1610612761,122
577,01:14,[TOR] Anunoby Rebound (Off:3 Def:4),,4,792,21900001,127,0,-5,0,1610612740,,0,0,7890000.0,1628384,1610612761,122
580,00:40.6,[TOR] Gasol Rebound (Off:1 Def:3),,4,795,21900001,130,230,-5,0,1610612740,,0,0,7920000.0,201188,1610612761,122


https://rdrr.io/github/sndmrc/BasketAnalyzeR/man/PbP.BDB.html
tracking coordinate system half court

In [151]:
def_revs = pbp_df[(pbp_df['etype']==4) & (pbp_df['opt1'] == 0) & (pbp_df['pid'] != 0)]

In [142]:
min(def_revs['locY']), max(def_revs['locY'])

(-80, 850)

In [143]:
min(def_revs['locX']), max(def_revs['locX'])

(-250, 250)

In [157]:
def_revs['distance'] = np.sqrt(def_revs['locX']**2 + def_revs['locY']**2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [145]:
pid_mapper = pd.read_csv('data/archive/FinalGameLogs.csv').drop_duplicates('idPlayer').set_index('idPlayer')['Player'].to_dict()

In [149]:
tid_mapper = pd.read_csv('data/archive/FinalGameLogs.csv').drop_duplicates('idTeam').set_index('idTeam')['nameTeam'].to_dict()

In [146]:
pid_mapper[203076]

'Anthony Davis'

In [154]:
def_revs['player'] = def_revs['pid'].map(pid_mapper)
def_revs['team'] = def_revs['tid'].map(tid_mapper)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [159]:
def_revs[['game_id', 'player', 'team', 'pid', 'tid', 'locX', 'locY', 'cl', 'distance']]

Unnamed: 0,game_id,player,team,pid,tid,locX,locY,cl,distance
4,21700001,Al Horford,Boston Celtics,201143,1610612738,-18,4,11:23,18.439089
6,21700001,Derrick Rose,Cleveland Cavaliers,201565,1610612739,-214,138,11:18,254.636997
8,21700001,Jaylen Brown,Boston Celtics,1627759,1610612738,-131,83,10:59,155.080624
20,21700001,LeBron James,Cleveland Cavaliers,2544,1610612739,-11,6,09:31,12.529964
30,21700001,Kevin Love,Cleveland Cavaliers,201567,1610612739,-7,31,08:33,31.780497
...,...,...,...,...,...,...,...,...,...
469168,21900970,Kendrick Nunn,Miami Heat,1629134,1610612748,22,18,03:06,28.425341
469177,21900970,Duncan Robinson,Miami Heat,1629130,1610612748,242,8,01:46,242.132195
469187,21900970,Caleb Martin,Charlotte Hornets,1628997,1610612766,-50,25,00:52.7,55.901699
469202,21900970,Miles Bridges,Charlotte Hornets,1628970,1610612766,114,239,00:22.5,264.796148


In [160]:
def_revs[['game_id', 'player', 'team', 'pid', 'tid', 'locX', 'locY', 'cl', 'distance']].to_csv('data/archive/def_revs.csv')