In [4]:
import pandas as pd

In [5]:
columns = [
    "team_name",
    "player_name",
    "position_group",
    "position",
    "game_date",
    "home_team",
    "away_team",
    "event_type",
    "shot_made",
    "action_type",
    "shot_type",
    "basic_zone",
    "zone_name",
    "zone_abb",
    "zone_range",
    "loc_x",
    "loc_y",
    "shot_distance",
    "quarter",
    "mins_left",
    "secs_left"
]

shots_df = pd.read_csv("shots.csv", names=columns)

# Display the first few rows to verify
shots_df.head()

Unnamed: 0,team_name,player_name,position_group,position,game_date,home_team,away_team,event_type,shot_made,action_type,...,basic_zone,zone_name,zone_abb,zone_range,loc_x,loc_y,shot_distance,quarter,mins_left,secs_left
0,Los Angeles Lakers,Anthony Davis,C,C,2021-12-09,MEM,LAL,Made Shot,True,Dunk Shot,...,Restricted Area,Center,C,Less Than 8 ft.,0.21,5.925,2,4,2,21
1,Memphis Grizzlies,Desmond Bane,G,SG,2021-12-09,MEM,LAL,Missed Shot,False,Step Back Jump shot,...,Above the Break 3,Center,C,24+ ft.,0.11,8.395,26,4,2,27
2,Denver Nuggets,Will Barton,G,SG,2021-12-09,SAS,DEN,Missed Shot,False,Step Back Jump shot,...,Left Corner 3,Left Side,L,24+ ft.,-2.33,6.185,23,4,2,3
3,Utah Jazz,Jared Butler,G,SG,2021-12-09,PHI,UTA,Made Shot,True,Driving Finger Roll Layup Shot,...,Restricted Area,Center,C,Less Than 8 ft.,0.1,5.855,1,4,0,30
4,San Antonio Spurs,Dejounte Murray,G,PG,2021-12-09,SAS,DEN,Made Shot,True,Driving Floating Jump Shot,...,In The Paint (Non-RA),Left Side,L,8-16 ft.,-0.72,6.605,10,4,2,24


In [6]:
shots_df.shape

(433585, 21)

In [7]:
shots_df.columns

Index(['team_name', 'player_name', 'position_group', 'position', 'game_date',
       'home_team', 'away_team', 'event_type', 'shot_made', 'action_type',
       'shot_type', 'basic_zone', 'zone_name', 'zone_abb', 'zone_range',
       'loc_x', 'loc_y', 'shot_distance', 'quarter', 'mins_left', 'secs_left'],
      dtype='object')

In [8]:
# Check if game_date and player_name could uniquely identify rows
unique_rows = shots_df[['game_date', 'player_name']].drop_duplicates()
print(len(unique_rows) == len(shots_df))

False


In [9]:
# Group by player_name and check if team_name is unique within each player_name
unique_team_names = shots_df.groupby('player_name')['team_name'].nunique()
print(unique_team_names[unique_team_names > 1])

player_name
A.J. Lawson       2
Aaron Holiday     3
Aaron Nesmith     2
Alec Burks        2
Alize Johnson     4
                 ..
Wenyen Gabriel    2
Will Barton       3
Xavier Moon       2
Xavier Sneed      3
Yuta Watanabe     2
Name: team_name, Length: 240, dtype: int64


In [10]:
shots_df_grouped = shots_df.groupby(["game_date", "player_name", "quarter", "mins_left", "secs_left"])
counts = shots_df_grouped.size()
(counts == 1).all()

True

In [11]:
game_data = pd.read_csv("game_info.csv")

In [12]:
game_data

Unnamed: 0,game_id,season,date,away_team,away_score,home_team,home_score,result
0,131410290001,1314,2013-10-29,ORL,87,IND,97,1
1,131410290002,1314,2013-10-29,CHI,95,MIA,107,1
2,131410290003,1314,2013-10-29,LAC,103,LAL,116,1
3,131410300004,1314,2013-10-30,BRK,94,CLE,98,1
4,131410300005,1314,2013-10-30,BOS,87,TOR,93,1
...,...,...,...,...,...,...,...,...
11974,222304091226,2223,2023-04-09,UTA,117,LAL,128,1
11975,222304091227,2223,2023-04-09,NOP,108,MIN,113,1
11976,222304091228,2223,2023-04-09,MEM,100,OKC,115,1
11977,222304091229,2223,2023-04-09,LAC,119,PHO,114,0


In [13]:
#check matches can be done on home_team and away_team
game_home = game_data["home_team"].unique()
shots_home = shots_df["home_team"].unique()
home_teams = set(game_home.tolist() + shots_home.tolist())
home_teams


{'ATL',
 'BKN',
 'BOS',
 'BRK',
 'CHA',
 'CHI',
 'CHO',
 'CLE',
 'DAL',
 'DEN',
 'DET',
 'GSW',
 'HOU',
 'IND',
 'LAC',
 'LAL',
 'MEM',
 'MIA',
 'MIL',
 'MIN',
 'NOP',
 'NYK',
 'OKC',
 'ORL',
 'PHI',
 'PHO',
 'PHX',
 'POR',
 'SAC',
 'SAS',
 'TOR',
 'UTA',
 'WAS'}

In [14]:
#do the same for away teams
game_away = game_data["away_team"].unique()
shots_away = shots_df["away_team"].unique()
away_teams = set(game_away.tolist() + shots_away.tolist())
away_teams

{'ATL',
 'BKN',
 'BOS',
 'BRK',
 'CHA',
 'CHI',
 'CHO',
 'CLE',
 'DAL',
 'DEN',
 'DET',
 'GSW',
 'HOU',
 'IND',
 'LAC',
 'LAL',
 'MEM',
 'MIA',
 'MIL',
 'MIN',
 'NOP',
 'NYK',
 'OKC',
 'ORL',
 'PHI',
 'PHO',
 'PHX',
 'POR',
 'SAC',
 'SAS',
 'TOR',
 'UTA',
 'WAS'}

In [15]:
#do the same for game dates
game_dates = game_data["date"].unique()
shots_dates = shots_df["game_date"].unique()
dates = set(game_dates.tolist() + shots_dates.tolist())
dates

{'2019-03-28',
 '2014-03-17',
 '2022-03-12',
 '2015-03-13',
 '2017-01-05',
 '2023-02-04',
 '2023-03-21',
 '2016-02-29',
 '2013-12-21',
 '2018-12-05',
 '2017-11-04',
 '2022-10-31',
 '2014-11-01',
 '2023-03-17',
 '2013-11-20',
 '2017-01-22',
 '2020-07-31',
 '2022-10-22',
 '2023-01-15',
 '2018-01-31',
 '2014-03-11',
 '2023-02-15',
 '2017-11-01',
 '2017-01-15',
 '2019-10-22',
 '2015-12-03',
 '2019-03-02',
 '2019-01-03',
 '2015-11-28',
 '2020-01-27',
 '2018-11-12',
 '2017-01-27',
 '2019-01-19',
 '2018-01-23',
 '2020-01-22',
 '2021-04-25',
 '2014-12-22',
 '2017-11-28',
 '2022-01-27',
 '2013-11-27',
 '2019-03-16',
 '2020-02-13',
 '2021-05-11',
 '2018-01-29',
 '2019-01-14',
 '2021-11-16',
 '2018-02-02',
 '2022-02-24',
 '2017-01-21',
 '2015-03-05',
 '2022-03-31',
 '2022-03-07',
 '2022-12-07',
 '2021-01-03',
 '2023-03-04',
 '2021-01-01',
 '2016-01-26',
 '2021-11-06',
 '2014-12-02',
 '2021-11-14',
 '2021-03-19',
 '2018-01-18',
 '2022-11-07',
 '2015-01-06',
 '2018-11-01',
 '2015-10-28',
 '2015-11-

In [16]:
# Define a function that checks if 'home_team' or 'away_team' abbreviation letters are all present in team_name after removing spaces
def set_team_abbr_custom(row):

    team_name_upper = row['team_name'].replace(" ", "").upper()
    # Check if 'home_team' letters appear in team_name
    if all(char in team_name_upper for char in row['home_team']):
        return row['home_team']
    # Check if 'away_team' letters appear in team_name
    elif all(char in team_name_upper for char in row['away_team']):
        return row['away_team']
    else:
        return None  

# Apply the function to the dataframe
shots_df['team_abbr'] = shots_df.apply(set_team_abbr_custom, axis=1)

# Display the modified DataFrame to verify the results
shots_df.head()

Unnamed: 0,team_name,player_name,position_group,position,game_date,home_team,away_team,event_type,shot_made,action_type,...,zone_name,zone_abb,zone_range,loc_x,loc_y,shot_distance,quarter,mins_left,secs_left,team_abbr
0,Los Angeles Lakers,Anthony Davis,C,C,2021-12-09,MEM,LAL,Made Shot,True,Dunk Shot,...,Center,C,Less Than 8 ft.,0.21,5.925,2,4,2,21,LAL
1,Memphis Grizzlies,Desmond Bane,G,SG,2021-12-09,MEM,LAL,Missed Shot,False,Step Back Jump shot,...,Center,C,24+ ft.,0.11,8.395,26,4,2,27,MEM
2,Denver Nuggets,Will Barton,G,SG,2021-12-09,SAS,DEN,Missed Shot,False,Step Back Jump shot,...,Left Side,L,24+ ft.,-2.33,6.185,23,4,2,3,DEN
3,Utah Jazz,Jared Butler,G,SG,2021-12-09,PHI,UTA,Made Shot,True,Driving Finger Roll Layup Shot,...,Center,C,Less Than 8 ft.,0.1,5.855,1,4,0,30,UTA
4,San Antonio Spurs,Dejounte Murray,G,PG,2021-12-09,SAS,DEN,Made Shot,True,Driving Floating Jump Shot,...,Left Side,L,8-16 ft.,-0.72,6.605,10,4,2,24,SAS


In [17]:
shots_df_updated = shots_df[["game_date", "player_name", "team_abbr",
    "shot_made",
    "action_type",
    "shot_type",
    "basic_zone",
    "zone_name",
    "zone_abb",
    "zone_range",
    "loc_x",
    "loc_y",
    "shot_distance",
    "quarter",
    "mins_left",
    "secs_left"]]
shots_df_updated

Unnamed: 0,game_date,player_name,team_abbr,shot_made,action_type,shot_type,basic_zone,zone_name,zone_abb,zone_range,loc_x,loc_y,shot_distance,quarter,mins_left,secs_left
0,2021-12-09,Anthony Davis,LAL,True,Dunk Shot,2PT Field,Restricted Area,Center,C,Less Than 8 ft.,0.21,5.925,2,4,2,21
1,2021-12-09,Desmond Bane,MEM,False,Step Back Jump shot,3PT Field,Above the Break 3,Center,C,24+ ft.,0.11,8.395,26,4,2,27
2,2021-12-09,Will Barton,DEN,False,Step Back Jump shot,3PT Field,Left Corner 3,Left Side,L,24+ ft.,-2.33,6.185,23,4,2,3
3,2021-12-09,Jared Butler,UTA,True,Driving Finger Roll Layup Shot,2PT Field,Restricted Area,Center,C,Less Than 8 ft.,0.10,5.855,1,4,0,30
4,2021-12-09,Dejounte Murray,SAS,True,Driving Floating Jump Shot,2PT Field,In The Paint (Non-RA),Left Side,L,8-16 ft.,-0.72,6.605,10,4,2,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
433580,2021-12-09,Russell Westbrook,LAL,False,Pullup Jump shot,3PT Field,Above the Break 3,Right Side Center,RC,24+ ft.,1.68,7.655,25,4,1,45
433581,2021-12-09,De'Anthony Melton,MEM,False,Jump Shot,3PT Field,Above the Break 3,Left Side Center,LC,24+ ft.,-1.34,7.945,25,4,1,55
433582,2021-12-09,Jaren Jackson Jr.,MEM,False,Jump Shot,3PT Field,Right Corner 3,Right Side,R,24+ ft.,2.27,6.445,23,4,1,59
433583,2021-12-09,Will Barton,DEN,False,Jump Shot,3PT Field,Above the Break 3,Left Side Center,LC,24+ ft.,-1.63,7.835,26,4,1,37


In [18]:
shots_df_grouped = shots_df_updated.groupby(["game_date", "player_name", "team_abbr"])
counts = shots_df_grouped.size()
(counts == 1).all()

False

In [19]:
shots_df_normalized = shots_df[['game_date', 'player_name','team_abbr', 'event_type', 'shot_made', 'action_type', 'shot_type', 
                                'basic_zone', 'zone_name', 'zone_abb', 'zone_range', 
                                'loc_x', 'loc_y', 'shot_distance', 'quarter', 'mins_left', 'secs_left']]

In [20]:
shots_df_normalized

Unnamed: 0,game_date,player_name,team_abbr,event_type,shot_made,action_type,shot_type,basic_zone,zone_name,zone_abb,zone_range,loc_x,loc_y,shot_distance,quarter,mins_left,secs_left
0,2021-12-09,Anthony Davis,LAL,Made Shot,True,Dunk Shot,2PT Field,Restricted Area,Center,C,Less Than 8 ft.,0.21,5.925,2,4,2,21
1,2021-12-09,Desmond Bane,MEM,Missed Shot,False,Step Back Jump shot,3PT Field,Above the Break 3,Center,C,24+ ft.,0.11,8.395,26,4,2,27
2,2021-12-09,Will Barton,DEN,Missed Shot,False,Step Back Jump shot,3PT Field,Left Corner 3,Left Side,L,24+ ft.,-2.33,6.185,23,4,2,3
3,2021-12-09,Jared Butler,UTA,Made Shot,True,Driving Finger Roll Layup Shot,2PT Field,Restricted Area,Center,C,Less Than 8 ft.,0.10,5.855,1,4,0,30
4,2021-12-09,Dejounte Murray,SAS,Made Shot,True,Driving Floating Jump Shot,2PT Field,In The Paint (Non-RA),Left Side,L,8-16 ft.,-0.72,6.605,10,4,2,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
433580,2021-12-09,Russell Westbrook,LAL,Missed Shot,False,Pullup Jump shot,3PT Field,Above the Break 3,Right Side Center,RC,24+ ft.,1.68,7.655,25,4,1,45
433581,2021-12-09,De'Anthony Melton,MEM,Missed Shot,False,Jump Shot,3PT Field,Above the Break 3,Left Side Center,LC,24+ ft.,-1.34,7.945,25,4,1,55
433582,2021-12-09,Jaren Jackson Jr.,MEM,Missed Shot,False,Jump Shot,3PT Field,Right Corner 3,Right Side,R,24+ ft.,2.27,6.445,23,4,1,59
433583,2021-12-09,Will Barton,DEN,Missed Shot,False,Jump Shot,3PT Field,Above the Break 3,Left Side Center,LC,24+ ft.,-1.63,7.835,26,4,1,37


In [21]:
#a player has taken multiple shots in the same game
shots_df_normalized_grouped = shots_df_normalized.groupby(["game_date", "player_name"])
counts = shots_df_normalized_grouped.size()
(counts == 1).all()

False

In [22]:
#add shots_id
shots_df_normalized.loc[:, "shot_id"] = range(1, len(shots_df_normalized) + 1)
shots_df_normalized

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shots_df_normalized.loc[:, "shot_id"] = range(1, len(shots_df_normalized) + 1)


Unnamed: 0,game_date,player_name,team_abbr,event_type,shot_made,action_type,shot_type,basic_zone,zone_name,zone_abb,zone_range,loc_x,loc_y,shot_distance,quarter,mins_left,secs_left,shot_id
0,2021-12-09,Anthony Davis,LAL,Made Shot,True,Dunk Shot,2PT Field,Restricted Area,Center,C,Less Than 8 ft.,0.21,5.925,2,4,2,21,1
1,2021-12-09,Desmond Bane,MEM,Missed Shot,False,Step Back Jump shot,3PT Field,Above the Break 3,Center,C,24+ ft.,0.11,8.395,26,4,2,27,2
2,2021-12-09,Will Barton,DEN,Missed Shot,False,Step Back Jump shot,3PT Field,Left Corner 3,Left Side,L,24+ ft.,-2.33,6.185,23,4,2,3,3
3,2021-12-09,Jared Butler,UTA,Made Shot,True,Driving Finger Roll Layup Shot,2PT Field,Restricted Area,Center,C,Less Than 8 ft.,0.10,5.855,1,4,0,30,4
4,2021-12-09,Dejounte Murray,SAS,Made Shot,True,Driving Floating Jump Shot,2PT Field,In The Paint (Non-RA),Left Side,L,8-16 ft.,-0.72,6.605,10,4,2,24,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
433580,2021-12-09,Russell Westbrook,LAL,Missed Shot,False,Pullup Jump shot,3PT Field,Above the Break 3,Right Side Center,RC,24+ ft.,1.68,7.655,25,4,1,45,433581
433581,2021-12-09,De'Anthony Melton,MEM,Missed Shot,False,Jump Shot,3PT Field,Above the Break 3,Left Side Center,LC,24+ ft.,-1.34,7.945,25,4,1,55,433582
433582,2021-12-09,Jaren Jackson Jr.,MEM,Missed Shot,False,Jump Shot,3PT Field,Right Corner 3,Right Side,R,24+ ft.,2.27,6.445,23,4,1,59,433583
433583,2021-12-09,Will Barton,DEN,Missed Shot,False,Jump Shot,3PT Field,Above the Break 3,Left Side Center,LC,24+ ft.,-1.63,7.835,26,4,1,37,433584


In [23]:
shots_df_normalized = shots_df_normalized[['shot_id', 'game_date', 'player_name', 'team_abbr', 'event_type', 'shot_made', 'action_type', 'shot_type', 
                                'basic_zone', 'zone_name', 'zone_abb', 'zone_range', 
                                'loc_x', 'loc_y', 'shot_distance', 'quarter', 'mins_left', 'secs_left']]
shots_df_normalized

Unnamed: 0,shot_id,game_date,player_name,team_abbr,event_type,shot_made,action_type,shot_type,basic_zone,zone_name,zone_abb,zone_range,loc_x,loc_y,shot_distance,quarter,mins_left,secs_left
0,1,2021-12-09,Anthony Davis,LAL,Made Shot,True,Dunk Shot,2PT Field,Restricted Area,Center,C,Less Than 8 ft.,0.21,5.925,2,4,2,21
1,2,2021-12-09,Desmond Bane,MEM,Missed Shot,False,Step Back Jump shot,3PT Field,Above the Break 3,Center,C,24+ ft.,0.11,8.395,26,4,2,27
2,3,2021-12-09,Will Barton,DEN,Missed Shot,False,Step Back Jump shot,3PT Field,Left Corner 3,Left Side,L,24+ ft.,-2.33,6.185,23,4,2,3
3,4,2021-12-09,Jared Butler,UTA,Made Shot,True,Driving Finger Roll Layup Shot,2PT Field,Restricted Area,Center,C,Less Than 8 ft.,0.10,5.855,1,4,0,30
4,5,2021-12-09,Dejounte Murray,SAS,Made Shot,True,Driving Floating Jump Shot,2PT Field,In The Paint (Non-RA),Left Side,L,8-16 ft.,-0.72,6.605,10,4,2,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
433580,433581,2021-12-09,Russell Westbrook,LAL,Missed Shot,False,Pullup Jump shot,3PT Field,Above the Break 3,Right Side Center,RC,24+ ft.,1.68,7.655,25,4,1,45
433581,433582,2021-12-09,De'Anthony Melton,MEM,Missed Shot,False,Jump Shot,3PT Field,Above the Break 3,Left Side Center,LC,24+ ft.,-1.34,7.945,25,4,1,55
433582,433583,2021-12-09,Jaren Jackson Jr.,MEM,Missed Shot,False,Jump Shot,3PT Field,Right Corner 3,Right Side,R,24+ ft.,2.27,6.445,23,4,1,59
433583,433584,2021-12-09,Will Barton,DEN,Missed Shot,False,Jump Shot,3PT Field,Above the Break 3,Left Side Center,LC,24+ ft.,-1.63,7.835,26,4,1,37


In [24]:
player_stats = pd.read_csv("player_stats.csv")
player_stats

Unnamed: 0,game_id,player,team,FG,FGA,3P,3PA,FT,FTA,ORB,DRB,AST,STL,BLK,PTS
0,131410290001,Arron Afflalo,ORL,3.0,14.0,1.0,5.0,2.0,3.0,1.0,2.0,1.0,0.0,0.0,9.0
1,131410290001,Nikola Vučević,ORL,4.0,11.0,0.0,0.0,0.0,0.0,5.0,5.0,3.0,2.0,1.0,8.0
2,131410290001,Jameer Nelson,ORL,4.0,13.0,3.0,7.0,1.0,1.0,1.0,4.0,7.0,2.0,0.0,12.0
3,131410290001,Jason Maxiell,ORL,0.0,5.0,0.0,0.0,0.0,0.0,1.0,4.0,0.0,1.0,2.0,0.0
4,131410290001,Maurice Harkless,ORL,6.0,13.0,2.0,2.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305727,222304091230,John Butler,POR,1.0,5.0,1.0,3.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,3.0
305728,222304091230,Chance Comanche,POR,3.0,5.0,0.0,0.0,1.0,4.0,2.0,1.0,0.0,0.0,1.0,7.0
305729,222304091230,Jabari Walker,POR,4.0,6.0,0.0,2.0,1.0,2.0,0.0,3.0,0.0,0.0,0.0,9.0
305730,222304091230,Drew Eubanks,POR,,,,,,,,,,,,


In [25]:
#check if player_name matches
player_stats_names = player_stats["player"].unique()
shots_names = shots_df_normalized["player_name"].unique()
names = set(player_stats_names.tolist() + shots_names.tolist())
names

{'Jeff Taylor',
 'Nickeil Alexander-Walker',
 'Brook Lopez',
 'Coty Clarke',
 'Dairis Bertāns',
 'Robert Williams',
 'Marquis Teague',
 'Davis Bertans',
 'Ade Murkey',
 'Ognjen Kuzmić',
 'Pascal Siakam',
 'Deni Avdija',
 'Deandre Ayton',
 'Raul Neto',
 'Álex Abrines',
 'Jose Alvarado',
 'William Howard',
 'Jared Harper',
 'James Bouknight',
 'Miloš Teodosić',
 'Danny Granger',
 'Duane Washington Jr.',
 'Thomas Robinson',
 'Caris LeVert',
 'Jaden McDaniels',
 'Rasual Butler',
 'Dante Exum',
 'Antonius Cleveland',
 'Julian Washburn',
 'Leandro Bolmaro',
 'Brian Bowen',
 'Goran Dragić',
 'Paolo Banchero',
 'Jamil Wilson',
 'Joakim Noah',
 'Ömer Aşık',
 'Jeremy Evans',
 'Gabriel Lundberg',
 'Xavier Munford',
 'Ekpe Udoh',
 'Jordan Loyd',
 'Evan Mobley',
 'Demetrius Jackson',
 'Rodney Purvis',
 'Jonathan Gibson',
 'Brandon Goodwin',
 'Joey Dorsey',
 'Joe Ingles',
 'Hilton Armstrong',
 'Bryce McGowens',
 'Denzel Valentine',
 'Kevin Séraphin',
 'Scottie Barnes',
 'Glenn Robinson III',
 'P.J. 

In [26]:
print(shots_df_normalized["player_name"])

0             Anthony Davis
1              Desmond Bane
2               Will Barton
3              Jared Butler
4           Dejounte Murray
                ...        
433580    Russell Westbrook
433581    De'Anthony Melton
433582    Jaren Jackson Jr.
433583          Will Barton
433584      Dejounte Murray
Name: player_name, Length: 433585, dtype: object


In [27]:
#shot_made = 1 when making shot successfully, shot_made = 0 when missing shot
shots_df_normalized["shot_made"] = shots_df_normalized["shot_made"].astype(int)
shots_df_normalized.drop(columns=['event_type'], inplace=True)
shots_df_normalized.head()

Unnamed: 0,shot_id,game_date,player_name,team_abbr,shot_made,action_type,shot_type,basic_zone,zone_name,zone_abb,zone_range,loc_x,loc_y,shot_distance,quarter,mins_left,secs_left
0,1,2021-12-09,Anthony Davis,LAL,1,Dunk Shot,2PT Field,Restricted Area,Center,C,Less Than 8 ft.,0.21,5.925,2,4,2,21
1,2,2021-12-09,Desmond Bane,MEM,0,Step Back Jump shot,3PT Field,Above the Break 3,Center,C,24+ ft.,0.11,8.395,26,4,2,27
2,3,2021-12-09,Will Barton,DEN,0,Step Back Jump shot,3PT Field,Left Corner 3,Left Side,L,24+ ft.,-2.33,6.185,23,4,2,3
3,4,2021-12-09,Jared Butler,UTA,1,Driving Finger Roll Layup Shot,2PT Field,Restricted Area,Center,C,Less Than 8 ft.,0.1,5.855,1,4,0,30
4,5,2021-12-09,Dejounte Murray,SAS,1,Driving Floating Jump Shot,2PT Field,In The Paint (Non-RA),Left Side,L,8-16 ft.,-0.72,6.605,10,4,2,24


In [28]:
#shots_df_normalized.to_csv("shots_normalized.csv", index=False)
output_path = '/Users/dorisdu/Desktop/cleaned_shots.csv'
shots_df_normalized.to_csv(output_path, index=False)