In [1]:
import pandas as pd

In [2]:
# Load the raw shot chart detail CSV from the resources folder
raw_csv_path = "../resources/nba_shotchartdetail_2018-19.csv"
shot_chart_df = pd.read_csv(raw_csv_path)

# Preview the first rows to check that the file parsed as expected
shot_chart_df.head()

Unnamed: 0,GRID_TYPE,GAME_ID,GAME_EVENT_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_NAME,PERIOD,MINUTES_REMAINING,SECONDS_REMAINING,...,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_DISTANCE,LOC_X,LOC_Y,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG,GAME_DATE,HTM,VTM
0,Shot Chart Detail,21800001,7,203496,Robert Covington,1610612755,Philadelphia 76ers,1,11,40,...,Center(C),24+ ft.,26,-53,264,1,0,20181016,BOS,PHI
1,Shot Chart Detail,21800001,10,1628369,Jayson Tatum,1610612738,Boston Celtics,1,11,15,...,Left Side Center(LC),24+ ft.,25,-148,207,1,0,20181016,BOS,PHI
2,Shot Chart Detail,21800001,14,1627759,Jaylen Brown,1610612738,Boston Celtics,1,11,3,...,Center(C),Less Than 8 ft.,1,4,18,1,0,20181016,BOS,PHI
3,Shot Chart Detail,21800001,17,203954,Joel Embiid,1610612755,Philadelphia 76ers,1,10,55,...,Center(C),Less Than 8 ft.,0,-8,3,1,1,20181016,BOS,PHI
4,Shot Chart Detail,21800001,19,1628369,Jayson Tatum,1610612738,Boston Celtics,1,10,36,...,Center(C),Less Than 8 ft.,4,-46,1,1,0,20181016,BOS,PHI


In [3]:
# Normalise every column label to lowercase so later cells can refer to
# columns without worrying about the source file's capitalisation
lowercase_names = [name.lower() for name in shot_chart_df.columns]
shot_chart_df.columns = lowercase_names

In [4]:
# Select only relevant columns
relevant_columns = [
    "shot_made_flag",
    "shot_type",
    "period",
    "minutes_remaining",
    "seconds_remaining",
    "shot_distance",
    "loc_x",
    "loc_y",
]

# Take an explicit copy so the subset is an independent DataFrame: any later
# column assignment then modifies this frame only, instead of writing into a
# selection of the raw frame (the situation pandas' SettingWithCopyWarning
# is there to flag).
shot_chart_df = shot_chart_df[relevant_columns].copy()

In [5]:
# Encode shot type column for use in machine learning
from sklearn.preprocessing import LabelEncoder

# Fit and transform in a single step. The fitted encoder is kept in
# `label_encoder` so the integer codes can be mapped back to the original
# labels later (label_encoder.classes_ / label_encoder.inverse_transform).
label_encoder = LabelEncoder()
encoded_shot_type = label_encoder.fit_transform(shot_chart_df["shot_type"])

# Replace the column via .assign(), which returns a new DataFrame rather than
# writing into the existing one; this stays safe even when the current frame
# is a selection of another frame. The column keeps its original position.
# Code meaning per the original author's note: 0=2pt, 1=3pt
# (check label_encoder.classes_ to confirm the mapping).
shot_chart_df = shot_chart_df.assign(shot_type=encoded_shot_type)

In [6]:
# Combine minutes and seconds columns to a new "minutes:seconds" column
# Zero-pad the seconds to two characters (e.g. 3 -> "03") in a temporary Series
# rather than overwriting the seconds_remaining column, so this cell gives the
# same result when it is re-run and the source column keeps its original dtype.
seconds_padded = shot_chart_df["seconds_remaining"].astype(str).str.zfill(2)
minutes_text = shot_chart_df["minutes_remaining"].astype(str)

# .assign() appends time_remaining as the last column of a new DataFrame
shot_chart_df = shot_chart_df.assign(time_remaining=minutes_text + ":" + seconds_padded)

In [7]:
# Rename the target column and keep only the final set of columns, in order
# (minutes_remaining and seconds_remaining are left out here; time_remaining
# carries the combined value)
final_columns = [
    "outcome",
    "shot_type",
    "period",
    "time_remaining",
    "shot_distance",
    "loc_x",
    "loc_y",
]
shot_chart_df = shot_chart_df.rename(columns={"shot_made_flag": "outcome"})[final_columns]

In [8]:
# Drop exact duplicate rows first, then any row that contains a null value
# NOTE(review): duplicates are judged on the columns still present in the
# frame, so separate shots that happen to share every one of these values
# collapse into a single row - confirm this is intended.
shot_chart_df = shot_chart_df.drop_duplicates().dropna()

# Preview the cleaned result
shot_chart_df.head()

Unnamed: 0,outcome,shot_type,period,time_remaining,shot_distance,loc_x,loc_y
0,0,1,1,11:40,26,-53,264
1,0,1,1,11:15,25,-148,207
2,0,0,1,11:03,1,4,18
3,1,0,1,10:55,0,-8,3
4,0,0,1,10:36,4,-46,1


In [9]:
# Write the cleaned DataFrame next to the raw file; index=False leaves the
# row index out of the CSV
clean_csv_path = "../resources/nba_shotchartdetail_2018-19_clean.csv"
shot_chart_df.to_csv(clean_csv_path, index=False)