In [1]:
import pandas as pd
import matplotlib as plt
import numpy as np
from math import *

In [27]:
def distance(x, y):
    """Return the Euclidean distance of the point(s) (x, y) from the basket.

    The basket is taken to be the origin (0, 0). The computation is
    element-wise, so ``x`` and ``y`` may be scalars or array-likes.
    """
    squared_radius = np.square(x) + np.square(y)
    return np.sqrt(squared_radius)
    
def classification(data):
    """Label every shot in ``data`` by zone and record its distance from the basket.

    Two columns are added to ``data`` in place, and the same frame is returned:

    * ``distances`` - Euclidean distance of (x, y) from the basket at (0, 0).
    * ``classification`` - 0.0 for a 2-pointer, 1.0 for a non-corner 3 (NC3),
      2.0 for a corner 3 (C3).

    A shot with ``y > 7.8`` is a three when its distance is at least 23.75;
    any other shot is a three when its distance is at least 22.

    Everything is computed positionally on NumPy arrays, so the frame may
    carry any index (for example a filtered or per-team subset), not only the
    default 0..n-1 index. Looking distances up by loop counter on a labelled
    Series would fail or pick the wrong row for such frames.
    """
    y_cutoff = 7.8          # above this y the longer arc distance applies
    arc_distance = 23.75    # minimum distance for a non-corner 3
    corner_distance = 22    # minimum distance for a corner 3

    x = data["x"].to_numpy()
    y = data["y"].to_numpy()

    # Same formula as the module's distance() helper, kept inline so the
    # labelling works purely on positional arrays.
    distances = np.sqrt(np.square(x) + np.square(y))

    above_cutoff = y > y_cutoff
    is_nc3 = above_cutoff & (distances >= arc_distance)
    is_c3 = ~above_cutoff & (distances >= corner_distance)

    data["distances"] = distances
    # Float labels keep the column dtype identical to the previous
    # np.zeros-based implementation.
    data["classification"] = np.select([is_nc3, is_c3], [1.0, 2.0], default=0.0)
    return data
    
def three_pointer_made(data):
    """Count the made three-point field goals in ``data``.

    Rows whose ``classification`` is at least 1 (i.e. NC3 or C3) are selected
    and their ``fgmade`` values are summed.
    """
    is_three = data["classification"] >= 1
    return np.sum(data.loc[is_three, "fgmade"])

def zone_percentage(zone, data, fga):
    """Return the percentage of attempts that were taken from ``zone``.

    The number of rows in ``data`` whose ``classification`` equals ``zone`` is
    divided by ``fga`` (the attempt count used as denominator), scaled to a
    percentage and rounded to three decimals.
    """
    in_zone = data["classification"].eq(zone)
    zone_attempts = np.sum(in_zone.astype(int))
    return round(zone_attempts / fga * 100, 3)

def total_efg(data, fga):
    """Return the effective field goal percentage for the shots in ``data``.

    Computed as ``(FGM + 0.5 * 3PM) / fga * 100`` and rounded to three
    decimals, where FGM is the sum of ``fgmade`` and 3PM is the value returned
    by ``three_pointer_made`` for the same rows.
    """
    made = np.sum(data["fgmade"])
    made_threes = three_pointer_made(data)
    weighted_makes = made + 0.5 * made_threes
    return round(weighted_makes / fga * 100, 3)

def efg_zone(data, zone):
    """Return the effective field goal percentage for shots taken from ``zone``.

    Only rows whose ``classification`` equals ``zone`` are kept; the number of
    those rows is the attempt count handed to ``total_efg``.
    """
    zone_shots = data.loc[data["classification"] == zone]
    return total_efg(zone_shots, len(zone_shots))
    
    

In [3]:
# Load the shot data set from the CSV file into a DataFrame and display it.
shots_data = pd.read_csv("./utility/shots_data.csv")
shots_data

Unnamed: 0,team,x,y,fgmade
0,Team A,-23.1,3.5,0
1,Team A,0.0,25.1,1
2,Team A,0.5,1.0,1
3,Team A,-5.6,5.2,0
4,Team A,4.0,14.5,1
...,...,...,...,...
555,Team B,7.8,24.7,0
556,Team B,10.1,1.4,1
557,Team B,-17.8,-0.1,0
558,Team B,22.6,2.9,0


In [4]:
# Keep a separate copy of the loaded data for the shot-distribution work
# (may be used later).
data_a = shots_data.copy()

In [5]:
# Summary statistics of the numeric columns in the loaded shot data.
shots_data.describe()

Unnamed: 0,x,y,fgmade
count,560.0,560.0,560.0
mean,1.049821,11.156071,0.392857
std,11.074739,9.487093,0.488822
min,-23.6,-1.5,0.0
25%,-4.25,2.0,0.0
50%,0.6,8.5,0.0
75%,7.475,20.725,1.0
max,23.8,29.7,1.0


Shot Distribution Section

In [6]:
# Label every shot with its zone. classification() adds its columns to the
# frame it receives and returns that same frame, so rebinding the name keeps
# the data lineage explicit without changing the result.
shots_data = classification(shots_data)
shots_data.describe()

Unnamed: 0,x,y,fgmade,distances,classification
count,560.0,560.0,560.0,560.0,560.0
mean,1.049821,11.156071,0.392857,15.436219,0.478571
std,11.074739,9.487093,0.488822,9.986809,0.629824
min,-23.6,-1.5,0.0,0.223607,0.0
25%,-4.25,2.0,0.0,3.915344,0.0
50%,0.6,8.5,0.0,18.122634,0.0
75%,7.475,20.725,1.0,25.322667,1.0
max,23.8,29.7,1.0,29.917386,2.0


In [7]:
# Separate the shots into Team A and Team B and count each team's attempts.
teams = shots_data.groupby(shots_data["team"])
team_a, team_b = (teams.get_group(team_name) for team_name in ("Team A", "Team B"))
fga_teama, fga_teamb = team_a.shape[0], team_b.shape[0]

In [28]:
# Print each team's share of attempts per zone (Team A first, then Team B).
zones = [0, 1, 2]
zone_names = ["2PT", "NC3", "C3"]
distribution_reports = (
    ("For Team A the shot distribution is given below:\n", team_a, fga_teama),
    ("For Team B the shot distribution is given below:\n", team_b, fga_teamb),
)
for heading, team_shots, team_fga in distribution_reports:
    print(heading)
    for zone, zone_name in zip(zones, zone_names):
        print("Field goals attempted in the " + zone_name + " zone is " + str(zone_percentage(zone, team_shots, team_fga)) + "%\n")


For Team A the shot distribution is given below:

Field goals attempted in the 2PT zone is 60.714%

Field goals attempted in the NC3 zone is 32.143%

Field goals attempted in the C3 zone is 7.143%

For Team B the shot distribution is given below:

Field goals attempted in the 2PT zone is 58.214%

Field goals attempted in the NC3 zone is 34.286%

Field goals attempted in the C3 zone is 7.5%



Effective Field Goal Percentage 

In [29]:
# Report the overall effective field goal percentage of each team.
total_efg_reports = (
    ("The total effective field goal percentage for team A was ", team_a, fga_teama),
    ("The total effective field goal percentage for team B was ", team_b, fga_teamb),
)
for sentence_start, team_shots, team_fga in total_efg_reports:
    print(sentence_start + str(total_efg(team_shots, team_fga)) + "%\n")

The total effective field goal percentage for team A was 44.107%

The total effective field goal percentage for team B was 48.214%



In [30]:
# Report the effective field goal percentage per zone: all Team A zones
# first, then all Team B zones.
zones = [0, 1, 2]
zone_names = ["2PT", "NC3", "C3"]
zone_efg_reports = (
    ("The effective field percentage of Team A in the ", team_a),
    ("The effective field percentage of Team B in the ", team_b),
)
for sentence_start, team_shots in zone_efg_reports:
    for zone, zone_name in zip(zones, zone_names):
        print(sentence_start + zone_name + " zone was " + str(efg_zone(team_shots, zone)) + "%\n")

The effective field percentage of Team A in the 2PT zone was 40.0%

The effective field percentage of Team A in the NC3 zone was 48.333%

The effective field percentage of Team A in the C3 zone was 60.0%

The effective field percentage of Team B in the 2PT zone was 46.012%

The effective field percentage of Team B in the NC3 zone was 54.688%

The effective field percentage of Team B in the C3 zone was 35.714%

