In [1]:
import numpy as np
import pandas as pd
import math 

# Load the raw 2018-19 line scores
df = pd.read_csv("18_19_raw_scores.csv")

# Separate the rows by index parity. After the merge below, the odd-labelled
# rows (df_new2) supply the home columns and the even-labelled rows (df_new1)
# supply the away columns.
df_new1 = df.loc[df.index % 2 == 0]
df_new2 = df.loc[df.index % 2 != 0]

# One row per game: the home row plus the away side's abbreviation, points
# and record (overlapping column names receive the _x / _y suffixes)
new_18_19 = df_new2.merge(
    df_new1[["GAME_ID", "TEAM_ABBREVIATION", "PTS", "TEAM_WINS_LOSSES"]],
    how="left",
    on="GAME_ID",
)

# Flag games where the home row has points recorded in the first overtime
new_18_19["OT"] = new_18_19["PTS_OT1"] > 0

# Discard the per-period scores and shooting/box-score stats, then give the
# surviving columns readable home/away names
box_score = new_18_19.drop(
    columns=[
        "TEAM_ID",
        "GAME_SEQUENCE",
        "PTS_QTR1",
        "PTS_QTR2",
        "PTS_QTR3",
        "PTS_QTR4",
        "PTS_OT1",
        "PTS_OT2",
        "PTS_OT3",
        "PTS_OT4",
        "PTS_OT5",
        "PTS_OT6",
        "PTS_OT7",
        "PTS_OT8",
        "PTS_OT9",
        "PTS_OT10",
        "FG_PCT",
        "FT_PCT",
        "FG3_PCT",
        "AST",
        "REB",
        "TOV",
    ]
).rename(
    columns={
        "Unnamed: 0": "Date",
        "TEAM_ABBREVIATION_x": "Home_Team",
        "TEAM_CITY_NAME": "Home_City",
        "TEAM_WINS_LOSSES_x": "Home_Record",
        "PTS_x": "Home_Points",
        "TEAM_ABBREVIATION_y": "Away_Team",
        "PTS_y": "Away_Points",
        "TEAM_WINS_LOSSES_y": "Away_Record",
    }
)

# Persist the cleaned table
box_score.to_csv("18_19_clean.csv")

In [74]:
# Arena coordinates per team. The "ABBREVIATION " key deliberately keeps its
# trailing space so that it matches the header found in the CSV.
city_long_lat = pd.read_csv("nba_arena_long_lat.csv").rename(
    columns={"ABBREVIATION ": "abbv", "LATITUDE": "lat", "LONGITUDE": "long"}
)
city_long_lat.head()

Unnamed: 0,abbv,lat,long
0,MIA,25.781389,-80.188056
1,DAL,32.790556,-96.810278
2,ORL,28.539167,-81.383611
3,SAS,29.426944,-98.4375
4,IND,39.763889,-86.155556


In [75]:
from math import atan2, sin, cos, sqrt, radians

#function to convert long/lat coords to distance in kms
def long_lat_dist(lat1, long1, lat2, long2, radius_km=6373):
    """Great-circle distance between two points using the haversine formula.

    Parameters
    ----------
    lat1, long1 : float
        Latitude and longitude of the first point, in degrees.
    lat2, long2 : float
        Latitude and longitude of the second point, in degrees.
    radius_km : float, optional
        Radius of the sphere the central angle is scaled by. The default of
        6373 is the value this notebook has always used for the Earth, in
        kilometres; pass another radius to obtain the distance in another
        unit or on another sphere.

    Returns
    -------
    float
        Distance along the sphere's surface, in the unit of ``radius_km``.
    """
    # The trigonometric functions work in radians
    rlat1, rlong1 = radians(lat1), radians(long1)
    rlat2, rlong2 = radians(lat2), radians(long2)

    # Angular differences in latitude and longitude
    dlat = rlat1 - rlat2
    dlong = rlong1 - rlong2

    # Haversine of the central angle
    a = sin(dlat / 2) ** 2 + cos(rlat1) * cos(rlat2) * sin(dlong / 2) ** 2

    # For (near-)antipodal points rounding can leave `a` a hair above 1, which
    # would make sqrt(1 - a) raise a math domain error. NaN is left untouched
    # because the comparison is False for it.
    if a > 1.0:
        a = 1.0

    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return radius_km * c

In [76]:
def get_dist(newdf, k):
    """Distance between the venues of game ``k - 1`` and game ``k``.

    Each game's venue is taken to be the arena of that row's ``Home_Team``,
    looked up in the module-level ``city_long_lat`` table (columns ``abbv``,
    ``lat``, ``long``).

    Parameters
    ----------
    newdf : pandas.DataFrame
        Games of a single team in playing order; must have a ``Home_Team``
        column.
    k : int
        Positional index of the current game within ``newdf``.

    Returns
    -------
    float
        Result of ``long_lat_dist`` for the two venues, or ``0.0`` when
        ``k == 0`` because the first game has no predecessor.

    Raises
    ------
    IndexError
        If either home team has no row in ``city_long_lat``.
    """
    # Without this guard k - 1 == -1 would make iloc wrap round to the LAST
    # game and report a bogus "season finale -> opener" trip.
    if k == 0:
        return 0.0

    def _arena_coords(team):
        # Filter the coordinate table once per team and read both values
        arena = city_long_lat.loc[city_long_lat["abbv"] == team]
        return arena["lat"].values[0], arena["long"].values[0]

    # Home team of the current game and of the game played just before it
    home_team = newdf["Home_Team"].iloc[k]
    prev_home = newdf["Home_Team"].iloc[k - 1]

    lat1, long1 = _arena_coords(home_team)
    lat2, long2 = _arena_coords(prev_home)

    return long_lat_dist(lat1, long1, lat2, long2)


In [79]:
def all_team_dist(box_score):
    """Per-game travel distance for the home and the away side.

    For every team, its games are taken in the row order of ``box_score``.
    The team's first game gets a distance of 0; every later game gets
    ``get_dist(team_games, k)``, the distance from the previous game's venue.
    The value is stored under ``Home_Distance`` when the team hosted that
    game and under ``Away_Distance`` otherwise.

    Parameters
    ----------
    box_score : pandas.DataFrame
        One row per game with at least the columns ``GAME_ID``,
        ``Home_Team`` and ``Away_Team``.

    Returns
    -------
    pandas.DataFrame
        Columns ``GAME_ID``, ``Home_Distance`` and ``Away_Distance``, sharing
        the index of ``box_score``.
    """
    # A team may show up on either side of a fixture, so gather both columns;
    # using Home_Team alone would skip clubs that only ever appear as visitors.
    teams = pd.concat([box_score["Home_Team"], box_score["Away_Team"]]).unique()

    # GAME_ID -> distance, collected per side and applied in one pass at the end
    home_dist = {}
    away_dist = {}

    for team in teams:
        # Every game this team played, home or away
        newdf = box_score[(box_score.Home_Team == team) | (box_score.Away_Team == team)]
        game_ids = newdf["GAME_ID"].values
        hosted = (newdf["Home_Team"] == team).values

        for k in range(len(newdf)):
            # The first game of the schedule has nothing to travel from
            dist = 0.0 if k == 0 else get_dist(newdf, k)
            side = home_dist if hosted[k] else away_dist
            side[game_ids[k]] = dist

    distance = box_score[["GAME_ID"]].copy()
    # Games without a recorded value stay NaN; astype keeps the columns float
    distance["Home_Distance"] = distance["GAME_ID"].map(home_dist).astype(float)
    distance["Away_Distance"] = distance["GAME_ID"].map(away_dist).astype(float)

    return distance
                

# all_team_dist requires the cleaned box scores; calling it with no argument
# raises TypeError on a fresh kernel
distance = all_team_dist(box_score)
distance

Unnamed: 0,GAME_ID,Home_Distance,Away_Distance
0,21800001,0.000000,0.000000
1,21800002,0.000000,0.000000
2,21800003,0.000000,0.000000
3,21800004,0.000000,0.000000
4,21800005,0.000000,0.000000
...,...,...,...
1225,21801226,0.000000,1179.275404
1226,21801227,2019.362731,404.939354
1227,21801228,595.877137,1124.440778
1228,21801229,543.758097,935.332592


In [96]:
# Attach both travel-distance columns to each game; the left join keeps every
# box-score row even if no distance row shares its GAME_ID
mega = pd.merge(box_score, distance, how="left", on="GAME_ID")
mega

Unnamed: 0,Date,GAME_ID,Home_Team,Home_City,Home_Record,Home_Points,Away_Team,Away_Points,Away_Record,OT,Home_Distance,Away_Distance
0,2018-10-16,21800001,BOS,Boston,1-0,105,PHI,87,0-1,False,0.000000,0.000000
1,2018-10-16,21800002,GSW,Golden State,1-0,108,OKC,100,0-1,False,0.000000,0.000000
2,2018-10-17,21800003,CHA,Charlotte,0-1,112,MIL,113,1-0,False,0.000000,0.000000
3,2018-10-17,21800004,DET,Detroit,1-0,103,BKN,100,0-1,False,0.000000,0.000000
4,2018-10-17,21800005,IND,Indiana,1-0,111,MEM,83,0-1,False,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
1225,2019-04-10,21801226,MIL,Milwaukee,60-22,116,OKC,127,49-33,False,0.000000,1179.275404
1226,2019-04-10,21801227,SAS,San Antonio,48-34,105,DAL,94,33-49,False,2019.362731,404.939354
1227,2019-04-10,21801228,DEN,Denver,54-28,99,MIN,95,36-46,False,595.877137,1124.440778
1228,2019-04-10,21801229,LAC,LA,48-34,143,UTA,137,50-32,True,543.758097,935.332592
