# Calculate Distance between two Zip Codes

#### The code below uses the Haversine formula to calculate the great-circle distance between two latitude/longitude points, using ZIP code centroid data from the National Bureau of Economic Research: <https://www.nber.org/data/zip-code-distance-database.html>

In [1]:
import numpy as np
import pandas as pd
## Load the centroid table (columns shown below: intptlat, intptlong, zcta5)
df = pd.read_csv(filepath_or_buffer='gaz2016zcta5centroid.csv')

In [2]:
## Preview the first five rows
df.head(n=5)

Unnamed: 0,intptlat,intptlong,zcta5
0,18.180555,-66.749962,601
1,18.361944,-67.175598,602
2,18.455183,-67.119888,603
3,18.158344,-66.932915,606
4,18.295366,-67.125137,610


In [3]:
## Pull two single-row frames (row positions 400 and 999) to use as a test pair
df1 = df.iloc[400:401]
df2 = df.iloc[999:1000]

In [4]:
## Concatenate the two records side by side so they end up in the same row
test = pd.concat([df1, df2], axis=1)
## The records come from different row positions, so the column-wise concat
## leaves NaN gaps; back-filling pulls the second record's values up into the
## first row. DataFrame.bfill() replaces the deprecated fillna(method='bfill').
test = test.bfill()
## Keep only the first (now complete) row
test = test.iloc[:1]
test.columns = ['lat1','long1','zip1','lat2','long2','zip2']
test

Unnamed: 0,lat1,long1,zip1,lat2,long2,zip2
400,42.673908,-71.091331,1845.0,43.014996,-70.902588,3885.0


In [5]:
## Using Haversine formula

import math

def distance(lat1,lon1,lat2,lon2):
    """Return the great-circle distance in kilometres (Haversine formula).

    The computation is element-wise, so each argument may be a single number
    or an equally sized list, NumPy array or pandas Series.

    Parameters
    ----------
    lat1, lon1 : latitude and longitude of the first point(s), in decimal degrees.
    lat2, lon2 : latitude and longitude of the second point(s), in decimal degrees.

    Returns
    -------
    A plain ``float`` when all inputs are scalars; otherwise an array / Series
    with one distance in km per input element.
    """
    radius = 6371 # km

    # Convert each coordinate to radians up front (this also lets plain lists through).
    phi1, lam1 = np.radians(lat1), np.radians(lon1)
    phi2, lam2 = np.radians(lat2), np.radians(lon2)
    dlat = phi2 - phi1
    dlon = lam2 - lam1

    step1 = np.sin(dlat/2) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(dlon/2) ** 2
    # Floating-point rounding can push the haversine term marginally outside
    # [0, 1] (e.g. near-antipodal points), which would break sqrt(1 - step1).
    step1 = np.minimum(np.maximum(step1, 0.0), 1.0)
    step2 = 2 * np.arctan2(np.sqrt(step1), np.sqrt(1 - step1))
    step3 = radius * step2

    # Scalar inputs keep returning a built-in float, as before.
    if np.ndim(step3) == 0:
        return float(step3)
    return step3

In [6]:
KM_PER_MILE = 1.609344  # kilometres in one international mile

## Compute the distance once, then attach both columns via assign() so the
## sliced frame is not modified in place (avoids SettingWithCopyWarning)
distance_km = distance(test['lat1'],test['long1'],test['lat2'],test['long2'])

## Convert to miles
test = test.assign(**{
    'Distance in km': distance_km,
    'Distance in miles': distance_km / KM_PER_MILE,
})

In [7]:
## Display the test row, now including the 'Distance in km' and 'Distance in miles' columns
test

Unnamed: 0,lat1,long1,zip1,lat2,long2,zip2,Distance in km,Distance in miles
400,42.673908,-71.091331,1845.0,43.014996,-70.902588,3885.0,40.929915,25.43267
