In [1]:
import pandas as pd
from tqdm import tqdm
from math import radians, cos, sin, asin, sqrt 

In [2]:
## We will now create a table of nearest stores to enable local store operations to make inventory movement decisions
location_df = pd.read_csv("location.csv", index_col=None)
location_df.head(5)

Unnamed: 0,StoreID,RegionCountryName,StateProvinceName,lat,long
0,4,United States,Washington,47.411373,-120.556366
1,156,United Kingdom,England,51.500153,-0.126236
2,88,United States,Texas,31.463793,-99.333275
3,214,United Kingdom,England,51.500153,-0.126236
4,201,United Kingdom,England,51.500153,-0.126236


In [3]:
## Function for calculating distance
def get_distance(lat1, lat2, lon1, lon2): 
      
    # The math module contains a function named 
    # radians which converts from degrees to radians. 
    lon1 = radians(lon1) 
    lon2 = radians(lon2) 
    lat1 = radians(lat1) 
    lat2 = radians(lat2) 
       
    # Haversine formula  
    dlon = lon2 - lon1  
    dlat = lat2 - lat1 
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * asin(sqrt(a))  
     
    # Radius of earth in kilometers. Use 3956 for miles 
    r = 6371
    distance = c * r
    
    return distance

In [4]:
## Create a dictionary of StoreID to lat long for efficient lookup
location_dict = {}
for ind, row in location_df.iterrows():
    location_dict[row.StoreID] = [row.lat, row.long]
location_dict

{4: [47.41137314, -120.556366],
 156: [51.50015259, -0.12623600699999998],
 88: [31.4637928, -99.33327484],
 214: [51.50015259, -0.12623600699999998],
 201: [51.50015259, -0.12623600699999998],
 215: [48.85662079, 2.342922926],
 309: [48.85662079, 2.342922926],
 17: [47.41137314, -120.556366],
 253: [-32.16671371, 147.01048280000003],
 300: [-32.16671371, 147.01048280000003],
 289: [54.5625, -125.1166],
 290: [54.5625, -125.1166],
 22: [54.5625, -125.1166],
 6: [47.41137314, -120.556366],
 204: [51.50015259, -0.12623600699999998],
 79: [31.4637928, -99.33327484],
 228: [52.50147247, 13.40231705],
 306: [52.50147247, 13.40231705],
 291: [55.16929626, -114.5118027],
 16: [47.41137314, -120.556366],
 212: [51.50015259, -0.12623600699999998],
 5: [47.41137314, -120.556366],
 15: [47.41137314, -120.556366],
 1: [47.41137314, -120.556366],
 2: [47.41137314, -120.556366],
 308: [47.41137314, -120.556366],
 264: [39.19427872, 59.17978668],
 298: [39.19427872, 59.17978668],
 262: [15.13178444, 

In [5]:
## Create a store location dataframe matrix
df_list = []
for start_id in tqdm(location_dict.keys()):
    df = location_df[location_df.StoreID != start_id]
    storeA_lat = location_dict[start_id][0]
    storeA_long = location_dict[start_id][1]
    for ind, row in df.iterrows():
        storeB_id = row.StoreID
        storeB_lat = row.lat
        storeB_long = row.long
        distance = get_distance(storeA_lat, storeB_lat, storeA_long, storeB_long)
        df_list.append({"storeA": start_id,
                        "storeB": storeB_id,
                        "distance": distance})
        
distance_df = pd.DataFrame(df_list)


100%|████████████████████████████████████████████████████████████████████████████████| 305/305 [00:08<00:00, 35.08it/s]


In [6]:
## Getting store combinations that has distance lesser than 100 km 
distance_200km_df = distance_df[distance_df.distance < 200]

In [7]:
## Get top 10 nearest stores
storeID = 223
distance_200km_df[distance_200km_df.storeA == storeID].sort_values(by=["distance"], ascending = False)

Unnamed: 0,storeA,storeB,distance
33223,223,244,147.322063
