### Libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
import pandas as pd

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import time

### Data Collection

In [2]:
# Load the people coordinates; the first CSV column is used as the row index.
people = pd.read_csv(filepath_or_buffer="people.csv", index_col=0)
people.head()

Unnamed: 0_level_0,Lat,Lon
People,Unnamed: 1_level_1,Unnamed: 2_level_1
0,2.868615,101.673326
1,2.878383,101.607508
2,2.871754,101.599514
3,3.027363,101.652546
4,2.997368,101.626043


In [3]:
# Load the PPV coordinates; the first CSV column is used as the row index.
ppv = pd.read_csv(filepath_or_buffer="ppv.csv", index_col=0)
ppv.head()

Unnamed: 0_level_0,Lat,Lon
PPV,Unnamed: 1_level_1,Unnamed: 2_level_1
0,2.864831,101.694876
1,3.311896,101.585703
2,3.020707,101.650831
3,3.096667,101.706738
4,3.26352,101.470738


### Method 1

In [4]:
def nearest_ppv(df_1, df_2):
    people_lat = np.array(df_1['Lat'].tolist())
    people_lon = np.array(df_1['Lon'].tolist())
    ppv_lat = np.array(df_2['Lat'].tolist())
    ppv_lon = np.array(df_2['Lon'].tolist())
    people_lat = np.deg2rad(people_lat)
    people_lon = np.deg2rad(people_lon)
    ppv_lat = np.deg2rad(ppv_lat)
    ppv_lon = np.deg2rad(ppv_lon)
    diff_lat = people_lat[:,None] - ppv_lat[:]
    diff_lng = people_lon[:,None] - ppv_lon[:]
    d = np.sin(diff_lat/2)**2 + np.cos(people_lat[:,None])*np.cos(ppv_lat[:]) * np.sin(diff_lng/2)**2
    return 2 * 6371 * np.arcsin(np.sqrt(d))

In [5]:
# Benchmark Method 1 (vectorised distance matrix + argmin) over 50 runs.
# execution_time maps the 1-based run number to the elapsed seconds of that run.
execution_time = {}
for n in range(50):
    # perf_counter is a monotonic, high-resolution clock intended for timing
    # short intervals; time.time() can jump and has coarser resolution.
    st = time.perf_counter()

    # argmin along axis 1 yields, per person, the row *position* of the
    # closest PPV; indexing ppv.index with those positions gives the labels.
    people["nearest_ppv"] = ppv.index[np.argmin(nearest_ppv(people, ppv), axis=1)]

    et = time.perf_counter()

    # Only the assignment above is timed; the former people.head() call
    # discarded its result inside the loop and merely added overhead.
    execution_time[n+1] = et - st

In [6]:
# Tabulate the timings: one row per run number, one 'Time' column.
df_execution_time = pd.DataFrame.from_dict(
    data=execution_time,
    orient='index',
    columns=['Time'],
)

In [7]:
# Write the people frame (with its nearest_ppv column) for Method 1 to CSV.
people.to_csv(path_or_buf='people_to_ppv_1.csv')

In [8]:
# Write the Method 1 per-run timings to CSV.
df_execution_time.to_csv(path_or_buf='execution_time_1.csv')

### Testing

In [9]:
# Tiny hand-checkable fixtures: two query points and three candidates,
# all on lon = 0, indexed by "ID" and "Name" respectively.
df_1 = pd.DataFrame(
    {
        "ID": [1, 2],
        "lat": [-2.0, -0.5],
        "lon": [0.0, 0.0],
    }
).set_index("ID")

df_2 = pd.DataFrame(
    {
        "Name": ["A", "B", "C"],
        "lat": [1.0, 0.0, -2.0],
        "lon": [0.0, 0.0, 0.0],
    }
).set_index("Name")

In [10]:
def min_distance(df_1, df_2):
    """Return the pairwise planar (Euclidean) distance matrix.

    Despite the name, this returns every distance; the caller selects the
    minimum per row with ``np.argmin(..., axis=1)``.

    Parameters
    ----------
    df_1, df_2 : mapping with 'lat' and 'lon' entries
        Point coordinates (e.g. DataFrame columns).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df_1), len(df_2))`` where element ``[i, j]`` is
        ``sqrt(dlat**2 + dlon**2)`` between row ``i`` of ``df_1`` and row ``j``
        of ``df_2``, in the same unit as the inputs.
    """
    df_1_lat = np.asarray(df_1['lat'], dtype=float)
    df_1_lon = np.asarray(df_1['lon'], dtype=float)
    df_2_lat = np.asarray(df_2['lat'], dtype=float)
    df_2_lon = np.asarray(df_2['lon'], dtype=float)

    # Trailing axis on the df_1 side broadcasts to a (len(df_1), len(df_2)) grid.
    diff_lat = df_1_lat[:, None] - df_2_lat
    diff_lng = df_1_lon[:, None] - df_2_lon

    # The differences were previously halved before squaring (a leftover of
    # the haversine half-angle terms), which halved every returned distance.
    return np.hypot(diff_lat, diff_lng)

In [11]:
# np.argmin gives, per df_1 row, the position of the closest df_2 row;
# df_2.index[...] maps those positions straight to df_2 labels. (Going through
# df_2.loc[...] first only re-read the same labels and fails to line up when
# df_2 has duplicate index labels.)
# Note: the "min_dist" column holds the label of the nearest row, not a distance.
df_1["min_dist"] = df_2.index[np.argmin(min_distance(df_1, df_2), axis=1)]
df_1.head()

Unnamed: 0_level_0,lat,lon,min_dist
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,-2.0,0.0,C
2,-0.5,0.0,B


### Method 2

In [12]:
# Benchmark Method 2 (nested Python for-loops, list accumulation) over 50 runs.
# execution_time maps the 1-based run number to the elapsed seconds of that run.
execution_time = {}
for n in range(50):
    # perf_counter is a monotonic, high-resolution clock intended for timing.
    st = time.perf_counter()

    # For each person, the row position (within ppv) of the closest PPV.
    people_to_ppv = []
    for i in range(len(people)):
        dist = []
        # i and j are row *positions*, so read values positionally with .iloc;
        # .loc would treat them as index labels and only works by coincidence
        # when the index happens to be 0..n-1.
        people_lat = np.deg2rad(people['Lat'].iloc[i])
        people_lon = np.deg2rad(people['Lon'].iloc[i])
        for j in range(len(ppv)):
            ppv_lat = np.deg2rad(ppv['Lat'].iloc[j])
            ppv_lon = np.deg2rad(ppv['Lon'].iloc[j])
            diff_lat = people_lat - ppv_lat
            diff_lng = people_lon - ppv_lon
            # Haversine formula, scaled by 2 * 6371.
            d = np.sin(diff_lat/2)**2 + np.cos(people_lat)*np.cos(ppv_lat) * np.sin(diff_lng/2)**2
            dist.append(2 * 6371 * np.arcsin(np.sqrt(d)))
        # Pick the closest PPV once all distances for this person are known,
        # instead of testing for the last iteration on every inner pass.
        people_to_ppv.append(np.argmin(dist))

    et = time.perf_counter()

    execution_time[n+1] = et - st

In [13]:
# people_to_ppv holds row positions into ppv (np.argmin results), so indexing
# ppv.index with them yields the PPV labels directly; the extra ppv.loc[...]
# lookup only re-read the same labels and breaks on duplicate index labels.
people["nearest_ppv"] = ppv.index[np.asarray(people_to_ppv)]
people.head()

Unnamed: 0_level_0,Lat,Lon,nearest_ppv
People,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,2.868615,101.673326,0
1,2.878383,101.607508,44
2,2.871754,101.599514,44
3,3.027363,101.652546,2
4,2.997368,101.626043,19


In [14]:
# Tabulate the timings: one row per run number, one 'Time' column.
df_execution_time = pd.DataFrame.from_dict(
    data=execution_time,
    orient='index',
    columns=['Time'],
)

In [15]:
# Write the people frame (with its nearest_ppv column) for Method 2 to CSV.
people.to_csv(path_or_buf='people_to_ppv_2.csv')

In [16]:
# Write the Method 2 per-run timings to CSV.
df_execution_time.to_csv(path_or_buf='execution_time_2.csv')

### Method 3

In [17]:
# Benchmark Method 3 (nested while-loops, preallocated NumPy buffers) over 50 runs.
# execution_time maps the 1-based run number to the elapsed seconds of that run.
execution_time = {}
for n in range(50):
    # perf_counter is a monotonic, high-resolution clock intended for timing.
    st = time.perf_counter()

    # For each person, the row position (within ppv) of the closest PPV.
    people_to_ppv = np.empty(len(people), dtype=int)
    i = 0
    while i < len(people):
        dist = np.empty(len(ppv))
        # i and j are row *positions*, so read values positionally with .iloc;
        # .loc would treat them as index labels and only works by coincidence
        # when the index happens to be 0..n-1.
        people_lat = np.deg2rad(people['Lat'].iloc[i])
        people_lon = np.deg2rad(people['Lon'].iloc[i])
        j = 0
        while j < len(ppv):
            ppv_lat = np.deg2rad(ppv['Lat'].iloc[j])
            ppv_lon = np.deg2rad(ppv['Lon'].iloc[j])
            diff_lat = people_lat - ppv_lat
            diff_lng = people_lon - ppv_lon
            # Haversine formula, scaled by 2 * 6371.
            d = np.sin(diff_lat/2)**2 + np.cos(people_lat)*np.cos(ppv_lat) * np.sin(diff_lng/2)**2
            dist[j] = 2 * 6371 * np.arcsin(np.sqrt(d))
            j += 1
        # Store the result after the inner loop. Doing it inside the loop meant
        # an empty ppv silently left uninitialised np.empty values in
        # people_to_ppv; now np.argmin raises on an empty buffer instead.
        people_to_ppv[i] = np.argmin(dist)
        i += 1

    et = time.perf_counter()

    execution_time[n+1] = et - st

In [18]:
# people_to_ppv is an integer array of row positions into ppv, so indexing
# ppv.index with it yields the PPV labels directly; the extra ppv.loc[...]
# lookup only re-read the same labels and breaks on duplicate index labels.
people["nearest_ppv"] = ppv.index[people_to_ppv]
people.head()

Unnamed: 0_level_0,Lat,Lon,nearest_ppv
People,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,2.868615,101.673326,0
1,2.878383,101.607508,44
2,2.871754,101.599514,44
3,3.027363,101.652546,2
4,2.997368,101.626043,19


In [19]:
# Tabulate the timings: one row per run number, one 'Time' column.
df_execution_time = pd.DataFrame.from_dict(
    data=execution_time,
    orient='index',
    columns=['Time'],
)

In [20]:
# Write the people frame (with its nearest_ppv column) for Method 3 to CSV.
people.to_csv(path_or_buf='people_to_ppv_3.csv')

In [21]:
# Write the Method 3 per-run timings to CSV.
df_execution_time.to_csv(path_or_buf='execution_time_3.csv')