In [156]:
'''
Challenge link
https://preppindata.blogspot.com/2023/03/2023-week-11-which-customers-are-closest.html
'''

import pandas as pd
import numpy as np
import datetime as datetime
import math

In [157]:
branches_df = pd.read_csv("DSB Branches.csv")
branches_df.info()
branches_df.head(4)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Branch       4 non-null      object 
 1   Branch Long  4 non-null      float64
 2   Branch Lat   4 non-null      float64
dtypes: float64(2), object(1)
memory usage: 224.0+ bytes


Unnamed: 0,Branch,Branch Long,Branch Lat
0,London,-0.109863,51.481383
1,Oxford,-1.252441,51.727028
2,Nottingham,-1.142578,52.948638
3,Newcastle,-1.604004,54.965002


In [158]:
cust_df = pd.read_csv("DSB Customer Locations.csv")
cust_df.info()
cust_df.head(2)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125 entries, 0 to 124
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Customer      125 non-null    int64  
 1   Address Long  125 non-null    float64
 2   Address Lat   125 non-null    float64
dtypes: float64(2), int64(1)
memory usage: 3.1 KB


Unnamed: 0,Customer,Address Long,Address Lat
0,1,0.74707,51.549751
1,2,0.406494,51.957807


### Append the Branch information to the Customer information


In [159]:
df = cust_df.merge(branches_df, how='cross')
df.info()
df.head(5)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 500 entries, 0 to 499
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Customer      500 non-null    int64  
 1   Address Long  500 non-null    float64
 2   Address Lat   500 non-null    float64
 3   Branch        500 non-null    object 
 4   Branch Long   500 non-null    float64
 5   Branch Lat    500 non-null    float64
dtypes: float64(4), int64(1), object(1)
memory usage: 27.3+ KB


Unnamed: 0,Customer,Address Long,Address Lat,Branch,Branch Long,Branch Lat
0,1,0.74707,51.549751,London,-0.109863,51.481383
1,1,0.74707,51.549751,Oxford,-1.252441,51.727028
2,1,0.74707,51.549751,Nottingham,-1.142578,52.948638
3,1,0.74707,51.549751,Newcastle,-1.604004,54.965002
4,2,0.406494,51.957807,London,-0.109863,51.481383


In [160]:
x = math.pi/180
df['Distance'] = 0
for i in range(len(df)):
    #  from degree to radian
    df.loc[i,'Address Long'] = df.loc[i,'Address Long'] * x
    df.loc[i,'Address Lat'] = df.loc[i,'Address Lat'] * x
    df.loc[i,'Branch Long'] = df.loc[i,'Branch Long'] * x
    df.loc[i,'Branch Lat'] = df.loc[i,'Branch Lat'] * x
    #  calculate distance 
    #  3963 * acos((sin(lat1) * sin(lat2)) + cos(lat1) * cos(lat2) * cos(long2 – long1))
    dist = 3963 * math.acos((math.sin( df.loc[i,'Address Lat']) * 
                                             math.sin(df.loc[i,'Branch Lat'])) + 
                                             math.cos(df.loc[i,'Address Lat']) * 
                                             math.cos(df.loc[i,'Branch Lat']) * 
                                             math.cos( df.loc[i,'Branch Long'] - df.loc[i,'Address Long']))

    df.loc[i,'Distance'] = round(dist,2)

df.head(5)

Unnamed: 0,Customer,Address Long,Address Lat,Branch,Branch Long,Branch Lat,Distance
0,1,0.013039,0.899713,London,-0.001917,0.89852,37.19
1,1,0.013039,0.899713,Oxford,-0.021859,0.902807,86.7
2,1,0.013039,0.899713,Nottingham,-0.019942,0.924128,125.55
3,1,0.013039,0.899713,Newcastle,-0.027995,0.95932,255.43
4,2,0.007095,0.906835,London,-0.001917,0.89852,39.69


### Find the closest Branch for each Customer
        Make sure Distance is rounded to 2 decimal places

In [161]:
temp = df.groupby(df.columns.tolist()[:1])['Distance'].min().reset_index()
temp.info()
temp.head(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125 entries, 0 to 124
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Customer  125 non-null    int64  
 1   Distance  125 non-null    float64
dtypes: float64(1), int64(1)
memory usage: 2.1 KB


Unnamed: 0,Customer,Distance
0,1,37.19
1,2,39.69
2,3,44.82
3,4,175.98
4,5,33.63


In [162]:
df = temp.merge(df, how='left', on='Distance')
df.info()
df.head(5)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 127 entries, 0 to 126
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Customer_x    127 non-null    int64  
 1   Distance      127 non-null    float64
 2   Customer_y    127 non-null    int64  
 3   Address Long  127 non-null    float64
 4   Address Lat   127 non-null    float64
 5   Branch        127 non-null    object 
 6   Branch Long   127 non-null    float64
 7   Branch Lat    127 non-null    float64
dtypes: float64(5), int64(2), object(1)
memory usage: 8.9+ KB


Unnamed: 0,Customer_x,Distance,Customer_y,Address Long,Address Lat,Branch,Branch Long,Branch Lat
0,1,37.19,1,0.013039,0.899713,London,-0.001917,0.89852
1,2,39.69,2,0.007095,0.906835,London,-0.001917,0.89852
2,3,44.82,3,-0.002493,0.887215,London,-0.001917,0.89852
3,4,175.98,4,-0.080989,0.878526,Oxford,-0.021859,0.902807
4,5,33.63,5,-0.027803,0.895169,Oxford,-0.021859,0.902807


In [163]:
df.rename(columns={'Customer_x': 'Customer'}, inplace=True)
df.drop(columns=['Customer_y'], inplace=True)
df.head(5)

Unnamed: 0,Customer,Distance,Address Long,Address Lat,Branch,Branch Long,Branch Lat
0,1,37.19,0.013039,0.899713,London,-0.001917,0.89852
1,2,39.69,0.007095,0.906835,London,-0.001917,0.89852
2,3,44.82,-0.002493,0.887215,London,-0.001917,0.89852
3,4,175.98,-0.080989,0.878526,Oxford,-0.021859,0.902807
4,5,33.63,-0.027803,0.895169,Oxford,-0.021859,0.902807


### For each Branch, assign a Customer Priority rating, the closest customer having a rating of 1

In [164]:
df['Rank'] = 0
df['Rank'] = df.groupby('Branch')['Distance'].rank('min')
df.info()
df.head(5)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 127 entries, 0 to 126
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Customer      127 non-null    int64  
 1   Distance      127 non-null    float64
 2   Address Long  127 non-null    float64
 3   Address Lat   127 non-null    float64
 4   Branch        127 non-null    object 
 5   Branch Long   127 non-null    float64
 6   Branch Lat    127 non-null    float64
 7   Rank          127 non-null    float64
dtypes: float64(6), int64(1), object(1)
memory usage: 8.9+ KB


Unnamed: 0,Customer,Distance,Address Long,Address Lat,Branch,Branch Long,Branch Lat,Rank
0,1,37.19,0.013039,0.899713,London,-0.001917,0.89852,40.0
1,2,39.69,0.007095,0.906835,London,-0.001917,0.89852,42.0
2,3,44.82,-0.002493,0.887215,London,-0.001917,0.89852,43.0
3,4,175.98,-0.080989,0.878526,Oxford,-0.021859,0.902807,26.0
4,5,33.63,-0.027803,0.895169,Oxford,-0.021859,0.902807,9.0


In [169]:
rank_df = df.sort_values(['Branch','Rank'])
rank_df.head(5)

Unnamed: 0,Customer,Distance,Address Long,Address Lat,Branch,Branch Long,Branch Lat,Rank
59,58,2.84,-0.000767,0.89852,London,-0.001917,0.89852,1.0
121,120,3.97,-0.002732,0.899385,London,-0.001917,0.89852,2.0
120,119,5.36,-0.002157,0.897175,London,-0.001917,0.89852,3.0
122,121,5.8,-0.001103,0.899892,London,-0.001917,0.89852,4.0
62,61,7.2,-0.004602,0.899236,London,-0.001917,0.89852,5.0


In [171]:
rank_df.to_csv('output.csv')