In [1]:
import pandas as pd
import numpy as np

In [66]:
locations = pd.read_excel('Datasets/School locations.xlsx')

In [4]:
df = pd.read_csv('Datasets/survey_client_with_churn.csv')

In [5]:
customers_list = list(df['Customer heading'].unique())
len(customers_list)

821

In [67]:
locations = locations[locations['Customer heading'].isin(customers_list)]
locations = locations.drop(['Province', 'Region', 'Town'], axis=1)

In [68]:
locations['Current Seller id'] = locations['Current Seller id'].astype(int)

#### Number of customers each Seller serve

In [69]:
num_customers = locations.groupby('Current Seller id')['Customer heading'].count()

In [25]:
locations

Unnamed: 0,Customer heading,Latitude,Longitude,Current Seller id,Institution,Titularity
7315,136369,40.428274,-3.681298,208,No,Catholic
7316,136370,40.408230,-3.676975,208,No,Catholic
7317,136372,40.372197,-3.760104,208,No,Catholic
7318,136377,40.603361,-3.712042,117,Yes,Public
7319,136378,40.242364,-3.704871,189,No,Catholic
...,...,...,...,...,...,...
20091,212529,40.444196,-3.701910,208,No,Catholic
20111,212675,40.432243,-3.694464,161,No,Catholic
20116,212705,40.462455,-3.776128,189,Yes,Private
20118,212717,40.440356,-3.628420,209,No,Private


In [70]:
locations['Num customers of seller'] = locations.replace({'Current Seller id': num_customers})['Current Seller id']

#### Area and density that each Seller serve

In [34]:
def shoelace_area_polygon(x,y): # shoelace formula
    return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1)))

In [47]:
sellers_list = list(locations['Current Seller id'].unique())

area_covered = {}

for seller in sellers_list:
    lat = locations.loc[locations['Current Seller id'] == seller, 'Latitude'].values
    long = locations.loc[locations['Current Seller id'] == seller, 'Longitude'].values
    area = shoelace_area_polygon(lat, long)
    area_covered[seller] = area

In [71]:
locations['Area covered by seller'] = locations.replace({'Current Seller id': area_covered})['Current Seller id']

In [72]:
locations.head()

Unnamed: 0,Customer heading,Latitude,Longitude,Current Seller id,Institution,Titularity,Num customers of seller,Area covered by seller
7315,136369,40.428274,-3.681298,208,No,Catholic,47,0.026531
7316,136370,40.40823,-3.676975,208,No,Catholic,47,0.026531
7317,136372,40.372197,-3.760104,208,No,Catholic,47,0.026531
7318,136377,40.603361,-3.712042,117,Yes,Public,58,0.193443
7319,136378,40.242364,-3.704871,189,No,Catholic,57,0.028342


#### Titularity portfolio % each Seller serve

In [83]:
catholic_covered = {}
private_covered = {}

for seller in sellers_list:
    catholic_covered[seller] = locations[locations['Current Seller id'] == 100].Titularity.value_counts()['Catholic']
    private_covered[seller] = locations[locations['Current Seller id'] == 100].Titularity.value_counts()['Private']

In [82]:
locations.groupby('Titularity')['Customer heading'].count()

Titularity
Catholic    178
Private     130
Public      513
Name: Customer heading, dtype: int64

In [85]:
locations

Unnamed: 0,Customer heading,Latitude,Longitude,Current Seller id,Institution,Titularity,Num customers of seller,Area covered by seller
7315,136369,40.428274,-3.681298,208,No,Catholic,47,0.026531
7316,136370,40.408230,-3.676975,208,No,Catholic,47,0.026531
7317,136372,40.372197,-3.760104,208,No,Catholic,47,0.026531
7318,136377,40.603361,-3.712042,117,Yes,Public,58,0.193443
7319,136378,40.242364,-3.704871,189,No,Catholic,57,0.028342
...,...,...,...,...,...,...,...,...
20091,212529,40.444196,-3.701910,208,No,Catholic,47,0.026531
20111,212675,40.432243,-3.694464,161,No,Catholic,65,0.016538
20116,212705,40.462455,-3.776128,189,Yes,Private,57,0.028342
20118,212717,40.440356,-3.628420,209,No,Private,53,0.028683


In [86]:
locations['Seller portfolio - Catholic (abs)'] = locations.replace({'Current Seller id': catholic_covered})['Current Seller id'] / 178
locations['Seller portfolio - Private (abs)'] = locations.replace({'Current Seller id': private_covered})['Current Seller id'] / 130

locations['Seller portfolio - Catholic (relative)'] = locations.replace({'Current Seller id': catholic_covered})['Current Seller id'] / locations['Num customers of seller']
locations['Seller portfolio - Private (relative)'] = locations.replace({'Current Seller id': private_covered})['Current Seller id'] / locations['Num customers of seller']

#### Institution % of Seller

In [122]:
locations.groupby('Institution')['Customer heading'].count()

Institution
No     228
Yes    593
Name: Customer heading, dtype: int64

In [126]:
institution_yes = locations[locations.Institution=='Yes'].groupby(['Current Seller id'])['Customer heading'].count()

locations['Institution = Yes % (relative)'] = locations.replace({'Current Seller id': institution_yes})['Current Seller id'] / locations['Num customers of seller']
locations['Institution = Yes % (abs)'] = locations.replace({'Current Seller id': institution_yes})['Current Seller id'] / 593                                                                                           

In [128]:
locations = locations.drop(['Latitude', 'Longitude', 'Current Seller id'], axis=1)

Unnamed: 0,Customer heading,Institution,Titularity,Num customers of seller,Area covered by seller,Seller portfolio - Catholic (abs),Seller portfolio - Private (abs),Seller portfolio - Catholic (relative),Seller portfolio - Private (relative),Institution = Yes % (relative),Institution = Yes % (abs)
7315,136369,No,Catholic,47,0.026531,0.039326,0.023077,0.148936,0.063830,0.468085,0.037099
7316,136370,No,Catholic,47,0.026531,0.039326,0.023077,0.148936,0.063830,0.468085,0.037099
7317,136372,No,Catholic,47,0.026531,0.039326,0.023077,0.148936,0.063830,0.468085,0.037099
7318,136377,Yes,Public,58,0.193443,0.039326,0.023077,0.120690,0.051724,0.879310,0.086003
7319,136378,No,Catholic,57,0.028342,0.039326,0.023077,0.122807,0.052632,0.473684,0.045531
...,...,...,...,...,...,...,...,...,...,...,...
20091,212529,No,Catholic,47,0.026531,0.039326,0.023077,0.148936,0.063830,0.468085,0.037099
20111,212675,No,Catholic,65,0.016538,0.039326,0.023077,0.107692,0.046154,0.230769,0.025295
20116,212705,Yes,Private,57,0.028342,0.039326,0.023077,0.122807,0.052632,0.473684,0.045531
20118,212717,No,Private,53,0.028683,0.039326,0.023077,0.132075,0.056604,0.603774,0.053963
