In [38]:
from arcgis.features import GeoAccessor
import pandas as pd
import numpy as np
import sys
sys.path.append('../src/geoai_retail')
import utils

# Enable autoreload so edits to the local `utils` module are picked up live
# without restarting the kernel. Re-running this cell in a live kernel only
# prints an "already loaded" notice; it is harmless.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [39]:
# --- Notebook configuration: input paths and field names --------------------

# Origin geography (blocks) feature class and its identifier field.
# NOTE(review): not referenced by the cells visible in this notebook section.
geo_fc = '../data/raw/raw.gdb/blocks'
geo_id_fld = 'GEOID'

# Enrichment table and the field named for normalization.
# NOTE(review): not referenced by the cells visible in this notebook section.
normalize_csv = '../data/raw/blocks_enriched.csv'
normalize_id_fld = 'GEOID'
normalize_fld = 'wealth_tothh_cy'  # Total Households

# Table listing the closest destinations for each origin (read further below).
closest_csv =  '../data/interim/closest_table.csv'
# NOTE(review): same value as the metric name passed to
# utils.add_metric_by_origin_dest below, but that call hard-codes the string.
closest_factor_fld_root = 'trip_count'

# Origin/destination trip counts.
trips_csv = '../data/raw/trips_od.csv'

# Destination (coffee location) feature class and its unique identifier field.
dest_fc = '../data/raw/raw.gdb/coffee'
dest_id_fld = 'LOCNUM'

In [40]:
# Load the destination locations as a spatially enabled DataFrame and keep only
# the identifier, company name and geometry columns.
dest_df = GeoAccessor.from_featureclass(dest_fc)

# Use the configured identifier field (dest_id_fld) rather than repeating the
# literal column name, so this cell stays consistent with the later cells that
# already rely on dest_id_fld.
dest_df = dest_df[[dest_id_fld, 'CONAME', 'SHAPE']].copy()

# Cast the identifier to a 64-bit integer so it can later be joined against the
# integer destination ids that come from the CSV tables.
dest_df[dest_id_fld] = dest_df[dest_id_fld].astype('int64')

# Re-register the geometry column after subsetting the frame.
dest_df.spatial.set_geometry('SHAPE')
dest_df.head()

Unnamed: 0,LOCNUM,CONAME,SHAPE
0,413963145,SNOW PEAK COFFEE CO,"{""x"": -122.99270000035045, ""y"": 44.77610000019..."
1,422168012,BAD GIRLS COFFEE,"{""x"": -122.96479999992164, ""y"": 44.79579999981..."
2,244210043,DUTCH BROTHERS COFFEE,"{""x"": -123.31560000023427, ""y"": 44.92460000005..."
3,396819021,STARBUCKS,"{""x"": -123.30960000042165, ""y"": 44.92999999968..."
4,396819062,STARBUCKS,"{""x"": -123.31230000023731, ""y"": 44.93040000014..."


In [41]:
# Map every destination, symbolized by company name.
# NOTE(review): renderer_type='u' is understood to select the unique-value
# renderer in the ArcGIS API for Python — confirm against the installed version.
dest_df.spatial.plot(renderer_type='u', col='CONAME')

MapView(layout=Layout(height='400px', width='100%'))

In [42]:
# Count the locations for each company name, largest chains first, and return a
# flat two-column frame: name, location_count.
brand_df = (
    dest_df
    .groupby('CONAME')[dest_id_fld]
    .count()
    .sort_values(ascending=False)
    .rename_axis('name')
    .reset_index(name='location_count')
)
brand_df.head(25)

Unnamed: 0,name,location_count
0,STARBUCKS,298
1,DUTCH BROTHERS COFFEE,49
2,UNCLE GARY'S COFFEE EMPORIUM,15
3,ALLEGRO COFFEE,7
4,HUMAN BEAN,7
5,BLACK ROCK COFFEE BAR,6
6,STUMPTOWN COFFEE ROASTERS,4
7,CITY COFFEE,4
8,BLACK ROCK COFFEE,3
9,BLUE MOON COFFEE,3


In [43]:
# Brands with more than one location keep their own name as the category;
# single-location businesses are lumped together as 'INDEPENDENT'.
# Series.where keeps the name where the condition holds and substitutes the
# fallback elsewhere, which avoids a Python-level row-by-row apply.
brand_df['category'] = brand_df['name'].where(brand_df['location_count'] > 1, 'INDEPENDENT')
brand_df.head(25)

Unnamed: 0,name,location_count,category
0,STARBUCKS,298,STARBUCKS
1,DUTCH BROTHERS COFFEE,49,DUTCH BROTHERS COFFEE
2,UNCLE GARY'S COFFEE EMPORIUM,15,UNCLE GARY'S COFFEE EMPORIUM
3,ALLEGRO COFFEE,7,ALLEGRO COFFEE
4,HUMAN BEAN,7,HUMAN BEAN
5,BLACK ROCK COFFEE BAR,6,BLACK ROCK COFFEE BAR
6,STUMPTOWN COFFEE ROASTERS,4,STUMPTOWN COFFEE ROASTERS
7,CITY COFFEE,4,CITY COFFEE
8,BLACK ROCK COFFEE,3,BLACK ROCK COFFEE
9,BLUE MOON COFFEE,3,BLUE MOON COFFEE


In [44]:
# Total location count per category, largest first.
# The numeric column is selected explicitly before summing: brand_df also holds
# the string 'name' column, and summing the whole frame depends on pandas
# silently dropping non-numeric columns (newer pandas aggregates them instead).
top_dest_df = (
    brand_df
    .groupby('category')[['location_count']]
    .sum()
    .sort_values('location_count', ascending=False)
)
top_dest_df

Unnamed: 0_level_0,location_count
category,Unnamed: 1_level_1
STARBUCKS,298
INDEPENDENT,246
DUTCH BROTHERS COFFEE,49
UNCLE GARY'S COFFEE EMPORIUM,15
HUMAN BEAN,7
ALLEGRO COFFEE,7
BLACK ROCK COFFEE BAR,6
CITY COFFEE,4
STUMPTOWN COFFEE ROASTERS,4
BLUE MOON COFFEE,3


In [45]:
# Attach each location's brand category by looking up its company name.
category_lookup = brand_df.set_index('name')[['category']]

# Drop any 'category' column left over from a previous run of this cell first;
# otherwise re-running fails because join() refuses overlapping column names.
dest_df = (
    dest_df
    .drop(columns='category', errors='ignore')
    .join(category_lookup, on='CONAME')
)
dest_df.sample(5)

Unnamed: 0,LOCNUM,CONAME,SHAPE,category
368,708584554,LUCKMAN LLC,"{""x"": -122.73940000049328, ""y"": 45.90809999951...",INDEPENDENT
27,427940712,HUMAN BEAN,"{""x"": -123.03329999994935, ""y"": 44.88730000045...",HUMAN BEAN
454,413962364,STARBUCKS,"{""x"": -122.40959999952491, ""y"": 45.49870000016...",STARBUCKS
9,685312811,SLOW TRAIN COFFEE CO,"{""x"": -123.39529999997893, ""y"": 45.09749999962...",INDEPENDENT
289,415522504,FLOYD'S COFFEE OLD TOWN,"{""x"": -122.67170000037396, ""y"": 45.52359999968...",INDEPENDENT


In [46]:
# Map the destinations again, this time symbolized by the derived brand
# category so that single-location businesses share one 'INDEPENDENT' symbol.
dest_df.spatial.plot(renderer_type='u', col='category')

MapView(layout=Layout(height='400px', width='100%'))

In [47]:
# Load the closest-destination table; the first CSV column is the saved row
# index, so it is used as the index rather than read as data.
closest_df = pd.read_csv(closest_csv, index_col=0)
closest_df.head()

Unnamed: 0,origin_id,destination_id_01,proximity_kilometers_01,proximity_minutes_01,destination_id_02,proximity_kilometers_02,proximity_minutes_02,destination_id_03,proximity_kilometers_03,proximity_minutes_03,destination_id_04,proximity_kilometers_04,proximity_minutes_04
0,410050201001000,998326755,1.548381,3.355589,236771176,1.716985,3.834511,731023672,1.776342,3.778623,895875482,1.880792,4.141975
1,410050201001001,998326755,1.358351,2.816526,236771176,1.526954,3.295449,731023672,1.586312,3.23956,895875482,1.690762,3.602913
2,410050201001002,998326755,1.683183,3.781076,236771176,1.851786,4.259999,731023672,1.911143,4.204111,895875482,2.015593,4.567463
3,410050201001003,998326755,1.480122,3.140135,236771176,1.648726,3.619058,731023672,1.708083,3.56317,895875482,1.812533,3.926522
4,410050201001004,998326755,1.577137,2.642447,236771176,1.745741,3.12137,731023672,1.805098,3.065482,895875482,1.909548,3.428834


In [48]:
# Load only the three columns needed from the origin/destination trips table.
trips_df = pd.read_csv(trips_csv, usecols=['origin_id', 'destination_id', 'trip_count'])
trips_df.head()

Unnamed: 0,origin_id,destination_id,trip_count
0,410050201001002,421983243,1
1,410050201001002,520835364,3
2,410050201001002,585558430,1
3,410050201001002,637497553,1
4,410050201001002,721916379,1


In [49]:
# Add the trip-count metric to the closest-destination table using the project
# helper. The displayed result gains one trip_count_NN column per
# destination_id_NN column.
# NOTE(review): the trailing 0 looks like the fill value for origin/destination
# pairs with no recorded trips (the output shows 0.0) — confirm in utils.
# NOTE(review): 'trip_count' duplicates the value of closest_factor_fld_root
# from the configuration cell — confirm whether that constant was meant here.
closest_trip_df = utils.add_metric_by_origin_dest(closest_df, trips_df, 'trip_count', 0)
closest_trip_df.head()

Unnamed: 0,origin_id,destination_id_01,proximity_kilometers_01,proximity_minutes_01,destination_id_02,proximity_kilometers_02,proximity_minutes_02,destination_id_03,proximity_kilometers_03,proximity_minutes_03,destination_id_04,proximity_kilometers_04,proximity_minutes_04,trip_count_01,trip_count_02,trip_count_03,trip_count_04
0,410050201001000,998326755,1.548381,3.355589,236771176,1.716985,3.834511,731023672,1.776342,3.778623,895875482,1.880792,4.141975,0.0,0.0,0.0,0.0
1,410050201001001,998326755,1.358351,2.816526,236771176,1.526954,3.295449,731023672,1.586312,3.23956,895875482,1.690762,3.602913,0.0,0.0,0.0,0.0
2,410050201001002,998326755,1.683183,3.781076,236771176,1.851786,4.259999,731023672,1.911143,4.204111,895875482,2.015593,4.567463,0.0,0.0,0.0,0.0
3,410050201001003,998326755,1.480122,3.140135,236771176,1.648726,3.619058,731023672,1.708083,3.56317,895875482,1.812533,3.926522,0.0,0.0,0.0,0.0
4,410050201001004,998326755,1.577137,2.642447,236771176,1.745741,3.12137,731023672,1.805098,3.065482,895875482,1.909548,3.428834,0.0,0.0,0.0,0.0


In [50]:
# Sum the trips attributed to each destination in its role as the first-listed
# (_01) destination of an origin.
trip_count_summ_df = (
    closest_trip_df
    .groupby('destination_id_01')[['trip_count_01']]
    .sum()
)
trip_count_summ_df.head()

Unnamed: 0_level_0,trip_count_01
destination_id_01,Unnamed: 1_level_1
105830012,202.0
105971188,233.0
177692910,115.0
180308389,24.0
201048063,332.0


In [51]:
# Attach the summed trip counts to each destination and order the busiest first.
# Columns left over from a previous run of this cell are dropped first;
# otherwise re-running fails because join() refuses overlapping column names.
# Destinations absent from trip_count_summ_df keep NaN in trip_count_01.
dest_df = (
    dest_df
    .drop(columns=trip_count_summ_df.columns, errors='ignore')
    .join(trip_count_summ_df, on=dest_id_fld)
    .sort_values('trip_count_01', ascending=False)
)
dest_df.sample(5)

Unnamed: 0,LOCNUM,CONAME,SHAPE,category,trip_count_01
257,523572634,STARBUCKS,"{""x"": -122.6755000001886, ""y"": 45.513300000242...",STARBUCKS,114.0
509,558222485,STARBUCKS,"{""x"": -122.65110000048384, ""y"": 45.54889999966...",STARBUCKS,310.0
561,725700857,BLACK ROCK COFFEE BAR,"{""x"": -122.47550000010045, ""y"": 45.50510000043...",BLACK ROCK COFFEE BAR,
4,396819062,STARBUCKS,"{""x"": -123.31230000023731, ""y"": 44.93040000014...",STARBUCKS,336.0
526,708242982,PLATINUM GIRL INC,"{""x"": -122.57969999991337, ""y"": 45.50510000043...",INDEPENDENT,127.0


In [52]:
# Total trips per brand category, largest first.
# Only trip_count_01 is selected before summing: aggregating the whole frame
# would also try to sum the id, name and geometry columns, which is meaningless
# and depends on pandas silently discarding non-numeric columns.
(
    dest_df
    .groupby('category')[['trip_count_01']]
    .sum()
    .sort_values('trip_count_01', ascending=False)
)

Unnamed: 0_level_0,trip_count_01
category,Unnamed: 1_level_1
STARBUCKS,57886.0
INDEPENDENT,24041.0
DUTCH BROTHERS COFFEE,4595.0
UNCLE GARY'S COFFEE EMPORIUM,1953.0
ALLEGRO COFFEE,1682.0
HUMAN BEAN,815.0
BLACK ROCK COFFEE BAR,670.0
BLACKROCK COFFEE,645.0
AVA ROASTERIA,606.0
STUMPTOWN COFFEE ROASTERS,313.0
