In [1]:
%matplotlib inline

from arcgis.features import GeoAccessor
from arcgis.geometry import Geometry
import pandas as pd
import os
import featuretools as ft
import numpy as np
import itertools

gdb_int = r'D:\\projects\\geoai-retail\\data\\interim\\interim.gdb'
block_group_fc = os.path.join(gdb_int, 'block_groups_enriched')
trips_fc = os.path.join(gdb_int, 'trips')

gdb_raw = r'D:\\projects\\geoai-retail\\data\\raw\\raw.gdb'
stores_fc = os.path.join(gdb_raw, 'coffee')

In [2]:
block_group_df = GeoAccessor.from_featureclass(block_group_fc)
block_group_df.drop(['OBJECTID', 'NAME', 'aggregationMethod'], inplace=True, axis=1)
block_group_df.set_index('ID', inplace=True, drop=True)

trips_df = GeoAccessor.from_featureclass(trips_fc)
trips_df.drop('OBJECTID', inplace=True, axis=1)
trips_df.reset_index(inplace=True, drop=True)

stores_df = GeoAccessor.from_featureclass(stores_fc)
stores_df = stores_df[['LOCNUM', 'CONAME', 'SHAPE']].copy()
stores_df.columns = ['locnum', 'store_name', 'SHAPE']

In [3]:
stores_df.sample(5)

Unnamed: 0,locnum,store_name,SHAPE
427,708274207,DUTCH BROTHERS COFFEE,"{""x"": -122.6398999997669, ""y"": 45.490500000354..."
351,724148139,FARM HOUSE COFFEE,"{""x"": -123.20780000033375, ""y"": 46.10530000042..."
321,637497264,STARBUCKS,"{""x"": -122.68059999972934, ""y"": 45.57619999991..."
350,720832388,FELIDA COFFEE CO,"{""x"": -122.70839999979461, ""y"": 45.70729999951..."
224,660912353,UNCLE GARY'S COFFEE EMPORIUM,"{""x"": -122.87539999991321, ""y"": 45.53400000049..."


In [4]:
store_count_df = stores_df[['store_name', 'locnum']].groupby('store_name').count()
store_count_df.columns = ['store_count']
store_count_df.reset_index(inplace=True)

store_other_df = store_count_df[store_count_df['store_count'] <= 3]
store_other_count = store_other_df['store_count'].sum()
row = pd.DataFrame([['OTHER', store_other_count]], columns=store_count_df.columns)

store_count_df = store_count_df[store_count_df['store_count'] > 3].copy()
store_count_df = store_count_df.append(row)

store_count_df.sort_values('store_count', ascending=False)

Unnamed: 0,store_name,store_count
234,STARBUCKS,298
0,OTHER,272
87,DUTCH BROTHERS COFFEE,49
252,UNCLE GARY'S COFFEE EMPORIUM,15
2,ALLEGRO COFFEE,7
129,HUMAN BEAN,7
17,BLACK ROCK COFFEE BAR,6
38,CITY COFFEE,4
237,STUMPTOWN COFFEE ROASTERS,4


In [5]:
stores_df['store_name_category'] = stores_df.store_name.apply(lambda val: val if val in store_count_df.store_name.values else 'OTHER')
stores_df.sample(5)

Unnamed: 0,locnum,store_name,SHAPE,store_name_category
549,637497488,STARBUCKS,"{""x"": -122.53850000013324, ""y"": 45.52490000041...",STARBUCKS
221,243254026,STARBUCKS,"{""x"": -122.8656000002859, ""y"": 45.537500000220...",STARBUCKS
152,257782771,STARBUCKS,"{""x"": -122.72289999950851, ""y"": 45.40939999952...",STARBUCKS
535,719397681,ART HAUS CAFE,"{""x"": -122.57950000018627, ""y"": 45.52840000013...",OTHER
56,402194881,HOT SHOTS,"{""x"": -122.86569999964945, ""y"": 44.84149999995...",OTHER


In [6]:
block_group_df.sample(5)

Unnamed: 0_level_0,gender_pop0_cy,gender_pop5_cy,gender_pop10_cy,gender_pop15_cy,gender_pop20_cy,gender_pop25_cy,gender_pop30_cy,gender_pop35_cy,gender_pop40_cy,gender_pop45_cy,...,educationalattainment_ged_cy,educationalattainment_smcoll_cy,educationalattainment_asscdeg_cy,educationalattainment_bachdeg_cy,educationalattainment_graddeg_cy,educationalattainment_educbasecy,households_acshhbpov,households_acshhapov,households_acsbpovmcf,SHAPE
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
410579608001,37.0,42.0,46.0,44.0,31.0,30.0,37.0,49.0,55.0,54.0,...,11.0,254.0,31.0,65.0,103.0,777.0,92.0,355.0,53.0,"{""rings"": [[[-123.86125999999996, 45.230400000..."
530050114025,81.0,90.0,91.0,96.0,81.0,74.0,79.0,82.0,83.0,94.0,...,24.0,328.0,162.0,128.0,25.0,1078.0,13.0,594.0,6.0,"{""rings"": [[[-119.09683999999999, 46.183570000..."
530530714092,69.0,67.0,61.0,61.0,69.0,60.0,56.0,66.0,59.0,54.0,...,64.0,175.0,28.0,8.0,0.0,541.0,67.0,233.0,7.0,"{""rings"": [[[-122.40965999999997, 47.104230000..."
410470026001,62.0,66.0,60.0,59.0,57.0,59.0,50.0,48.0,48.0,58.0,...,18.0,178.0,44.0,117.0,51.0,725.0,38.0,301.0,20.0,"{""rings"": [[[-122.88585999999998, 44.928690000..."
410390007073,67.0,67.0,58.0,42.0,57.0,60.0,74.0,46.0,60.0,84.0,...,49.0,307.0,56.0,199.0,91.0,1056.0,138.0,595.0,19.0,"{""rings"": [[[-124.08220999999998, 43.973190000..."


In [7]:
trips_df.sample(5)

Unnamed: 0,trip_distance_miles,trip_time_minutes,destination_id,origin_id,SHAPE
1125596,205.420282,213.216667,970020814,530050115013,"{""x"": -119.09667499999995, ""y"": 46.20512400000..."
1081764,13.271863,18.366667,523572642,530110410081,"{""x"": -122.69991999999996, ""y"": 45.69114400000..."
1053472,12.074481,21.25,723657453,530110406081,"{""x"": -122.47361699999999, ""y"": 45.62637400000..."
623207,11.564336,28.2,211564646,410510055002,"{""x"": -122.69057399999997, ""y"": 45.51712300000..."
541993,6.437404,24.633333,715074000,410510056003,"{""x"": -122.68274399999996, ""y"": 45.50928700000..."


In [8]:
trips_df.trip_distance_miles.median()

2.625913846

In [9]:
trips_mean_df = trips_df.groupby(['origin_id', 'destination_id']).median()
trips_sum_df = trips_df[['trip_distance_miles', 'origin_id', 'destination_id']].groupby(['origin_id', 'destination_id']).count()
trips_sum_df.columns = ['trip_count']
trips_stats_df = trips_mean_df.join(trips_sum_df)
trips_stats_df.reset_index(inplace=True)
trips_stats_df.sample(5)

Unnamed: 0,origin_id,destination_id,trip_distance_miles,trip_time_minutes,trip_count
48386,410510020002,243254414,11.290311,24.633333,1
17827,410050229061,243254257,29.727631,45.483333,1
52290,410510025021,721758160,3.690944,14.791667,8
85315,410510096042,367407582,18.710102,51.433333,1
139936,530150016005,670911619,5.831256,8.325,14


In [10]:
block_group_sum_df = trips_stats_df.join(block_group_df, on='origin_id')
block_group_sum_df.head()

Unnamed: 0,origin_id,destination_id,trip_distance_miles,trip_time_minutes,trip_count,gender_pop0_cy,gender_pop5_cy,gender_pop10_cy,gender_pop15_cy,gender_pop20_cy,...,educationalattainment_ged_cy,educationalattainment_smcoll_cy,educationalattainment_asscdeg_cy,educationalattainment_bachdeg_cy,educationalattainment_graddeg_cy,educationalattainment_educbasecy,households_acshhbpov,households_acshhapov,households_acsbpovmcf,SHAPE
0,60930004001,219649035,311.607615,272.6,1,19.0,20.0,39.0,19.0,23.0,...,14.0,169.0,37.0,37.0,34.0,507.0,42.0,226.0,22.0,"{'rings': [[[-122.368049999, 42.00933000000003..."
1,60930004001,718176685,261.898556,270.266667,1,19.0,20.0,39.0,19.0,23.0,...,14.0,169.0,37.0,37.0,34.0,507.0,42.0,226.0,22.0,"{'rings': [[[-122.368049999, 42.00933000000003..."
2,60930004001,723838910,311.607615,272.6,1,19.0,20.0,39.0,19.0,23.0,...,14.0,169.0,37.0,37.0,34.0,507.0,42.0,226.0,22.0,"{'rings': [[[-122.368049999, 42.00933000000003..."
3,410019502002,105830012,304.35373,276.283333,1,78.0,75.0,71.0,72.0,78.0,...,51.0,264.0,18.0,128.0,30.0,792.0,170.0,294.0,19.0,"{'rings': [[[-117.82904999999994, 44.777120000..."
4,410019502002,732273983,304.35373,276.283333,1,78.0,75.0,71.0,72.0,78.0,...,51.0,264.0,18.0,128.0,30.0,792.0,170.0,294.0,19.0,"{'rings': [[[-117.82904999999994, 44.777120000..."


In [11]:
import arcpy
block_group_column_df = pd.DataFrame([(field.name, field.aliasName) for field in arcpy.ListFields(block_group_fc)], columns=['name', 'alias'])
block_group_column_df.sample(5)

Unnamed: 0,name,alias
912,agebyracebysex_aim70_cy,2017 American Indian Males 70-74
37,gender_male45_cy,2017 Males Age 45-49
193,disposableincome_a45di100cy,2017 HHr 45-54/Disposable Inc $100K-149999
247,householdincome_hinc200_cy,2017 HH Income $200000+
416,networth_mednwa55cy,2017 Median Net Worth: HHr 55-64


In [12]:
hh_col_alias = [v for v in block_group_column_df.alias if 'Total Households' in v][0]
hh_col = block_group_column_df[block_group_column_df.alias == hh_col_alias].name.values[0]
hh_col

'householdtotals_tothh_cy'

In [13]:
block_group_sum_df['trips_market_penetration'] = block_group_sum_df['trip_count'] / block_group_sum_df[hh_col]
block_group_sum_df.sample(5)

Unnamed: 0,origin_id,destination_id,trip_distance_miles,trip_time_minutes,trip_count,gender_pop0_cy,gender_pop5_cy,gender_pop10_cy,gender_pop15_cy,gender_pop20_cy,...,educationalattainment_smcoll_cy,educationalattainment_asscdeg_cy,educationalattainment_bachdeg_cy,educationalattainment_graddeg_cy,educationalattainment_educbasecy,households_acshhbpov,households_acshhapov,households_acsbpovmcf,SHAPE,trips_market_penetration
95560,410670305013,415566989,8.515268,15.633333,11,89.0,86.0,89.0,83.0,105.0,...,353.0,32.0,360.0,132.0,1209.0,78.0,734.0,29.0,"{'rings': [[[-122.76981099999995, 45.470784000...",0.014342
2971,410050203041,637457938,6.276468,18.666667,1,115.0,154.0,186.0,173.0,119.0,...,180.0,117.0,518.0,489.0,1476.0,30.0,787.0,13.0,"{'rings': [[[-122.73335999999995, 45.433200000...",0.001346
92198,410670301012,626749303,5.134699,14.783333,2,21.0,27.0,31.0,25.0,24.0,...,118.0,23.0,253.0,237.0,720.0,40.0,335.0,13.0,"{'rings': [[[-122.76449999999994, 45.519550000...",0.004525
91840,410659704001,523572642,79.555372,86.066667,1,47.0,42.0,47.0,65.0,59.0,...,109.0,114.0,90.0,33.0,669.0,77.0,269.0,0.0,"{'rings': [[[-121.19605999999999, 45.620650000...",0.002375
103224,410670315044,637497520,15.057062,23.516667,1,152.0,162.0,176.0,144.0,135.0,...,529.0,100.0,704.0,348.0,1942.0,43.0,987.0,12.0,"{'rings': [[[-122.87875999999994, 45.559650000...",0.000912


In [14]:
block_groups_sum_df = block_group_sum_df[(block_group_sum_df.trips_market_penetration != np.inf) & (block_group_sum_df.trips_market_penetration > 0)].copy()
block_groups_sum_df.sample(5)

Unnamed: 0,origin_id,destination_id,trip_distance_miles,trip_time_minutes,trip_count,gender_pop0_cy,gender_pop5_cy,gender_pop10_cy,gender_pop15_cy,gender_pop20_cy,...,educationalattainment_smcoll_cy,educationalattainment_asscdeg_cy,educationalattainment_bachdeg_cy,educationalattainment_graddeg_cy,educationalattainment_educbasecy,households_acshhbpov,households_acshhapov,households_acsbpovmcf,SHAPE,trips_market_penetration
107399,410670316113,524877610,22.83352,25.25,1,92.0,94.0,82.0,60.0,79.0,...,159.0,40.0,342.0,220.0,899.0,53.0,470.0,39.0,"{'rings': [[[-122.82778699999994, 45.526985000...",0.001969
113583,410670319121,238531206,9.754903,24.266667,1,175.0,183.0,164.0,113.0,98.0,...,329.0,221.0,328.0,131.0,1374.0,71.0,641.0,49.0,"{'rings': [[[-122.78155999999996, 45.438990000...",0.00133
74466,410510064022,725092054,3.090699,9.183333,8,145.0,152.0,179.0,199.0,204.0,...,345.0,123.0,845.0,730.0,2186.0,33.0,931.0,0.0,"{'rings': [[[-122.67330999999996, 45.432790000...",0.0067
129275,530110409071,499703551,15.713851,33.116667,2,107.0,171.0,259.0,249.0,136.0,...,389.0,157.0,561.0,499.0,1886.0,16.0,851.0,16.0,"{'rings': [[[-122.70724999999999, 45.722860000...",0.002188
96704,410670307002,415522503,11.394701,21.858333,2,81.0,71.0,56.0,56.0,99.0,...,176.0,50.0,213.0,70.0,770.0,26.0,395.0,6.0,"{'rings': [[[-122.74410018299994, 45.434451956...",0.003846


In [15]:
# block_group_column_df.spatial.to_table(os.path.join(gdb_int, 'block_group_alias_table'))
# block_group_sum_df.spatial.to_featureclass(os.path.join(gdb_int, 'block_group_summary_by_store'))

In [16]:
stores_df.head()

Unnamed: 0,locnum,store_name,SHAPE,store_name_category
0,413963145,SNOW PEAK COFFEE CO,"{""x"": -122.99270000035045, ""y"": 44.77610000019...",OTHER
1,422168012,BAD GIRLS COFFEE,"{""x"": -122.96479999992164, ""y"": 44.79579999981...",OTHER
2,244210043,DUTCH BROTHERS COFFEE,"{""x"": -123.31560000023427, ""y"": 44.92460000005...",DUTCH BROTHERS COFFEE
3,396819021,STARBUCKS,"{""x"": -123.30960000042165, ""y"": 44.92999999968...",STARBUCKS
4,396819062,STARBUCKS,"{""x"": -123.31230000023731, ""y"": 44.93040000014...",STARBUCKS


In [17]:
stores_df.locnum = stores_df.locnum.astype('int64')

In [18]:
trips_stats_df.head()

Unnamed: 0,origin_id,destination_id,trip_distance_miles,trip_time_minutes,trip_count
0,60930004001,219649035,311.607615,272.6,1
1,60930004001,718176685,261.898556,270.266667,1
2,60930004001,723838910,311.607615,272.6,1
3,410019502002,105830012,304.35373,276.283333,1
4,410019502002,732273983,304.35373,276.283333,1


In [56]:
trips_coffee_cat_df = trips_stats_df.join(stores_df.set_index('locnum', drop=True), on='destination_id')
trips_coffee_cat_df = trips_coffee_cat_df[['origin_id', 'destination_id', 'trip_distance_miles', 'trip_time_minutes', 'trip_count', 'store_name_category']].copy()
trips_coffee_cat_df.head()

Unnamed: 0,origin_id,destination_id,trip_distance_miles,trip_time_minutes,trip_count,store_name_category
0,60930004001,219649035,311.607615,272.6,1,UNCLE GARY'S COFFEE EMPORIUM
1,60930004001,718176685,261.898556,270.266667,1,STARBUCKS
2,60930004001,723838910,311.607615,272.6,1,ALLEGRO COFFEE
3,410019502002,105830012,304.35373,276.283333,1,OTHER
4,410019502002,732273983,304.35373,276.283333,1,OTHER


In [None]:
trips_coffee_cat_df[trips_coffee_cat_df.store_name_category == 'STARBUCKS'].sort_values('trip_count', ascending=False).head(20)

In [57]:
trips_coffee_cat_df.set_index(['origin_id', 'destination_id'], inplace=True)
trips_coffee_cat_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,trip_distance_miles,trip_time_minutes,trip_count,store_name_category
origin_id,destination_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
60930004001,219649035,311.607615,272.6,1,UNCLE GARY'S COFFEE EMPORIUM
60930004001,718176685,261.898556,270.266667,1,STARBUCKS
60930004001,723838910,311.607615,272.6,1,ALLEGRO COFFEE
410019502002,105830012,304.35373,276.283333,1,OTHER
410019502002,732273983,304.35373,276.283333,1,OTHER


In [59]:
bg_lookup_df = block_group_sum_df[['origin_id', 'destination_id', 'trips_market_penetration']]
bg_lookup_df.set_index(['origin_id', 'destination_id'], inplace=True)
bg_lookup_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,trips_market_penetration
origin_id,destination_id,Unnamed: 2_level_1
60930004001,219649035,0.003344
60930004001,718176685,0.003344
60930004001,723838910,0.003344
410019502002,105830012,0.002008
410019502002,732273983,0.002008


In [60]:
trips_coffee_cat = trips_coffee_cat_df.join(bg_lookup_df)
trips_coffee_cat.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,trip_distance_miles,trip_time_minutes,trip_count,store_name_category,trips_market_penetration
origin_id,destination_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
60930004001,219649035,311.607615,272.6,1,UNCLE GARY'S COFFEE EMPORIUM,0.003344
60930004001,718176685,261.898556,270.266667,1,STARBUCKS,0.003344
60930004001,723838910,311.607615,272.6,1,ALLEGRO COFFEE,0.003344
410019502002,105830012,304.35373,276.283333,1,OTHER,0.002008
410019502002,732273983,304.35373,276.283333,1,OTHER,0.002008
410019503002,403048346,302.264059,257.2,1,OTHER,0.001848
410019503002,404389476,289.878892,252.9,1,OTHER,0.001848
410019503002,724624345,289.878892,252.9,1,STARBUCKS,0.001848
410019503002,970020814,302.264059,257.2,1,STARBUCKS,0.001848
410030001001,435874684,83.395445,84.616667,1,STARBUCKS,0.001656


In [61]:
trips_coffee_cat.reset_index(inplace=True)
trips_coffee_cat.head(10)

Unnamed: 0,origin_id,destination_id,trip_distance_miles,trip_time_minutes,trip_count,store_name_category,trips_market_penetration
0,60930004001,219649035,311.607615,272.6,1,UNCLE GARY'S COFFEE EMPORIUM,0.003344
1,60930004001,718176685,261.898556,270.266667,1,STARBUCKS,0.003344
2,60930004001,723838910,311.607615,272.6,1,ALLEGRO COFFEE,0.003344
3,410019502002,105830012,304.35373,276.283333,1,OTHER,0.002008
4,410019502002,732273983,304.35373,276.283333,1,OTHER,0.002008
5,410019503002,403048346,302.264059,257.2,1,OTHER,0.001848
6,410019503002,404389476,289.878892,252.9,1,OTHER,0.001848
7,410019503002,724624345,289.878892,252.9,1,STARBUCKS,0.001848
8,410019503002,970020814,302.264059,257.2,1,STARBUCKS,0.001848
9,410030001001,435874684,83.395445,84.616667,1,STARBUCKS,0.001656


In [99]:
store_category = "UNCLE GARY'S COFFEE EMPORIUM"
origin_id = '410510057002'

def get_trips_single_cat(category, origin, proximity_metric_fields, proximity_sort_field, measurement_metric_field, count_threshold=3):
    
    proximity_metric_fields = ['trip_distance_miles', 'trip_time_minutes']
    proximity_sort_field = 'trip_distance_miles'
    measurement_metric_field = 'market_penetration'

    trips_single_cat = trips_coffee_cat[
        (trips_coffee_cat.store_name_category == category) & (trips_coffee_cat.origin_id == origin)
    ].sort_values(proximity_sort_field)
    
    field_list = ['origin_id', 'destination_id'] + proximity_metric_fields + [measurement_metric_field]
    trips_single_cat = trips_single_cat[field_list][:count_threshold].copy()
        
    trips_single_cat = trips_single_cat.pivot_table(
        index='origin_id', 
        columns='destination_id'
    )

    dest_id_tuple_lst = [('trip_destination_id_{}_{:02d}'.format(category, idx+1), val) 
                        for idx, val in enumerate(trips_single_cat.columns.levels[1])]

    candidate_count_lst = ['{}_{:02d}'.format(category, idx+1) for idx in range(0, len(trips_single_cat.columns.levels[1]))]
    cols = ['_'.join(parts) for parts in itertools.product(trips_single_cat.columns.levels[0].values, candidate_count_lst)]
    trips_single_cat.columns = cols

    for col, val in dest_id_tuple_lst:
        trips_single_cat[col] = val
    
    return trips_single_cat

get_trips_single_cat(category, origin)

Unnamed: 0_level_0,trip_distance_miles_UNCLE GARY'S COFFEE EMPORIUM_01,trip_distance_miles_UNCLE GARY'S COFFEE EMPORIUM_02,trip_distance_miles_UNCLE GARY'S COFFEE EMPORIUM_03,trip_time_minutes_UNCLE GARY'S COFFEE EMPORIUM_01,trip_time_minutes_UNCLE GARY'S COFFEE EMPORIUM_02,trip_time_minutes_UNCLE GARY'S COFFEE EMPORIUM_03,trips_market_penetration_UNCLE GARY'S COFFEE EMPORIUM_01,trips_market_penetration_UNCLE GARY'S COFFEE EMPORIUM_02,trips_market_penetration_UNCLE GARY'S COFFEE EMPORIUM_03,trip_destination_id_UNCLE GARY'S COFFEE EMPORIUM_01,trip_destination_id_UNCLE GARY'S COFFEE EMPORIUM_02,trip_destination_id_UNCLE GARY'S COFFEE EMPORIUM_03
origin_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
410510057002,1.068758,2.551349,0.759937,9.433333,10.716667,9.166667,0.083159,0.013277,0.083159,219649035,415522508,520835364


In [100]:
cat_id_df = trips_coffee_cat.groupby(['store_name_category', 'origin_id']).size().reset_index().rename(columns={0:'count'})
cat_id_df.sample(5)

Unnamed: 0,store_name_category,origin_id,count
7179,OTHER,410530202021,12
9026,STARBUCKS,410390021011,15
1859,BLACK ROCK COFFEE BAR,410670317046,1
9081,STARBUCKS,410390039002,8
194,ALLEGRO COFFEE,410099709003,2


In [111]:
cat_id_df.iloc[:1400]['store_name_category'].unique()

array(['ALLEGRO COFFEE', 'BLACK ROCK COFFEE BAR'], dtype=object)

In [127]:
# input_cat_id_df = cat_id_df[['store_name_category', 'origin_id']].sample(100)
input_cat_id_df = cat_id_df[['store_name_category', 'origin_id']]

trips_by_origin_cat_df_lst = input_cat_id_df.apply(lambda r: get_trips_single_cat(r[0], r[1]), axis=1)

trips_by_origin_cat_df = pd.concat(trips_by_origin_cat_df_lst.values, sort=False)

trips_by_origin_cat_df

Unnamed: 0_level_0,trip_distance_miles_ALLEGRO COFFEE_01,trip_time_minutes_ALLEGRO COFFEE_01,trips_market_penetration_ALLEGRO COFFEE_01,trip_destination_id_ALLEGRO COFFEE_01,trip_distance_miles_ALLEGRO COFFEE_02,trip_time_minutes_ALLEGRO COFFEE_02,trips_market_penetration_ALLEGRO COFFEE_02,trip_destination_id_ALLEGRO COFFEE_02,trip_distance_miles_ALLEGRO COFFEE_03,trip_time_minutes_ALLEGRO COFFEE_03,...,trips_market_penetration_UNCLE GARY'S COFFEE EMPORIUM_01,trip_destination_id_UNCLE GARY'S COFFEE EMPORIUM_01,trip_distance_miles_UNCLE GARY'S COFFEE EMPORIUM_02,trip_time_minutes_UNCLE GARY'S COFFEE EMPORIUM_02,trips_market_penetration_UNCLE GARY'S COFFEE EMPORIUM_02,trip_destination_id_UNCLE GARY'S COFFEE EMPORIUM_02,trip_distance_miles_UNCLE GARY'S COFFEE EMPORIUM_03,trip_time_minutes_UNCLE GARY'S COFFEE EMPORIUM_03,trips_market_penetration_UNCLE GARY'S COFFEE EMPORIUM_03,trip_destination_id_UNCLE GARY'S COFFEE EMPORIUM_03
origin_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
060930004001,311.607615,272.600000,0.003344,723838910.0,,,,,,,...,,,,,,,,,,
410030002021,74.813068,80.483333,0.001764,723827347.0,,,,,,,...,,,,,,,,,,
410030002022,73.594560,83.183333,0.001138,723827347.0,,,,,,,...,,,,,,,,,,
410030004002,79.987225,110.916667,0.000807,723838910.0,,,,,,,...,,,,,,,,,,
410030004003,70.825731,90.416667,0.001379,723827347.0,78.924059,94.516667,0.001379,723838910.0,,,...,,,,,,,,,,
410030009003,68.689457,80.750000,0.001066,723827347.0,79.638014,98.283333,0.001066,723838910.0,,,...,,,,,,,,,,
410030011021,74.209096,71.133333,0.002770,723827347.0,,,,,,,...,,,,,,,,,,
410030101003,60.136285,63.333333,0.001016,723827347.0,,,,,,,...,,,,,,,,,,
410030106001,71.860313,72.933333,0.001355,723827347.0,,,,,,,...,,,,,,,,,,
410030107023,72.853886,73.416667,0.002242,723827347.0,,,,,,,...,,,,,,,,,,


In [128]:
cols = [arcpy.ValidateFieldName(col.replace("'",''), gdb_int) for col in trips_by_origin_cat_df.columns]
trips_by_origin_cat_df.columns = cols

In [131]:
trips_by_origin_cat_df.sample(5)

Unnamed: 0_level_0,trip_distance_miles_ALLEGRO_COFFEE_01,trip_time_minutes_ALLEGRO_COFFEE_01,trips_market_penetration_ALLEGRO_COFFEE_01,trip_destination_id_ALLEGRO_COFFEE_01,trip_distance_miles_ALLEGRO_COFFEE_02,trip_time_minutes_ALLEGRO_COFFEE_02,trips_market_penetration_ALLEGRO_COFFEE_02,trip_destination_id_ALLEGRO_COFFEE_02,trip_distance_miles_ALLEGRO_COFFEE_03,trip_time_minutes_ALLEGRO_COFFEE_03,...,trips_market_penetration_UNCLE_GARYS_COFFEE_EMPORIUM_01,trip_destination_id_UNCLE_GARYS_COFFEE_EMPORIUM_01,trip_distance_miles_UNCLE_GARYS_COFFEE_EMPORIUM_02,trip_time_minutes_UNCLE_GARYS_COFFEE_EMPORIUM_02,trips_market_penetration_UNCLE_GARYS_COFFEE_EMPORIUM_02,trip_destination_id_UNCLE_GARYS_COFFEE_EMPORIUM_02,trip_distance_miles_UNCLE_GARYS_COFFEE_EMPORIUM_03,trip_time_minutes_UNCLE_GARYS_COFFEE_EMPORIUM_03,trips_market_penetration_UNCLE_GARYS_COFFEE_EMPORIUM_03,trip_destination_id_UNCLE_GARYS_COFFEE_EMPORIUM_03
origin_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
530150020025,,,,,,,,,,,...,,,,,,,,,,
530670115001,,,,,,,,,,,...,,,,,,,,,,
410670308052,,,,,,,,,,,...,,,,,,,,,,
410670319123,,,,,,,,,,,...,0.006579,268703220.0,9.710475,20.483333,0.006579,520835364.0,5.490434,21.166667,0.003289,726370456.0
410579604004,,,,,,,,,,,...,,,,,,,,,,


In [None]:
trips_by_origin_cat_df.spatial.to_table(os.path.join(gdb_int, 'bg_to_store_pivot'))

In [130]:
bg_stats_df = block_group_sum_df.set_index('origin_id').join(trips_by_origin_cat_df)
bg_stats_df.sample(5)

MemoryError: 

In [29]:
bg_stats_df.reset_index(inplace=True)
bg_stats_df.sample(5)

In [33]:
bg_stats_df.rename({"index": "block_group_id"}, axis=1, inplace=True)

In [34]:
bg_stats_df.fillna(0, inplace=True)

In [35]:
bg_stats_df

Unnamed: 0,block_group_id,gender_pop0_cy,gender_pop5_cy,gender_pop10_cy,gender_pop15_cy,gender_pop20_cy,gender_pop25_cy,gender_pop30_cy,gender_pop35_cy,gender_pop40_cy,...,trip_time_minutes_OTHER_03,trip_time_minutes_STARBUCKS_01,trip_time_minutes_STARBUCKS_02,trip_time_minutes_STARBUCKS_03,trip_time_minutes_STUMPTOWN_COFFEE_ROASTERS_01,trip_time_minutes_STUMPTOWN_COFFEE_ROASTERS_02,trip_time_minutes_STUMPTOWN_COFFEE_ROASTERS_03,trip_time_minutes_UNCLE_GARYS_COFFEE_EMPORIUM_01,trip_time_minutes_UNCLE_GARYS_COFFEE_EMPORIUM_02,trip_time_minutes_UNCLE_GARYS_COFFEE_EMPORIUM_03
0,060490002001,73.0,75.0,81.0,71.0,54.0,50.0,51.0,62.0,63.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,060930001001,127.0,112.0,106.0,98.0,110.0,105.0,100.0,77.0,76.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,060930002001,117.0,117.0,119.0,112.0,110.0,115.0,110.0,114.0,96.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,060930003001,62.0,70.0,80.0,77.0,64.0,66.0,62.0,71.0,70.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,060930004001,19.0,20.0,39.0,19.0,23.0,26.0,22.0,23.0,25.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,060930004001,19.0,20.0,39.0,19.0,23.0,26.0,22.0,23.0,25.0,...,0.0,270.266667,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,060930004001,19.0,20.0,39.0,19.0,23.0,26.0,22.0,23.0,25.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,272.6,0.0,0.0
7,410019501001,18.0,22.0,27.0,32.0,19.0,24.0,28.0,26.0,28.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,410019501002,64.0,60.0,43.0,46.0,43.0,45.0,48.0,34.0,51.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,410019501003,41.0,53.0,65.0,50.0,32.0,29.0,44.0,37.0,34.0,...,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
bg_stats_df['trips_market_penetration']

0         0.003344
1         0.003344
2         0.003344
3         0.002008
4         0.002008
5         0.001848
6         0.001848
7         0.001848
8         0.001848
9         0.001656
10        0.001656
11        0.001656
12        0.001656
13        0.001828
14        0.001828
15        0.001650
16        0.001650
17        0.001650
18        0.000784
19        0.000784
20        0.000784
21        0.000784
22        0.000784
23        0.000784
24        0.000784
25        0.000784
26        0.001764
27        0.001764
28        0.001764
29        0.001764
            ...   
144396    0.000986
144397    0.000986
144398    0.001395
144399    0.001202
144400    0.000913
144401    0.006024
144402    0.001206
144403    0.002538
144404    0.002538
144405    0.002538
144406    0.002538
144407    0.002538
144408    0.002538
144409    0.002538
144410    0.002538
144411    0.002538
144412    0.002538
144413    0.001639
144414    0.000806
144415    0.000939
144416    0.000875
144417    0.