In [1]:
%matplotlib inline

from arcgis.features import GeoAccessor
from arcgis.geometry import Geometry
import pandas as pd
import os
import numpy as np
import itertools
from sklearn.pipeline import Pipeline

import sys
sys.path.append('../src/geoai-retail')
from data import *

# Interim file geodatabase and the feature classes read from it.
# These are raw strings, so each path separator is written as a single
# backslash; doubling it inside r'...' would put two literal backslashes
# into the path.
gdb_int = r'D:\projects\geoai-retail\data\interim\interim.gdb'
block_group_fc = os.path.join(gdb_int, 'block_groups_enriched')
trips_fc = os.path.join(gdb_int, 'trips')

# Raw file geodatabase and the store feature class read from it.
gdb_raw = r'D:\projects\geoai-retail\data\raw\raw.gdb'
stores_fc = os.path.join(gdb_raw, 'coffee')

# Load IPython's autoreload extension and select mode 2, which re-imports
# modules before each cell runs so edits to the project code are picked up.
# NOTE(review): this runs after the `from data import *` earlier in this cell;
# names added to that module later may still need a re-import — confirm.
%load_ext autoreload
%autoreload 2

In [2]:
# Read the three feature classes through GeoAccessor.from_featureclass, in the
# order block groups -> trips -> stores, and bind each result to its own name.
block_group_df, trips_df, stores_df = [
    GeoAccessor.from_featureclass(feature_class)
    for feature_class in (block_group_fc, trips_fc, stores_fc)
]

In [3]:
# CSV path handed to the loader's constructor.
# (presumably a field-alias lookup table — confirm against the loader class)
alias_table_csv = '../data/interim/alias_table.csv'

# Run the loader against the enriched block-group feature class.
bg_loader = DemographicFeatureClassToDataframe(alias_table_csv)
bg_df = bg_loader.fit_transform(block_group_fc)

# Preview five randomly selected rows of the result.
bg_df.sample(n=5)

Unnamed: 0_level_0,gender_pop0_cy,gender_pop5_cy,gender_pop10_cy,gender_pop15_cy,gender_pop20_cy,gender_pop25_cy,gender_pop30_cy,gender_pop35_cy,gender_pop40_cy,gender_pop45_cy,...,educationalattainment_ged_cy,educationalattainment_smcoll_cy,educationalattainment_asscdeg_cy,educationalattainment_bachdeg_cy,educationalattainment_graddeg_cy,educationalattainment_educbasecy,households_acshhbpov,households_acshhapov,households_acsbpovmcf,SHAPE
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
410470020003,133.0,150.0,181.0,195.0,156.0,124.0,127.0,171.0,175.0,184.0,...,24.0,370.0,144.0,475.0,314.0,1652.0,18.0,834.0,18.0,"{""rings"": [[[-122.99843999899997, 44.885180000..."
530610412021,110.0,106.0,86.0,70.0,94.0,116.0,116.0,94.0,94.0,95.0,...,0.0,233.0,59.0,33.0,0.0,872.0,229.0,457.0,30.0,"{""rings"": [[[-122.21232999899996, 47.931250000..."
530530617004,55.0,49.0,42.0,50.0,73.0,95.0,89.0,55.0,41.0,50.0,...,53.0,130.0,36.0,83.0,36.0,506.0,64.0,168.0,0.0,"{""rings"": [[[-122.44629999999995, 47.233740000..."
530610410003,70.0,75.0,76.0,71.0,92.0,103.0,113.0,123.0,113.0,111.0,...,42.0,355.0,152.0,217.0,27.0,1183.0,133.0,569.0,0.0,"{""rings"": [[[-122.20657999999997, 47.949440000..."
410191600004,79.0,79.0,80.0,75.0,76.0,96.0,97.0,83.0,62.0,53.0,...,65.0,399.0,18.0,0.0,20.0,714.0,146.0,163.0,108.0,"{""rings"": [[[-123.41537849999997, 43.115381000..."


In [4]:
# Two-step store pipeline: load the store feature class, then run the
# count-based reclassification with a threshold of 3.
# (presumably brands with fewer stores than the threshold become "OTHER", as
# the store_class / store_class_original columns in the output suggest — confirm)
store_steps = [
    ('loader', EsriLocatedStoresFeatureClassToDataframe()),
    ('count_reclass', StoreClassifyByCount(store_count_threshold=3)),
]
store_pipe = Pipeline(steps=store_steps)

store_df = store_pipe.fit_transform(stores_fc)

# Preview five randomly selected stores.
store_df.sample(n=5)

Unnamed: 0_level_0,store_class,store_class_original,SHAPE
store_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
417755893,OTHER,LOTUS & BEAN ESPRESSO BAR,"{""x"": -122.67479999964382, ""y"": 45.51899999996..."
232331066,STARBUCKS,STARBUCKS,"{""x"": -122.67160000001041, ""y"": 45.63939999967..."
396149163,STARBUCKS,STARBUCKS,"{""x"": -122.33589999959293, ""y"": 45.57850000027..."
632063715,STARBUCKS,STARBUCKS,"{""x"": -122.97439999982186, ""y"": 45.52079999950..."
706070674,DUTCH BROTHERS COFFEE,DUTCH BROTHERS COFFEE,"{""x"": -122.87830000045602, ""y"": 45.75159999956..."


In [5]:
# Keep only the stores of a single brand. The boolean filter yields a slice of
# store_df, and the recorded run shows the plot call writing an OBJECTID column
# onto the frame it is given (raising SettingWithCopyWarning), so hand it an
# explicit, independent copy instead of the slice.
is_uncle_garys = store_df['store_class'] == "UNCLE GARY'S COFFEE EMPORIUM"
uncle_garys_df = store_df[is_uncle_garys].copy()

# Plot the selected stores on a map widget and switch its basemap.
web_map = uncle_garys_df.spatial.plot()
web_map.basemap = 'streets-relief-vector'

# Last expression of the cell: renders the map widget.
web_map

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._data['OBJECTID'] = list(range(1, self._data.shape[0] + 1))


MapView(layout=Layout(height='400px', width='100%'))

In [6]:
# Origin summary pipeline, applied to the trips frame in three steps:
#   1. summarize trips by origin and destination,
#   2. attach the block-group demographics frame (bg_df),
#   3. compute market penetration from 'trip_count' and
#      'householdtotals_tothh_cy'.
origin_sum_steps = [
    ('summ_trips', SummarizeInrixTripsByOriginAndDestination()),
    ('add_block_group_df', AddPolygonOriginDataframeToInrixTripSummaryDataframe(bg_df)),
    ('calc_mkt_pen', CalculateMarketPenetration('trip_count', 'householdtotals_tothh_cy')),
]
origin_sum_pipe = Pipeline(steps=origin_sum_steps)

origin_sum_df = origin_sum_pipe.fit_transform(trips_df)

# Preview the first five rows.
origin_sum_df.head()

Unnamed: 0,origin_id,destination_id,trip_distance_miles,trip_time_minutes,trip_count,gender_pop0_cy,gender_pop5_cy,gender_pop10_cy,gender_pop15_cy,gender_pop20_cy,...,educationalattainment_smcoll_cy,educationalattainment_asscdeg_cy,educationalattainment_bachdeg_cy,educationalattainment_graddeg_cy,educationalattainment_educbasecy,households_acshhbpov,households_acshhapov,households_acsbpovmcf,SHAPE,market_penetration
0,60930004001,219649035,311.607615,272.6,1,19.0,20.0,39.0,19.0,23.0,...,169.0,37.0,37.0,34.0,507.0,42.0,226.0,22.0,"{'rings': [[[-122.368049999, 42.00933000000003...",0.003344
1,60930004001,718176685,261.898556,270.266667,1,19.0,20.0,39.0,19.0,23.0,...,169.0,37.0,37.0,34.0,507.0,42.0,226.0,22.0,"{'rings': [[[-122.368049999, 42.00933000000003...",0.003344
2,60930004001,723838910,311.607615,272.6,1,19.0,20.0,39.0,19.0,23.0,...,169.0,37.0,37.0,34.0,507.0,42.0,226.0,22.0,"{'rings': [[[-122.368049999, 42.00933000000003...",0.003344
3,410019502002,105830012,304.35373,276.283333,1,78.0,75.0,71.0,72.0,78.0,...,264.0,18.0,128.0,30.0,792.0,170.0,294.0,19.0,"{'rings': [[[-117.82904999999994, 44.777120000...",0.002008
4,410019502002,732273983,304.35373,276.283333,1,78.0,75.0,71.0,72.0,78.0,...,264.0,18.0,128.0,30.0,792.0,170.0,294.0,19.0,"{'rings': [[[-117.82904999999994, 44.777120000...",0.002008


In [7]:
from data import *

# Field names passed to the proximity-metrics step below.
proximity_metric_fields = [
    'trip_distance_miles',
    'trip_time_minutes',
]
proximity_sort_field = 'trip_distance_miles'
measurement_metric_field = 'market_penetration'

# Full pipeline over the trips frame: summarize trips, attach the store and
# origin-summary frames, compute per-store-class proximity metrics, then attach
# the block-group demographics.
bg_trips_steps = [
    ('summ_trips', SummarizeInrixTripsByOriginAndDestination()),
    ('add_stores', AddStoresDataframeToTripsDataframe(store_df)),
    ('add_block_groups', AddBlockGroupDataframeToTripsDataframe(origin_sum_df)),
    (
        'calc_dest_metrics',
        CalculateOriginProximityMetricsByStoreClass(
            proximity_metric_fields,
            proximity_sort_field,
            measurement_metric_field,
        ),
    ),
    ('add_demog', AddDemographicsToProximityMetrics(bg_df)),
]
bg_trips_pipe = Pipeline(steps=bg_trips_steps)

final_df = bg_trips_pipe.fit_transform(trips_df)

# Preview the first five rows.
final_df.head(n=5)

Unnamed: 0_level_0,gender_pop0_cy,gender_pop5_cy,gender_pop10_cy,gender_pop15_cy,gender_pop20_cy,gender_pop25_cy,gender_pop30_cy,gender_pop35_cy,gender_pop40_cy,gender_pop45_cy,...,trip_time_minutes_OTHER_03,trip_time_minutes_STARBUCKS_01,trip_time_minutes_STARBUCKS_02,trip_time_minutes_STARBUCKS_03,trip_time_minutes_STUMPTOWN_COFFEE_ROASTERS_01,trip_time_minutes_STUMPTOWN_COFFEE_ROASTERS_02,trip_time_minutes_STUMPTOWN_COFFEE_ROASTERS_03,trip_time_minutes_UNCLE_GARYS_COFFEE_EMPORIUM_01,trip_time_minutes_UNCLE_GARYS_COFFEE_EMPORIUM_02,trip_time_minutes_UNCLE_GARYS_COFFEE_EMPORIUM_03
origin_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
410030103003,28.0,43.0,53.0,45.0,43.0,52.0,47.0,38.0,36.0,56.0,...,,81.05,,,,,,,,
410110001001,29.0,30.0,32.0,29.0,30.0,29.0,27.0,40.0,38.0,38.0,...,,,,,,,,,,
410110001002,13.0,15.0,17.0,18.0,18.0,21.0,21.0,22.0,22.0,37.0,...,,206.733333,,,,,,,,
410110001003,29.0,29.0,31.0,36.0,56.0,74.0,83.0,73.0,63.0,87.0,...,,,,,,,,,,
410110001004,43.0,47.0,59.0,48.0,42.0,52.0,51.0,56.0,50.0,57.0,...,,,,,,,,,,


In [8]:
# Convert the final frame to a feature set through its spatial accessor, then
# display it (the recorded output reports the feature count).
final_fs = final_df.spatial.to_featureset()
final_fs

<FeatureSet> 6963 features

In [10]:
# Save the feature set into the interim geodatabase as 'block_group_final'.
# NOTE(review): the recorded run of this cell failed in JSONToFeatures with
# ERROR 001558 ("The value type is incompatible with the field type") on field
# dest_id_OTHER_01, preceded by "truth value of a DataFrame is ambiguous"
# errors. Presumably that column holds mixed or missing values — confirm its
# dtype and cast it before saving. TODO: resolve before relying on this output.
final_fs.save(gdb_int, 'block_group_final')

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

ExecuteError: ERROR 001558: Error parsing json file 'C:\Users\joel5174\AppData\Local\Temp\8f3694f2da0347e8b8c8c3e1abe69b6e.json'. The value type is incompatible with the field type. [dest_id_OTHER_01]
Failed to execute (JSONToFeatures).


In [11]:
# Export the final frame (SHAPE column included) to CSV in the interim folder.
# Raw string with single backslashes: doubling them inside r'...' would put
# two literal backslashes between every path component.
final_df.to_csv(r'D:\projects\geoai-retail\data\interim\block_group_final.csv')

In [12]:
# Drop the geometry column so the CSV holds only tabular attributes.
final_nonspatial_df = final_df.drop(columns='SHAPE')

# Raw string with single backslashes: doubling them inside r'...' would put
# two literal backslashes between every path component.
final_nonspatial_df.to_csv(r'D:\projects\geoai-retail\data\interim\block_group_final_nonspatial.csv')