In [2]:
!pip install -U pyarrow

Requirement already up-to-date: pyarrow in /opt/conda/lib/python3.7/site-packages (5.0.0)


In [3]:
# Source: https://github.com/prestinomills/aqueduct/blob/Know_Your_Community_Pipelines/civis/geohub/ActiveBusinessBlockgroupAggregation/Active_Business_Finalized_Script.py
# %load active_business_script.py
"""
Created on Wed May  1 08:51:03 2019
@author: myrfid041
"""
import geopandas as gpd
import os
import pandas as pd

import utils
from civis_aqueduct_utils.github import upload_file_to_github

from sodapy import Socrata
from arcgis.gis import GIS
from arcgis.features.summarize_data import join_features
from arcgis.features import FeatureLayer, FeatureLayerCollection
from copy import deepcopy
from IPython.display import display

import esri_credentials
# Login used for the GIS(...) connection to lahub.maps.arcgis.com below.
# The values live in the local esri_credentials module rather than in this notebook.
lahub_user = esri_credentials.preston_user
lahub_pass = esri_credentials.preston_pw

#---Setting the Outputs
# OUTPUT_FILE: relative path to a CSV; not referenced by any cell in this part of the notebook.
OUTPUT_FILE = "./Listing_of_Active_Businesses.csv"
# output_layer_name: despite the name, this value is what gets passed to
# gis.content.get(), i.e. it is used as the identifier of the hosted item.
output_layer_name = "a7236cc62ded454c94a64e9d80d6304a"

# Column names on the ESRI layer differ from the dataframe's column names:
# spaces and punctuation show up as underscores, and the long names are cut
# short (the longest values below are 31 characters).
# Each pair is (dataframe's existing column name, ESRI column name); the
# resulting dict is what gets passed to DataFrame.rename(columns=...).
LAYER_RENAME_COLUMNS_DICT = dict([
    ('Accommodation and Food Services', 'Accommodation_and_Food_Services'),
    ('Administrative and Support and Waste Management and Remediation Services',
     'Administrative_and_Support_and_'),
    ('Agriculture, Forestry, Fishing and Hunting', 'Agriculture__Forestry__Fishing_'),
    ('Arts, Entertainment, and Recreation', 'Arts__Entertainment__and_Recrea'),
    ('Construction', 'Construction'),
    ('Educational Services', 'Educational_Services'),
    ('Finance and Insurance', 'Finance_and_Insurance'),
    ('Health Care and Social Assistance', 'Health_Care_and_Social_Assistan'),
    ('Information', 'Information'),
    ('Manufacturing', 'Manufacturing'),
    ('Medical Marijuana Collective', 'Medical_Marijuana_Collective'),
    ('Mining', 'Mining'),
    ('Not Classified', 'Not_Classified'),
    ('Other Services (except Public Administration)', 'Other_Services__except_Public_A'),
    ('Professional, Scientific, and Technical Services', 'Professional__Scientific__and_T'),
    ('Real Estate Rental and Leasing', 'Real_Estate_Rental_and_Leasing'),
    ('Retail Trade', 'Retail_Trade'),
    ('Transportation and Warehousing', 'Transportation_and_Warehousing'),
    ('Utilities', 'Utilities'),
    ('Wholesale Trade', 'Wholesale_Trade'),
])

In [4]:
# Load the cleaned dataframe from a local pickle file.
# Presumably written by an earlier cleaning step that is not part of this notebook -- TODO confirm.
# NOTE(review): unpickling can execute arbitrary code; only load this file if it
# comes from a trusted source.
df = pd.read_pickle("./cleaned_df.p")

In [5]:
# Sign in to the ArcGIS organization and fetch the hosted item identified by
# output_layer_name.
gis = GIS('https://lahub.maps.arcgis.com', username=lahub_user, password=lahub_pass)
flayer = gis.content.get(output_layer_name)
# Only the item's first layer is used from here on.
ActiveBusinesses_flayer = flayer.layers[0]
ActiveBusinesses_fset = ActiveBusinesses_flayer.query() #querying without any conditions returns all the features

In [6]:
# Current contents of the layer as a spatial dataframe.
existing_table = ActiveBusinesses_fset.sdf

# ESRI-side names of the industry columns; these are the only fields to refresh.
industry_cols = list(LAYER_RENAME_COLUMNS_DICT.values())

# Keep the rows of df whose GEOID10 already exists in the layer, switch to the
# ESRI column names, then narrow down to GEOID10 plus the industry columns.
# No geometry column is selected, so the layer's geometry is left intact.
new_updated_table = (
    df.loc[df["GEOID10"].isin(existing_table["GEOID10"])]
    .rename(columns=LAYER_RENAME_COLUMNS_DICT)
    [["GEOID10", *industry_cols]]
)

# Missing values become 0 and every industry column is cast to integer.
new_updated_table[industry_cols] = (
    new_updated_table[industry_cols]
    .fillna(0)
    .astype(int)
)


In [7]:
#new_updated_table.to_parquet("./new_updated_table.parquet", engine="pyarrow")

In [None]:
# Introduce changes to look for:
# every industry column of the two GEOIDs below is set to 3,000 so the edit is
# easy to look for afterwards.
check_me = ["060372932023", "060372941201"]

# One masked assignment covers all industry columns at once, instead of a
# row-by-row apply over the whole table repeated for each column.
new_updated_table.loc[
    new_updated_table["GEOID10"].isin(check_me),
    list(LAYER_RENAME_COLUMNS_DICT.values()),
] = 3_000

In [None]:
# Display only the rows for the GEOIDs listed in check_me.
new_updated_table.loc[new_updated_table["GEOID10"].isin(check_me)]

In [None]:
# Nested lookup keyed by GEOID10: {geoid: {column name: value}}.
updated_values_dict = (
    new_updated_table
    .set_index("GEOID10")
    .to_dict(orient="index")
)

In [None]:
# Grab the features, save as a list
feature_list = ActiveBusinesses_fset.features
# [:] makes a shallow copy (assuming .features is a plain list -- TODO confirm):
# both names refer to the SAME feature objects, so editing a feature through
# features_to_be_updated also changes what feature_list shows.
# deepcopy is imported at the top of the notebook but is not used here.
features_to_be_updated = feature_list[:]

In [None]:
# Walk every feature pulled from the layer and, for the GEOIDs listed in
# check_me, overwrite each industry field with the value stored in
# updated_values_dict.
# The try/except lives inside the loop body so that every feature is examined,
# not just the last one the loop variable ends up pointing at.
# A KeyError is the expected failure: some GEOIDs in the original sdf aren't in
# our df (about 10ish). Those features keep their original values, since we
# don't have updated values for them. Any other error is allowed to surface.
for obs in features_to_be_updated:
    try:
        geoid = obs.attributes["GEOID10"]
        if geoid not in check_me:
            continue
        new_values = updated_values_dict[geoid]
        for col in LAYER_RENAME_COLUMNS_DICT.values():
            # obs is the very object stored in features_to_be_updated, so this
            # edits the list entry in place.
            obs.attributes[col] = new_values[col]
    except KeyError:
        continue

In [None]:
# Show only the features for the GEOIDs in check_me; echoing the whole list
# would print every feature returned by the layer query.
[f for f in features_to_be_updated if f.attributes["GEOID10"] in check_me]

In [None]:
# Hand the edited features and the target layer to the project helper utils.chunks.
# NOTE(review): presumably this sends the features to the layer in batches of
# 1000 -- confirm against utils.chunks, which is not shown in this notebook.
utils.chunks(features_to_be_updated, 1000, ActiveBusinesses_flayer)