In [None]:
import getpass
import os
import sys

import pandas as pd


# Account name used to locate the Box-synced "Utility Code" folder.
# $USER is preferred (the original behavior); getpass.getuser() is the fallback
# so the notebook still starts when the kernel is launched without USER set,
# instead of failing with a bare KeyError.
user = os.environ.get('USER') or getpass.getuser()

# Put the shared utility folder first on sys.path so `utils_io` is resolved from it.
sys.path.insert(0, '/Users/{}/Box/DataViz Projects/Utility Code'.format(user))

# NOTE(review): the star import hides where names come from. The cells below call
# create_socrata_dataset, pull_df_from_redshift_sql, upsert_df_socrata and
# publish_socrata_dataset, which are presumably provided by utils_io -- confirm
# before switching to explicit imports.
from utils_io import *

# Create Urbansim datasets on Socrata

Each section below runs the same steps for one Urbansim table:

- initialize Socrata dataset
    - specify data types
    - specify metadata
        - name
        - description
        - tags
        - category
- pull Urbansim data from Redshift tables
- upsert rows in Socrata with Urbansim table data
- publish Socrata dataset

## Buildings

In [None]:
# Socrata data type for each Buildings column, keyed by column name.
# Possible data types are listed at https://dev.socrata.com/docs/datatypes/
buildings_ctype_dict = {
    'apn': 'text',
    'jurisdiction_cty': 'text',
    'parcel_id': 'text',
    'building_id': 'text',
    'building_type': 'text',
    'building_sqft': 'number',
    'non_residential_sqft': 'number',
    'residential_units': 'number',
    'year_built': 'number',
    'assessed_building_value': 'number',
    'assessed_date': 'number',
    'last_sale_price': 'number',
    'last_sale_date': 'number',
    'tenure': 'text',
    'rent_type': 'text',
    'joinid': 'text',
    'jurisdict': 'text',
    'fipco': 'text',
}

# Columns must have these fields: fieldName, name, dataTypeName.
# Field names must be lowercase (English) letters, digits and underscore (must
# start with a letter or underscore), so the key is lowercased for fieldName
# while the original key is kept as the display name. This matches how the
# Parcels columns are built and keeps the cell safe if a mixed-case key is added.
columns = [
    {'fieldName': col.lower(), 'name': col, 'dataTypeName': ctype}
    for col, ctype in buildings_ctype_dict.items()
]

# Dataset-level metadata passed to Socrata.
dataset_name = 'Urbansim Buildings'
dataset_desc = 'Urbansim Buildings dataset'
tags = ['basis', 'land use data processing', 'parcels']
category = 'Land Use'

# Create the dataset; the returned metadata's 'id' is used by the upsert and
# publish calls below.
dataset_metadata = create_socrata_dataset(dataset_name, dataset_desc, columns, category, tags)
socrata_data_id = dataset_metadata['id']

# Download the basis.urbansim_buildings table as a DataFrame.
sql_statement = 'select * from basis.urbansim_buildings'
df = pull_df_from_redshift_sql(sql_statement, dbname='dev')

# Upsert the rows into the new dataset (per the original note, the helper
# uploads in chunks -- not verifiable from this notebook).
upsert_df_socrata(df, socrata_data_id)

# Publish the dataset.
publish_socrata_dataset(socrata_data_id)

## Parcels

In [None]:
# Socrata data type for each Parcels column, keyed by column name.
# Reference for the available types: https://dev.socrata.com/docs/datatypes/
parcels_ctype_dict = {
    'apn': 'text',
    'jurisdiction_cty': 'text',
    'parcel_id': 'text',
    'assessed_land_value': 'number',
    'assessed_date': 'number',
    'acres': 'number',
    'county_id': 'text',
    'county': 'text',
    'zone_id': 'text',
    'taz22': 'text',
    'maz': 'text',
    'X': 'number',
    'Y': 'number',
    'juris_id': 'text',
    'pda_id': 'text',
    'tpa_id': 'text',
    'opp_id': 'text',
    'exp_id': 'text',
    'exp_score': 'text',
    'zoningmodcat': 'text',
    'joinid': 'text',
    'jurisdict': 'text',
    'fipco': 'text',
}

# Every column entry carries fieldName, name and dataTypeName.
# fieldName is lowercased because Socrata field names must be lowercase
# (English) letters, digits and underscore, starting with a letter or
# underscore; the display name keeps the original casing (e.g. 'X', 'Y').
columns = [
    dict(fieldName=col.lower(), name=col, dataTypeName=ctype)
    for col, ctype in parcels_ctype_dict.items()
]

# Dataset-level metadata and the source query.
dataset_name = 'Urbansim Parcels'
dataset_desc = 'Urbansim Parcels dataset'
category = 'Land Use'
tags = ['basis', 'land use data processing', 'parcels']
sql_statement = 'select * from basis.urbansim_parcels'

# Step 1: create the dataset and keep its id for the later calls.
dataset_metadata = create_socrata_dataset(
    dataset_name, dataset_desc, columns, category, tags
)
socrata_data_id = dataset_metadata['id']

# Step 2: pull the basis.urbansim_parcels table into a DataFrame.
df = pull_df_from_redshift_sql(sql_statement, dbname='dev')

# Step 3: upsert the rows (original note: done in chunks).
upsert_df_socrata(df, socrata_data_id)

# Step 4: publish the dataset.
publish_socrata_dataset(socrata_data_id)