In [8]:
import json
import math
import os
from random import randint, seed, uniform

import pandas as pd
from tqdm.notebook import tqdm

In [9]:
# Build compiled.csv from the raw GeoJSON exports, unless a previous run
# already produced it (delete compiled.csv to force a rebuild).
if not os.path.exists("compiled.csv"):
    # GeoJSON exports to merge; each row remembers its file of origin
    files = ["residential.geojson", "commercial.geojson"]

    # list of columns to keep (identical for every file, so defined once)
    features = ["properties.PIN", "properties.PROP_NAME", "properties.LAND_USE_DESC", "properties.LAND_USE_DESC_10", "properties.STATUS_TEXT_2", "geometry.type", "geometry.coordinates"]

    # per-file frames are collected here and concatenated once at the end
    frames = []

    for file in files:
        print(f"Reading {file}...")
        # GeoJSON is UTF-8 by specification; do not rely on the platform default
        with open(file, encoding="utf-8") as f:
            gj = json.load(f)

        # flatten the feature list, keep the wanted columns and tag the origin.
        # .assign returns a new frame, which avoids SettingWithCopyWarning on
        # the column subset.
        df_temp = pd.json_normalize(gj['features'])[features].assign(file=file)

        # queue the frame for the final concatenation
        print(f"Appending {file} to main dataframe...")
        frames.append(df_temp)
        print(f"Done appending {file} to main dataframe...")

    df = pd.concat(frames, ignore_index=True)

    # save the pandas dataframe as a csv file
    print("Saving dataframe to csv...")
    df.to_csv("compiled.csv", index=False)

    # No preview here: an expression nested inside `if` is never displayed by
    # Jupyter. The next cell reloads the CSV and shows a sample.

In [10]:
# Always reload from the cached CSV, just in case the compile step above was
# skipped in this session; then preview ten random rows.
df = pd.read_csv(filepath_or_buffer="compiled.csv")
df.sample(n=10)

Unnamed: 0,properties.PIN,properties.PROP_NAME,properties.LAND_USE_DESC,properties.LAND_USE_DESC_10,properties.STATUS_TEXT_2,geometry.type,geometry.coordinates,file
79181,6380500090,,Single Family(Res Use/Zone),Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.358912940811, 47.7147533108691], [-122...",residential.geojson
158712,2908700010,Office Building (Assoc Parking Acct #290870-0005),Office Building,Commercial/Mixed-Use,(21% - 40% of allowed density),Polygon,"[[[-122.314948511347, 47.6060883489188], [-122...",commercial.geojson
100739,3680400195,,Single Family(Res Use/Zone),Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.300688308608, 47.5664677749665], [-122...",residential.geojson
48853,8673400172,Townhouse,Townhouse Plat,Multi-Family,(41% - 80% of allowed density),Polygon,"[[[-122.391748727971, 47.6692411852709], [-122...",residential.geojson
122071,1604601358,VACANT LOT,Vacant(Multi-family),Vacant,(under 20% of allowed density),Polygon,"[[[-122.289598025286, 47.5645224621732], [-122...",residential.geojson
15480,1118000895,,Single Family(Res Use/Zone),Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.290122380038, 47.6370016290403], [-122...",residential.geojson
44730,3904100059,TOWNHOUSE UNIT - C,Townhouse Plat,Multi-Family,(81% - 100% of allowed density),Polygon,"[[[-122.278595109809, 47.534298259806], [-122....",residential.geojson
54755,6411600087,,Single Family(Res Use/Zone),Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.330090073665, 47.7140004824209], [-122...",residential.geojson
128340,1862400187,,Single Family(Res Use/Zone),Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.370623715073, 47.6969702136442], [-122...",residential.geojson
162995,1972203760,DIAMOND MACHINE WORKS,Industrial(Gen Purpose),Industrial,(41% - 80% of allowed density),Polygon,"[[[-122.356030767352, 47.6524658251445], [-122...",commercial.geojson


In [11]:
# Distinct raw LAND_USE_DESC values among rows that came from the residential file
residential_rows = df["file"] == "residential.geojson"
df.loc[residential_rows, "properties.LAND_USE_DESC"].unique()

array(['Single Family(Res Use/Zone)', 'Duplex', 'Triplex',
       'Townhouse Plat', 'Nursing Home', 'Apartment',
       'Condominium(Residential)', 'Single Family(C/I Zone)',
       'Vacant(Single-family)', '4-Plex', 'Unknown',
       'Vacant(Multi-family)', 'Utility, Private(Radio/T.V.)',
       'Retail Store', 'Parking(Assoc)', 'Apartment(Mixed Use)',
       'Group Home', 'Apartment(Subsidized)', 'Parking(Commercial Lot)',
       'Industrial(Gen Purpose)', 'Restaurant/Lounge', 'Daycare Center',
       'Vacant(Commercial)', 'Easement', 'Industrial(Lignt)',
       'Single Family(C/I Use)', 'School(Public)', 'Office Building',
       'Retail(Line/Strip)', 'Grocery Store', 'Mobile Home',
       'Retirement Facility', 'Service Building', 'Vacant(Industrial)',
       'Warehouse', 'Conv Store with Gas', 'Club', 'Parking(Garage)',
       'Health Club', 'Medical/Dental Office', 'School(Private)',
       'Service Station', 'Apartment(Co-op)', 'Conv Store without Gas',
       'Park, Private(Amu

In [12]:
def occupancy_gen(string, rng=None):
    """Draw a random occupancy ratio for a density-bucket label.

    Parameters
    ----------
    string : str
        A density bucket label such as "(81% - 100% of allowed density)".
    rng : optional
        Object exposing ``uniform(a, b)`` (for example a ``random.Random``
        instance). When omitted, the ``uniform`` function imported from the
        ``random`` module is used, so results follow that module's global
        state. Passing a seeded generator makes the draw reproducible.

    Returns
    -------
    float
        A ratio inside the range named by the label. For the
        "more than 100%" bucket the value lies in [1.0, 3.0].

    Raises
    ------
    ValueError
        If ``string`` is not one of the five known labels (this includes
        missing values such as NaN).
    """
    draw = uniform if rng is None else rng.uniform

    if string == "(more than 100% of allowed density)":
        # Concave transform of a uniform draw, mapped onto [1.0, 3.0]:
        # u = 0 gives 1.0 and u = 1 gives 3.0.
        # NOTE(review): the upper bound produced here is 3.0, not 10.0 —
        # confirm that 3.0 is the intended cap.
        mu = 1.0
        u = draw(0, 1)
        x = 1.0 + (3.0 - 1.0) * (1 - math.exp(-mu * u)) / (1 - math.exp(-mu))
        return x
    elif string == "(81% - 100% of allowed density)":
        # uniform float between 0.81 and 1.0
        return draw(0.81, 1.0)
    elif string == "(41% - 80% of allowed density)":
        return draw(0.41, 0.8)
    elif string == "(21% - 40% of allowed density)":
        return draw(0.21, 0.4)
    elif string == "(under 20% of allowed density)":
        return draw(0.0, 0.2)
    else:
        raise ValueError(f"Invalid density string {string}")

In [13]:
# Raw LAND_USE_DESC values grouped by the residential category they map to.

# one dwelling unit ("Unknown" is counted here as well)
SINGLE_FAM_TAG = [
    "Single Family(Res Use/Zone)",
    "Single Family(C/I Zone)",
    "Single Family(C/I Use)",
    "Unknown",
    "Mobile Home",
]

# fixed two / three / four dwelling units
TWO_FAM_TAG = ["Duplex"]
THREE_FAM_TAG = ["Triplex"]
FOUR_FAM_TAG = ["4-Plex"]

# multi-unit properties whose unit count is drawn at random downstream
SMALL_MULTI_TAG = [
    "Townhouse Plat",
    "Nursing Home",
    "Group Home",
    "Apartment(Co-op)",
]
LARGE_MULTI_TAG = [
    "Apartment",
    "Condominium(Residential)",
    "Apartment(Subsidized)",
    "Retirement Facility",
    "Mobile Home Park",
    "Condominium(M Home Pk)",
]

# empty residential lots
VACANT = [
    "Vacant(Single-family)",
    "Vacant(Multi-family)",
]

# Normalise the residential LAND_USE_DESC labels and synthesise an occupancy
# for every matching row.
def residential_gen(_df):
    """Fill ``occupancy``, ``type`` and a normalised land-use label in place.

    Every row whose ``properties.LAND_USE_DESC`` appears in one of the tag
    lists (SINGLE_FAM_TAG ... VACANT) is processed. Rows are selected by tag
    only; the ``file`` column is not consulted. Rows with any other tag are
    left untouched.

    For a matched, non-vacant row the occupancy is
    ``int(occupancy_gen(status) * n_unit * OCCU_FAM)`` where ``status`` is the
    row's ``properties.STATUS_TEXT_2`` and ``n_unit`` is 1, 2, 3 or 4 for the
    single- to four-family tags, ``randint(4, 10)`` for small multi-family and
    ``randint(10, 50)`` for large multi-family. ``int`` truncates, so the
    result is rounded down. Vacant rows get occupancy 0 and consume no random
    draw.

    Parameters
    ----------
    _df : pandas.DataFrame
        Frame to update in place. Must contain ``properties.LAND_USE_DESC``
        and ``properties.STATUS_TEXT_2``. Index labels are assumed to be
        unique (as produced by ``pd.read_csv``).

    Returns
    -------
    None

    Raises
    ------
    ValueError
        Propagated from ``occupancy_gen`` when a matched, non-vacant row has
        an unknown density label. Nothing has been written to ``_df`` at that
        point, because all writes happen after the scan completes.

    Notes
    -----
    ``OCCU_FAM`` is read from module scope at call time, so it must be defined
    before this function is called.
    """
    def _constant(n_unit):
        # sampler that always reports the same unit count
        return lambda: n_unit

    # (raw tags, normalised label, unit-count sampler); a sampler of None
    # marks vacant lots
    categories = (
        (SINGLE_FAM_TAG, "Single Family", _constant(1)),
        (TWO_FAM_TAG, "Two Family", _constant(2)),
        (THREE_FAM_TAG, "Three Family", _constant(3)),
        (FOUR_FAM_TAG, "Four Family", _constant(4)),
        (SMALL_MULTI_TAG, "Small Multi Family", lambda: randint(4, 10)),
        (LARGE_MULTI_TAG, "Large Multi Family", lambda: randint(10, 50)),
        (VACANT, "Vacant", None),
    )

    # Flatten to one lookup per tag. setdefault keeps the first category that
    # lists a tag, i.e. the same precedence as an if/elif chain in this order.
    rules = {}
    for tags, label, unit_sampler in categories:
        for tag in tags:
            rules.setdefault(tag, (label, unit_sampler))

    # Scan without touching the frame; results are written in bulk afterwards
    # instead of mutating the frame while it is being iterated.
    rows, labels, occupancies = [], [], []
    records = zip(
        _df.index,
        _df["properties.LAND_USE_DESC"],
        _df["properties.STATUS_TEXT_2"],
    )
    for index, tag, status in tqdm(records, total=_df.shape[0]):
        rule = rules.get(tag)
        if rule is None:
            continue

        label, unit_sampler = rule
        if unit_sampler is None:
            occupancy = 0
        else:
            # the unit count is drawn before the density ratio
            n_unit = unit_sampler()
            occupancy = int(occupancy_gen(status) * n_unit * OCCU_FAM)

        rows.append(index)
        labels.append(label)
        occupancies.append(occupancy)

    # nothing matched: leave the frame exactly as it was (no "type" column)
    if not rows:
        return

    _df.loc[rows, "occupancy"] = occupancies
    _df.loc[rows, "properties.LAND_USE_DESC"] = labels
    _df.loc[rows, "type"] = "residential"

# Configuration for the synthetic occupancy model
RANDOM_SEED = 42  # arbitrary; fixed so the generated occupancies are reproducible
OCCU_FAM = 6 # max occupancy for single family residential (multiplier per dwelling unit)

# Seed the `random` module: uniform/randint draw from its global state
seed(RANDOM_SEED)

# Start from the raw compiled data every time. residential_gen rewrites
# properties.LAND_USE_DESC, so re-running on an already processed frame would
# match no tags and leave every occupancy at 0.
df = pd.read_csv("compiled.csv")

# add a new column occupancy
df["occupancy"] = 0

# add occupancy to residential properties
residential_gen(df)

  0%|          | 0/168964 [00:00<?, ?it/s]

In [15]:
# Persist the frame (now carrying the occupancy and type columns) and preview
# ten random rows.
df.to_csv(path_or_buf="compiled_1_res.csv", index=False)
df.sample(n=10)

Unnamed: 0,properties.PIN,properties.PROP_NAME,properties.LAND_USE_DESC,properties.LAND_USE_DESC_10,properties.STATUS_TEXT_2,geometry.type,geometry.coordinates,file,occupancy,type
83904,3438500730,,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.355379954836, 47.5536247481746], [-122...",residential.geojson,5,residential
29579,946000205,,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.363641081405, 47.6910287657686], [-122...",residential.geojson,5,residential
80896,3076500560,,Single Family,Single Family,(41% - 80% of allowed density),Polygon,"[[[-122.31753598365, 47.5762602120484], [-122....",residential.geojson,4,residential
4405,1498301315,,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.311612582668, 47.5847577309197], [-122...",residential.geojson,5,residential
44474,5366200390,,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.291645857272, 47.6082593482472], [-122...",residential.geojson,5,residential
15372,1118002125,,Vacant,Vacant,(under 20% of allowed density),Polygon,"[[[-122.287895822078, 47.6299688776539], [-122...",residential.geojson,0,residential
116720,9828200235,,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.301013502516, 47.6155647485207], [-122...",residential.geojson,5,residential
7817,3352400785,,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.264331801828, 47.5019494189892], [-122...",residential.geojson,5,residential
104503,3333001792,,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.288419049644, 47.5455989939216], [-122...",residential.geojson,5,residential
149843,7640400175,,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.369722539946, 47.7210860060726], [-122...",residential.geojson,4,residential


Unnamed: 0,properties.PIN,properties.PROP_NAME,properties.LAND_USE_DESC,properties.LAND_USE_DESC_10,properties.STATUS_TEXT_2,geometry.type,geometry.coordinates,file,occupancy,type
34567,2780400030,,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.28563747356, 47.6811741346379], [-122....",residential.geojson,5,residential
149809,1794500185,,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.357942986372, 47.6392381135441], [-122...",residential.geojson,5,residential
65880,2597800055,,Single Family,Single Family,(21% - 40% of allowed density),Polygon,"[[[-122.38627473017, 47.5200167355564], [-122....",residential.geojson,2,residential
35664,2878601100,,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.322240067866, 47.6881441741841], [-122...",residential.geojson,5,residential
153547,2826049131,,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.305143125466, 47.7148168139036], [-122...",residential.geojson,5,residential
152395,2883201260,SP 77-161 & 77-163,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.330540442275, 47.6848555029813], [-122...",residential.geojson,5,residential
51583,6021500955,,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.386811492816, 47.6904967188948], [-122...",residential.geojson,5,residential
46693,123039548,,Single Family,Single Family,(21% - 40% of allowed density),Polygon,"[[[-122.375338332877, 47.504601279209], [-122....",residential.geojson,1,residential
22102,9551202390,,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.335967751192, 47.6695590289343], [-122...",residential.geojson,5,residential
116419,5316100970,,Single Family,Single Family,(81% - 100% of allowed density),Polygon,"[[[-122.283458574406, 47.6291593315567], [-122...",residential.geojson,5,residential
