# Link census tracts to TOC tiers
* Find what proportion of each census tract's area falls within a particular TOC tier

In [1]:
import intake
import numpy as np
import pandas as pd
import geopandas as gpd
import boto3
import utils

In [2]:
# S3 bucket that the zip+s3:// and s3:// paths in this notebook are built from.
bucket_name = 'city-planning-entitlements'
s3 = boto3.client('s3')

# Intake catalog(s); the census tract source is read from here.
catalog = intake.open_catalog("../catalogs/*.yml")

In [3]:
# Load the test extract of parcels matched to tracts and reproject to EPSG:2229.
# `epsg=2229` replaces the `{'init': 'epsg:2229'}` dict, which relies on the
# deprecated `+init=` syntax and triggers a FutureWarning with pyproj 2+.
# Presumably EPSG:2229 is used so areas come out in square feet (the column
# names use `sqf`/`sqft`) -- TODO confirm.
parcels = (
    gpd.read_file(f'zip+s3://{bucket_name}/gis/intermediate/test_parcels_to_tracts.zip')
    .to_crs(epsg=2229)
)

In [4]:
# Preview the first five rows of the test parcels.
parcels.head()

Unnamed: 0,AIN,parcel_sqf,x,y,num_AIN,GEOID,pop,geometry
0,2004001003,9685.584413,6374033.0,1903105.0,1,6037113232,4265,"POLYGON ((6374107.864 1903073.789, 6373958.854..."
1,2004001004,9909.894744,6374033.0,1903040.0,1,6037113232,4265,"POLYGON ((6374107.604 1903007.279, 6373958.604..."
2,2004001005,9641.455083,6374031.0,1902974.0,1,6037113232,4265,"POLYGON ((6374107.484 1902983.709, 6374106.651..."
3,2004001008,11814.429891,6373418.0,1903151.0,1,6037113232,4265,"POLYGON ((6373470.224 1903188.539, 6373470.644..."
4,2004001009,14878.423677,6373314.0,1903147.0,1,6037113232,4265,"POLYGON ((6373398.904 1903072.729, 6373392.634..."


In [20]:
# Keep only the first row for each (x, y, num_AIN) combination; later rows
# sharing the same three values are discarded.
parcels2 = parcels[~parcels.duplicated(subset=['x', 'y', 'num_AIN'], keep='first')]

In [6]:
# Read census tracts and reproject to EPSG:2229. `epsg=2229` replaces the
# `{'init': 'epsg:2229'}` dict, which uses the deprecated `+init=` CRS syntax.
tracts = catalog.census_tracts.read().to_crs(epsg=2229)

# Keep the tract id and geometry, record each tract's area (in the CRS's
# squared units) as `tract_sqft`, then rename the columns so they do not
# collide with the parcel columns when the two tables are merged on GEOID.
# The area is computed inside the chain, before the geometry column is renamed,
# so it no longer depends on the outer `tracts` variable.
tracts = (
    tracts[['GEOID10', 'geometry']]
    .assign(tract_sqft=lambda t: t.geometry.area)
    .rename(columns={'GEOID10': 'GEOID', 'geometry': 'tract_geom'})
)

In [23]:
# Attach tract geometry and tract area to every parcel row.
# how='left' keeps parcels whose GEOID has no tract match; validate='m:1'
# raises if GEOID is not unique on the tract side.
parcels3 = pd.merge(
    left=parcels2,
    right=tracts,
    how='left',
    on='GEOID',
    validate='m:1',
)

In [24]:
# Total parcel area per tract, then its share of the tract's own area.
df = (
    parcels3
    .groupby(['GEOID', 'tract_sqft'])['parcel_sqf']
    .sum()
    .reset_index()
    .assign(total_pct=lambda d: d.parcel_sqf / d.tract_sqft)
)

In [25]:
# Number of (GEOID, tract_sqft) groups in the summary.
df.shape[0]

1007

In [27]:
# Number of tracts whose summed parcel area does not exceed the tract area.
int((df.total_pct <= 1).sum())

1007

In [None]:
# Load the full parcel layers and reproject to EPSG:2229. `epsg=2229` replaces
# the `{'init': 'epsg:2229'}` dict, which uses the deprecated `+init=` syntax.
# Note: this rebinds `parcels`, which earlier in the notebook held the test
# extract.
parcels = (
    gpd.read_file(f'zip+s3://{bucket_name}/gis/raw/la_parcels.zip')
    .to_crs(epsg=2229)
)

la_parcels_with_dups = (
    gpd.read_file(f'zip+s3://{bucket_name}/gis/intermediate/la_parcels_with_dups.zip')
    .to_crs(epsg=2229)
)

# Parcel (AIN) to tract crosswalk; it is joined to the parcels on `AIN` below.
crosswalk_parcels_tracts = pd.read_parquet(f's3://{bucket_name}/data/crosswalk_parcels_tracts.parquet')

In [None]:
# Join the crosswalk columns onto the parcels by AIN (inner join by default).
# validate='1:1' raises if AIN repeats in either table.
parcels2 = pd.merge(
    left=parcels,
    right=crosswalk_parcels_tracts,
    on='AIN',
    validate='1:1',
)
parcels2.head()

In [None]:
# Keep the first row for each (AIN, num_AIN) pair.
# NOTE(review): the preceding merge is validated as 1:1 on AIN, so AIN should
# already be unique here and this presumably removes nothing -- confirm.
parcels3 = parcels2[~parcels2.duplicated(subset=['AIN', 'num_AIN'], keep='first')]
parcels3.head()

In [None]:
# Read census tracts and reproject to EPSG:2229. `epsg=2229` replaces the
# `{'init': 'epsg:2229'}` dict, which uses the deprecated `+init=` CRS syntax.
tracts = catalog.census_tracts.read().to_crs(epsg=2229)

# Keep the tract id and geometry, record each tract's area (in the CRS's
# squared units) as `tract_sqft`, then rename the columns so they do not
# collide with the parcel columns when the two tables are merged on GEOID.
# The area is computed inside the chain, before the geometry column is renamed,
# so it no longer depends on the outer `tracts` variable.
tracts = (
    tracts[['GEOID10', 'geometry']]
    .assign(tract_sqft=lambda t: t.geometry.area)
    .rename(columns={'GEOID10': 'GEOID', 'geometry': 'tract_geom'})
)

In [None]:
# `parcels3` already carries the crosswalk columns: it derives from `parcels2`,
# which is `parcels` merged with `crosswalk_parcels_tracts` on AIN. Merging the
# crosswalk a second time would suffix every shared non-key column with
# `_x`/`_y`, so plain `GEOID` / `parcel_sqft` would no longer exist for the
# tract merge and aggregation that follow. Reuse the joined frame instead;
# `.copy()` keeps `parcel_geom` independent of `parcels3`.
parcel_geom = parcels3.copy()

In [None]:
# Attach tract geometry and area to each parcel (inner join by default);
# validate='m:1' raises if GEOID is not unique on the tract side.
tract_geom = pd.merge(
    left=parcel_geom,
    right=tracts,
    on='GEOID',
    validate='m:1',
)

In [None]:
# Total parcel area per tract, then its share of the tract's own area.
df = (
    tract_geom
    .groupby(['GEOID', 'tract_sqft'])['parcel_sqft']
    .sum()
    .reset_index()
    .assign(total_pct=lambda d: d.parcel_sqft / d.tract_sqft)
)

In [None]:
# Tract id singled out for closer inspection.
geoid = ['06037102104']

# Tracts whose summed parcel area exceeds the tract's own area.
wrong = df[df['total_pct'] > 1]

# The selected tract among the over-covered tracts ...
fix = wrong[wrong['GEOID'].isin(geoid)]

# ... and the parcels assigned to it.
wrong_parcels = parcel_geom[parcel_geom['GEOID'].isin(geoid)]

In [None]:
# Re-attach the tract geometry to the selected tract summary rows, then promote
# the merged result to a GeoDataFrame with `tract_geom` as the active geometry
# and tag it with the projected CRS.
wrong_tracts = pd.merge(fix, tracts, on = ['GEOID', 'tract_sqft'], how = 'inner')
wrong_tracts = gpd.GeoDataFrame(wrong_tracts)
wrong_tracts = wrong_tracts.set_geometry('tract_geom')
wrong_tracts.crs = {'init':'epsg:2229'}

# Nothing visible in this notebook creates a `parcel_geom` column (parcels read
# with gpd.read_file carry `geometry`), so set_geometry('parcel_geom') would
# raise on an unknown column. Use `parcel_geom` when it exists and fall back to
# `geometry` otherwise.
# NOTE(review): confirm whether the crosswalk is expected to supply `parcel_geom`.
parcel_geometry_col = 'parcel_geom' if 'parcel_geom' in wrong_parcels.columns else 'geometry'

wrong_parcels = gpd.GeoDataFrame(wrong_parcels)
wrong_parcels = wrong_parcels.set_geometry(parcel_geometry_col)
wrong_parcels.crs = {'init':'epsg:2229'}

In [None]:
# Write both layers out as GeoJSON under ../gis/.
wrong_tracts.to_file('../gis/wrong_tracts.geojson', driver='GeoJSON')
wrong_parcels.to_file('../gis/wrong_parcels.geojson', driver='GeoJSON')

In [None]:
# Plot the parcels assigned to the selected tract.
wrong_parcels.plot()

In [None]:
# Plot the selected tract's geometry.
wrong_tracts.plot()