# Subregional QC: Target units and Capacity 

#### load packages and connect to database

In [None]:
# append path to find utils module in urbansim
import os 
import sys
# Put the directory one level above the working directory on sys.path
# so that the utils module can be imported.
cwd = os.getcwd()
parent_of_cwd = os.path.join(cwd, os.pardir)
parentdir = os.path.abspath(parent_of_cwd)
sys.path.append(parentdir)

In [None]:
from sqlalchemy import create_engine
from database import get_connection_string
import pandas as pd
import numpy as np
import utils
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Connect to the database and load the version ids used by the queries below.
# The config path is assembled with os.path.join instead of being written as
# '..\data\config.yml': in a normal string literal '\d' and '\c' are invalid
# escape sequences, and a hard-coded backslash separator only resolves on
# Windows. On Windows the joined path is the same string as before.
config_path = os.path.join(os.pardir, 'data', 'config.yml')
db_connection_string = get_connection_string(config_path, 'mssql_db')
mssql_engine = create_engine(db_connection_string)
# versions is indexed by keys such as 'additional_capacity_version' in later cells.
versions = utils.yaml_to_dict('../data/scenario_config.yaml', 'scenario')

# Get Capacities

##### from urbansim.parcel

In [None]:
# Read every parcel with its jurisdiction ids, site id, dwelling units and
# the two capacity columns.
parcel_capacity_sql = '''
    SELECT [parcel_id],[jurisdiction_id],[cap_jurisdiction_id],[site_id],
           [du_2015],[du_2017],[capacity_1],[capacity_2]
      FROM [urbansim].[urbansim].[parcel]'''
capacity_df = pd.read_sql(parcel_capacity_sql, mssql_engine)

# Sum capacity_2 over parcels with no site id plus site 15008
# (SQL equivalent: [site_id] IS NULL or site_id = 15008).
no_site_or_15008 = capacity_df.site_id.isnull() | (capacity_df.site_id == 15008)
urbansim_parcel_capacity = int(capacity_df.loc[no_site_or_15008, 'capacity_2'].sum())
print("\nCapacity from urbansim.parcel where site id is null: {:,}".format(urbansim_parcel_capacity))
# presumably totals recorded from earlier runs:
# 291,989
# 291,712
# 298,541

In [None]:
# Rows of type 'upd' from additional_capacity for the configured version,
# joined to the parcel table; a.[du] is returned under the name capacity_2 so
# the result has the same columns as the parcel query.
city_update_sql = '''
SELECT a.[parcel_id],[jurisdiction_id],[cap_jurisdiction_id],[site_id],
           [du_2015],[du_2017],[capacity_1],a.[du] AS capacity_2
FROM [urbansim].[urbansim].[additional_capacity] AS a
JOIN [urbansim].[parcel] AS p 
ON p.[parcel_id] = a.[parcel_id]
WHERE [version_id] = %s and type ='upd'
ORDER BY a.[parcel_id]
''' % versions['additional_capacity_version']
city_update_df = pd.read_sql(city_update_sql, mssql_engine)
# site_id is cast to float (presumably to match the dtype in capacity_df - confirm)
city_update_df['site_id'] = city_update_df['site_id'].astype(float)
# city_update_df.set_index('parcel_id',inplace=True)

In [None]:
city_update_df.capacity_2.sum()

In [None]:
capacity_df.loc[capacity_df.parcel_id == 5282707]

In [None]:
#capacity_df.set_index('parcel_id',inplace=True)
#capacity_df.update(city_update_df)
#capacity_df.reset_index(inplace=True)

In [None]:
# Stack the update rows under the parcel rows; when a parcel_id occurs in both,
# keep='last' retains the row that came from city_update_df.
combined_capacity = pd.concat([capacity_df, city_update_df], sort=False)
combined_capacity = combined_capacity.drop_duplicates(['parcel_id'], keep='last')
capacity_df = combined_capacity.sort_values('parcel_id')


In [None]:
capacity_df.loc[capacity_df.parcel_id == 131043]

In [None]:
# urbansim_parcel_capacity = int(capacity_df.loc[capacity_df.site_id.isnull()].capacity_2.sum())
# Recompute the parcel capacity on the merged frame: parcels with no site id,
# parcels on site 15008, and parcel 5637.
counted_parcels = (capacity_df.site_id.isnull()
                   | (capacity_df.site_id == 15008)
                   | (capacity_df.parcel_id == 5637))
urbansim_parcel_capacity = int(capacity_df.loc[counted_parcels, 'capacity_2'].sum())
print("\nCapacity from urbansim.parcel where site id is null: {:,}".format(urbansim_parcel_capacity))
# 359,195
# 364,413

In [None]:
# net difference in capacity since lost previous capacity_2 
364413-298541

##### from urbansim.additional_capacity

In [None]:
# Additional-capacity rows for the configured version, excluding 'upd' rows,
# joined to the parcel table for the jurisdiction ids.
assigned_parcel_sql = '''
SELECT  a.parcel_id, cap_jurisdiction_id, jurisdiction_id, a.du, a.type
   FROM [urbansim].[urbansim].[additional_capacity] a
   JOIN urbansim.parcel p on p.parcel_id = a.parcel_id
  WHERE version_id = %s and type !='upd' ''' % versions['additional_capacity_version']
assigned_df = pd.read_sql(assigned_parcel_sql, mssql_engine)
assigned_capacity = int(assigned_df.du.sum())
print("\nCapacity from urbansim.additional_capacity (ADU and SGOAs): {:,}".format(assigned_capacity))
assigned_df.type.unique()  # result is discarded here (not the last expression of the cell)

# Split the additional capacity into SGOA type codes and ADUs.
sgoa_type_codes = ['mc', 'tco', 'uc', 'tc', 'cc']
is_adu_row = assigned_df.type == 'adu'
sgoa_assigned = assigned_df.loc[assigned_df.type.isin(sgoa_type_codes)]
adu_assigned = assigned_df.loc[is_adu_row]
sgoa_assigned_capacity = int(sgoa_assigned.du.sum())
adu_assigned_capacity = int(adu_assigned.du.sum())
print("\n      Capacity from sgoa: {:,}".format(sgoa_assigned_capacity))
print("\n      Capacity from adu: {:,}".format(adu_assigned_capacity))

# adus in city of san diego, chula vista, oceanside, el cajon
in_2019_adu_jurisdictions = assigned_df.cap_jurisdiction_id.isin([14, 2, 12, 5])
starting_2019_adu = assigned_df.loc[in_2019_adu_jurisdictions & is_adu_row]
adu_2019_assigned_capacity = int(starting_2019_adu.du.sum())
print(("\n                 note: 2019 to 2035 adu: {:,} "
       "(city of san diego, chula vista, oceanside, el cajon)").format(adu_2019_assigned_capacity))
# 5316
# 136,158
# 138,035
# 163,035
# 81,677 (58,534 sgoa & 23,143 adu)
# 37,574 (18,017 sgoa & 19,557 adu)

##### from "urbansim.scheduled_development_parcel" & "urbansim.urbansim.scheduled_development_priority"

In [None]:
# Scheduled development capacity, read from two tables:
#   scheduled_development_parcel   -> sched_dev_capacity
#   scheduled_development_priority -> sched_dev_capacity_do_not_use (filtered by version)
sched_dev_parcel_sql = '''
SELECT [capacity_3] FROM [urbansim].[urbansim].[scheduled_development_parcel]'''
sched_dev_df = pd.read_sql(sched_dev_parcel_sql, mssql_engine)
sched_dev_capacity = int(sched_dev_df.capacity_3.sum())
print("\nScheduled development capacity from urbansim.scheduled_development_parcel : {:,}"
      .format(sched_dev_capacity))

sched_dev_do_not_use_sql = '''
SELECT [sched_version_id],s.[parcel_id],p.cap_jurisdiction_id,s.[site_id],[capacity_3]
  FROM [urbansim].[urbansim].[scheduled_development_priority] s
  JOIN urbansim.urbansim.parcel p ON p.parcel_id = s.parcel_id
  WHERE sched_version_id = %s''' % versions['sched_dev_version']
site_do_not_use = pd.read_sql(sched_dev_do_not_use_sql, mssql_engine)
sched_dev_capacity_do_not_use = int(site_do_not_use.capacity_3.sum())
print("\nScheduled developments from scheduled_development_priority: {:,}".format(sched_dev_capacity_do_not_use))
# 78,976

# Get target housing units

##### urbansim.urbansim_target_housing_units

In [None]:
# Yearly target housing units for the configured version, with a running total.
target_version = versions['target_housing_units_version']
units_needed_sql = '''
SELECT [yr], [version_id], [housing_units_add]
  FROM [urbansim].[urbansim].[urbansim_target_housing_units]
  WHERE version_id = %s''' % target_version
units_needed_df = pd.read_sql(units_needed_sql, mssql_engine)
units_needed_df['total_housing_units_add'] = units_needed_df['housing_units_add'].cumsum()
units_needed = int(sum(units_needed_df.housing_units_add))
print(("\n  Target units to add : {:,}\n"
       "      (from urbansim.urbansim_target_housing_units version id {:})")
      .format(units_needed, target_version))
# 468,866
# 534,000

In [None]:
#units_needed_df.to_csv('test2.csv')

## Total Capacity & Remaining Capacity

In [None]:
# Total capacity is the sum of the three sources; remaining capacity is what is
# left after subtracting the target units.
parcel_plus_sched_dev = urbansim_parcel_capacity + sched_dev_capacity
total_capacity = urbansim_parcel_capacity + assigned_capacity + sched_dev_capacity
remaining_capacity = total_capacity - units_needed

print("\n\nTotal Capacity: {:,}\n".format(total_capacity))
print("       = {:,} (urbansim.parcel) + {:,} (sched dev) + {:,} (assigned capacity) \n"
      .format(urbansim_parcel_capacity, sched_dev_capacity, assigned_capacity))
print("            ({:,} urbansim.parcel and sched dev only)\n".format(parcel_plus_sched_dev))
print("\n Remaining capacity 2050: {:,}\n       = {:,} capacity - {:,} units needed\n"
      .format(remaining_capacity, total_capacity, units_needed))
# 507,123
# 509,000
# 452,365
# compare to 364,374 capacity previously

# Output of simulation 

##### from urbansim.urbansim_lite_output - get latest run id

In [None]:
# Get the highest run id present in urbansim_lite_output; the queries below
# filter the simulation output on it.
run_id_sql = '''
SELECT max(run_id)
  FROM [urbansim].[urbansim].[urbansim_lite_output]
'''
run_id_df = pd.read_sql(run_id_sql, mssql_engine)
# The query returns exactly one cell, so read it by position.
# int(run_id_df.values) relied on converting a 2-D array to a scalar, which
# NumPy 1.25+ deprecates.
max_run_id = run_id_df.iloc[0, 0]
if pd.isnull(max_run_id):
    # max() over an empty table is NULL; fail with a clear message instead of
    # an opaque int() TypeError.
    raise ValueError("urbansim_lite_output contains no run_id values")
run_id = int(max_run_id)

print("\n   Max run id : {:,}".format(run_id))

In [None]:
# Unit change for the selected run, summed per jurisdiction and capacity type.
hs_change_sql = '''
    SELECT j.name, i.jur_id,
    sum(unit_change) as hs_change, capacity_type
      FROM urbansim.urbansim.urbansim_lite_output o 
      JOIN [isam].[xpef04].[parcel2015_mgra_jur_cpa] i on i.parcel_id = o.parcel_id
      JOIN urbansim.ref.jurisdiction j on i.jur_id = j.jurisdiction_id
     WHERE run_id =  %s and i = 1
     GROUP BY j.name,i.jur_id,capacity_type
      ORDER BY j.name,i.jur_id''' % run_id
hs = pd.read_sql(hs_change_sql, mssql_engine)

In [None]:
#hs.head()

### compare units added in subregional to capacities

In [None]:
# Split the simulated unit change by capacity type and compare each bucket with
# the capacity that was available to it.
is_sched_dev_row = hs.capacity_type == 'sch'
is_jur_row = hs.capacity_type == 'jur'

units_added = int(hs.hs_change.sum())
units_from_sched_dev = int(hs.loc[is_sched_dev_row, 'hs_change'].sum())
sched_dev_difference = int(sched_dev_capacity_do_not_use - units_from_sched_dev)
# every capacity type other than 'sch' and 'jur' is counted as additional capacity
units_from_assigned = int(hs.loc[~(is_sched_dev_row | is_jur_row), 'hs_change'].sum())
assigned_difference = int(assigned_capacity - units_from_assigned)
units_from_urbansim_parcel = int(hs.loc[is_jur_row, 'hs_change'].sum())
urbansim_parcel_difference = int(urbansim_parcel_capacity - units_from_urbansim_parcel)
remaining_capacity_simulation = total_capacity - units_added

print("\nUnits added: {:,} (from capacity {:,})"
      .format(units_added, total_capacity))
print("\n    (Target: {:,})".format(units_needed))
print("\n       Urb parcel units: {:,} (capacity {:,}) (remaining {:,})"
      .format(units_from_urbansim_parcel, urbansim_parcel_capacity, urbansim_parcel_difference))
print("\n       Sched dev units: {:,} (capacity {:,}) (remaining {:,})"
      .format(units_from_sched_dev, sched_dev_capacity_do_not_use, sched_dev_difference))
print("\n       Additional units: {:,} (capacity {:,}) (remaining {:,})"
      .format(units_from_assigned, int(assigned_df.du.sum()), assigned_difference))
print("\n Total Unused Capacity : {:,}".format(remaining_capacity_simulation))

## If remaining jur capacity or sched dev capacity - where is it?

In [None]:
# Build one long table (parcel_id, capacity, capacity_type) from the three
# capacity sources, then attach the jurisdiction name and id to each row.

# Jurisdiction feedback: parcels with no site id or site 15008, capacity_2 > 0
# ([site_id] IS NULL or site_id = 15008)
jur_parcels = capacity_df.loc[capacity_df.site_id.isnull() | (capacity_df.site_id == 15008)]
jur_parcels = jur_parcels.loc[jur_parcels.capacity_2 > 0]
urb_parcel_cap = jur_parcels[['parcel_id', 'capacity_2']].copy()
urb_parcel_cap = urb_parcel_cap.rename(columns={"capacity_2": "capacity"})
urb_parcel_cap['capacity_type'] = 'jur'

# Additional capacity: the five SGOA type codes are collapsed into 'sgoa'
add_df = assigned_df[['parcel_id', 'du', 'type']].copy()
add_df = add_df.rename(columns={"du": "capacity", "type": "capacity_type"})
add_df.replace(['cc', 'mc', 'tc', 'tco', 'uc'], 'sgoa', inplace=True)

# Scheduled development
sched = site_do_not_use[['parcel_id', 'capacity_3']].copy()
sched = sched.rename(columns={"capacity_3": "capacity"})
sched['capacity_type'] = 'sch'

c1 = pd.concat([urb_parcel_cap, add_df, sched])
jur_sql = '''
    SELECT i.parcel_id,j.name, i.jur_id
      FROM [isam].[xpef04].[parcel2015_mgra_jur_cpa] i 
      JOIN urbansim.ref.jurisdiction j on i.jur_id = j.jurisdiction_id
     WHERE i = 1'''
j = pd.read_sql(jur_sql, mssql_engine)
# left merge keeps every capacity row even when no jurisdiction row matches
c = c1.merge(j, how='left', on=['parcel_id'])

In [None]:
urb_parcel_cap.capacity.sum()

In [None]:
# Total capacity per jurisdiction (name, jur_id) as a flat table.
capacity_by_jur = c.groupby(["name", "jur_id"]).capacity.sum()
dfout = capacity_by_jur.to_frame('capacity').reset_index()

In [None]:
# Unit change for the selected run per parcel, jurisdiction and capacity type.
hs_change_sql = '''
    SELECT o.parcel_id,j.name, i.jur_id,capacity_type,
    sum(unit_change) as hs_change
      FROM urbansim.urbansim.urbansim_lite_output o 
      JOIN [isam].[xpef04].[parcel2015_mgra_jur_cpa] i on i.parcel_id = o.parcel_id
      JOIN urbansim.ref.jurisdiction j on i.jur_id = j.jurisdiction_id
     WHERE run_id =  %s and i = 1
     GROUP BY j.name,i.jur_id,o.parcel_id,capacity_type
      ORDER BY parcel_id''' % run_id
results = pd.read_sql(hs_change_sql, mssql_engine)
# collapse the five SGOA type codes into the single label 'sgoa'
results.replace(['cc', 'mc', 'tc', 'tco', 'uc'], 'sgoa', inplace=True)

In [None]:
# Line up each capacity row with the units built on the same parcel and
# capacity type; rows with no simulated change count as 0 units built.
df = pd.merge(c, results, how='left', on=['parcel_id', 'capacity_type'])
# Assign the filled column back. df.hs_change.fillna(0, inplace=True) fills a
# temporary object and leaves df untouched under pandas Copy-on-Write.
df['hs_change'] = df['hs_change'].fillna(0)
df['capacity_unused'] = df['capacity'] - df['hs_change']
#df.loc[df['capacity_unused']!=0].head()

In [None]:
df.capacity_unused.sum()

In [None]:
results.hs_change.sum()

In [None]:
c.capacity.sum()

In [None]:
480212.0-418845

In [None]:
# Sum the unused capacity over the rows where it is non-zero.
nonzero_unused_capacity = df.loc[df['capacity_unused'] != 0, 'capacity_unused']
ununsed_cap_total = int(nonzero_unused_capacity.sum())
print("\n Total Unused Capacity : {:,}".format(ununsed_cap_total))

In [None]:
66585 -  61367

In [None]:
480212.0-418845

In [None]:
#df.loc[df['capacity_unused']!=0].to_csv('test.csv')

In [None]:
# Rows of jurisdiction-feedback capacity and their total unused units.
is_jur_capacity = df.capacity_type == 'jur'
unused_jur = df.loc[is_jur_capacity]
ununsed_jur_total = int(unused_jur['capacity_unused'].sum())
print("\n Unused Capacity from Jur Feedback : {:,}".format(ununsed_jur_total))

In [None]:
unused_jur.loc[unused_jur.capacity_unused>0].head()

In [None]:
# Unused jurisdiction-feedback capacity per jurisdiction. name_x / jur_id_x are
# the jurisdiction columns that came from the capacity side of the merge.
jur_unused = pd.DataFrame({'unused_jur_capacity': unused_jur.
                                          groupby(["name_x","jur_id_x"])
                                          .capacity_unused.sum()}).reset_index()
# column label spelled correctly (was "jursidiction")
jur_unused = jur_unused.rename(columns={"name_x": "jurisdiction"})

print("Jurisdictions with unused capacity from jurisdiction feedback:")
jur_unused.loc[jur_unused.unused_jur_capacity!=0]

In [None]:
# Rows of scheduled-development capacity and their total unused units.
is_sched_capacity = df.capacity_type == 'sch'
unused_sch = df.loc[is_sched_capacity]
ununsed_sch_total = int(unused_sch['capacity_unused'].sum())
print("\n Unused capacity from sched dev : {:,}".format(ununsed_sch_total))

In [None]:
# unused_sch.loc[unused_sch.name_x=='Chula Vista']

In [None]:
# Unused scheduled-development capacity per jurisdiction name. name_x is the
# jurisdiction name that came from the capacity side of the merge.
sch_unused = pd.DataFrame({'unused_sch_capacity': unused_sch.
                                          groupby(["name_x"])
                                          .capacity_unused.sum()}).reset_index()
# column label spelled correctly (was "jursidiction")
sch_unused = sch_unused.rename(columns={"name_x": "jurisdiction"})

print("Jurisdictions with unused capacity from sched dev:")
sch_unused.loc[sch_unused.unused_sch_capacity!=0]

In [None]:
# df.to_csv('test.csv')

### percent sgoa and adu compared to jur feedback for each jurisdiction

In [None]:
# Capacity summed per jurisdiction name and capacity type
# (note: this rebinds df, replacing the parcel-level frame).
capacity_by_name_and_type = c.groupby(["name", 'capacity_type']).capacity.sum()
df = capacity_by_name_and_type.to_frame('capacity_by_type').reset_index()

In [None]:
#pivot
# One row per jurisdiction name, one column per capacity type, plus a total.
cols = ['sch','jur','adu','sgoa']
type_pivot = df.pivot(index='name', columns='capacity_type', values='capacity_by_type')
# reindex (rather than type_pivot[cols]) so that a capacity type with no rows
# becomes a column of zeros instead of raising KeyError; jurisdictions without
# a given type also get 0.
type_pivot = type_pivot.reindex(columns=cols).fillna(0).rename_axis(None, axis=1)
type_pivot['total'] = type_pivot[cols].sum(axis=1)
#type_pivot[cols] = type_pivot[cols].applymap(np.int32)

In [None]:
# SGOA capacity as a percentage of each jurisdiction's total, to one decimal.
sgoa_percent_of_total = type_pivot['sgoa'] / type_pivot['total'] * 100.0
type_pivot['percent_sgoa'] = sgoa_percent_of_total.round(1)

In [None]:
#type_pivot.dtypes

In [None]:
pd.options.display.float_format = '{:,.0f}'.format

In [None]:
type_pivot

In [None]:
type_pivot.sort_values(by='percent_sgoa')

### Compare parcel by parcel capacities and units added

In [None]:
# urb_parcel_cap = capacity_df.loc[(capacity_df.capacity_2>0) & (capacity_df.site_id.isnull())]

# Rebuild the three per-source capacity frames, this time with one distinctly
# named capacity column each so they can be merged side by side.
jur_rows = capacity_df.loc[capacity_df.site_id.isnull() | (capacity_df.site_id == 15008)]
jur_rows = jur_rows.loc[jur_rows.capacity_2 > 0]
urb_parcel_cap = jur_rows[['parcel_id', 'capacity_2']].copy()
urb_parcel_cap = urb_parcel_cap.rename(columns={"capacity_2": "capacity_jur"})

add_df = assigned_df[['parcel_id', 'du']].copy()
add_df = add_df.rename(columns={"du": "capacity_assigned"})

sched = site_do_not_use[['parcel_id', 'capacity_3']].copy()
sched = sched.rename(columns={"capacity_3": "capacity_sch"})

# Stack the parcel ids of all three sources; duplicated() flags the second and
# later occurrence of a parcel_id.
all_parcels_w_cap = pd.concat([frame[['parcel_id']] for frame in (urb_parcel_cap, add_df, sched)])
all_parcels_w_cap['is_duplicated'] = all_parcels_w_cap.duplicated(['parcel_id'])
unique_parcels = len(all_parcels_w_cap.parcel_id.unique())


print("\n Number of parcels with capacity from jur feedback : {:,}".format(len(urb_parcel_cap)))
print("\n Number of parcels with capacity from sched dev : {:,}".format(len(sched)))
print("\n Number of parcels with additional capacity : {:,}".format(len(add_df)))
print("\n Total number of parcels with capacity \n (includes duplicate parcels with different capacity types) : {:,}"
      .format(len(urb_parcel_cap) + len(sched) + len(add_df)))

#### parcels with more than one capacity type ("duplicate") 

In [None]:
# Row count over all three sources versus the number of distinct parcels.
rows_across_sources = len(urb_parcel_cap) + len(sched) + len(add_df)
repeated_parcel_rows = all_parcels_w_cap['is_duplicated'].sum()
print("\n Total number of parcels with capacity \n   (includes duplicate parcels with different capacity types) : {:,}"
      .format(rows_across_sources))
print("\n Total number of UNIQUE parcels with capacity : {:,}".format(unique_parcels))
print("\n Total number of parcels with capacity from more than one type : {:,}"
      .format(repeated_parcel_rows))
#133

#### capacity types of parcels that are "duplicated"

In [None]:
# Parcel ids flagged as duplicates, and the rows of each source frame that
# belong to one of those parcels.
duplicated_parcels = all_parcels_w_cap.loc[all_parcels_w_cap.is_duplicated, 'parcel_id'].tolist()
jur_dup = urb_parcel_cap.loc[urb_parcel_cap.parcel_id.isin(duplicated_parcels)]
add_dup = add_df.loc[add_df.parcel_id.isin(duplicated_parcels)]
sch_dup = sched.loc[sched.parcel_id.isin(duplicated_parcels)]

print("\n Number of parcels with capacity from jurisdictions that are duplicates :{:,}"
      .format(len(jur_dup)))
print("\n Number of parcels with capacity from additional that are duplicates :{:,}"
      .format(len(add_dup)))
print("\n Number of parcels with capacity from sched dev that are duplicates :{:,}"
      .format(len(sch_dup)))

In [None]:
pd.options.display.float_format = '{:,.0f}'.format

### Merge capacity for each parcel with units built for each parcel

In [None]:
#cap_df = capacity_df[['parcel_id','cap_jurisdiction_id']].copy()
#cap_df = pd.merge(cap_df,urb_parcel_cap,how='left',on='parcel_id')
# Outer-merge the three sources on parcel_id so each parcel ends up in one row;
# a source that has no capacity for a parcel contributes 0.
cap_df = urb_parcel_cap.merge(add_df, how='outer', on='parcel_id')
cap_df = cap_df.merge(sched, how='outer', on='parcel_id')
cap_df = cap_df.fillna(0)
cap_df['capacity'] = cap_df['capacity_jur'] + cap_df['capacity_assigned'] + cap_df['capacity_sch']
# cap is a separate, parcel-ordered frame; cap_df keeps its merge order
cap = cap_df.sort_values(by='parcel_id').reset_index(drop=True)

#### length of merge df should match unique parcels & sum of capacity should match total capacity

In [None]:
print ("\n Length of capacity df : {:,}".format(len(cap)))
print("\n Total number of UNIQUE parcels with capacity : {:,}".format(unique_parcels))
print("\n Sum of capacity from df : {:,}".format(cap.capacity.sum())) 
print("\n Total capacity : {:,}".format(total_capacity)) 

#### units added should match units_needed 

In [None]:
# Total unit change per parcel for the selected run.
units_by_parcel_sql = '''
    SELECT parcel_id,sum(unit_change) as unit_change
      FROM urbansim.urbansim.urbansim_lite_output  
     WHERE run_id =  %s
     GROUP BY parcel_id
      ORDER BY parcel_id''' % run_id
unit_by_parcel = pd.read_sql(units_by_parcel_sql, mssql_engine)

In [None]:
unit_by_parcel.unit_change.sum()

In [None]:
units_needed 

### number of parcels with new capacity added during simulation

In [None]:
len(unit_by_parcel)

#### find parcels with more than one capacity type

In [None]:
#duplicated_parcels
cap['is_duplicated'] = cap.parcel_id.isin(duplicated_parcels)

In [None]:
# cap.loc[cap.is_duplicated].head()

#### merge units added with capacity for parcel by parcel comparison

In [None]:
# Outer merge keeps parcels that appear on only one side; their missing values
# become 0 before the remaining capacity is computed.
compare_cap_w_units = unit_by_parcel.merge(cap, on='parcel_id', how='outer')
compare_cap_w_units = compare_cap_w_units.fillna(0)
compare_cap_w_units['remaining_cap'] = compare_cap_w_units.capacity - compare_cap_w_units.unit_change

#### length should be the unique parcels with capacity

In [None]:
print("\n Length of df with units : {:,}".format(len(compare_cap_w_units))) 
print("\n Total number of UNIQUE parcels with capacity : {:,}".format(unique_parcels))
print("\n Sum of unit change : {:,}".format(compare_cap_w_units.unit_change.sum()))
print("\n Units needed : {:,}".format(units_needed ))
print("\n Capacity sum : {:,}".format(compare_cap_w_units.capacity.sum()))
print("\n Total capacity : {:,}".format(total_capacity ))

In [None]:
# compare_cap_w_units.head()

### parcels that have more than one capacity type

In [None]:
parcels_dups = compare_cap_w_units.loc[compare_cap_w_units.is_duplicated]

#### length should match all parcels_w_cap duplicates

In [None]:
print("\n Length parcels with dups df : {:,}".format(len(parcels_dups)))
print("\n Number of parcels dup using is_duplicate : {:,}".format(all_parcels_w_cap['is_duplicated'].sum()))

## No parcels should have negative remaining capacity?

In [None]:
compare_cap_w_units.loc[compare_cap_w_units.remaining_cap<0]

## total remaining capacity should match capacity - units needed

In [None]:
print("\n remaining capacity sum : {:,}".format(compare_cap_w_units.remaining_cap.sum()))
print("\n Total capacity minus units needed : {:,}".format(total_capacity - units_needed)) 

## all parcels with remaining capacity have additional capacity (greater than zero)

In [None]:
# Parcels that still have capacity left, and how many of them carry assigned
# (additional) capacity.
has_remaining_cap = compare_cap_w_units.remaining_cap > 0
parcels_w_remaining = compare_cap_w_units.loc[has_remaining_cap]
remaining_with_assigned = parcels_w_remaining.loc[parcels_w_remaining.capacity_assigned > 0]
print("\n Total number of parcels with remaining capacity : {:,}"
      .format(len(parcels_w_remaining)))
print("\n Total number of parcels with remaining capacity and assigned capacity>0 : {:,}"
      .format(len(remaining_with_assigned)))

### parcels with remaining capacity that had no units added

In [None]:
# A parcel is "not used" when its remaining capacity equals its full capacity.
fully_unused_rows = parcels_w_remaining.loc[
    parcels_w_remaining.remaining_cap == parcels_w_remaining.capacity]
parcels_not_used = len(fully_unused_rows)
print("\n Total number of parcels not used : {:,}"
      .format(parcels_not_used))

### parcels that are partially used

In [None]:
len(parcels_w_remaining) - parcels_not_used

### parcels with remaining capacity that had some units added

In [None]:
parcels_not_used = parcels_w_remaining.loc[parcels_w_remaining.capacity>parcels_w_remaining.remaining_cap]

In [None]:
len(parcels_not_used)

In [None]:
# parcels_not_used.head()

#### No parcels with remaining capacity should have capacity assigned less than or equal to zero

In [None]:
parcels_w_remaining.loc[parcels_w_remaining.capacity_assigned<=0]

#### No parcel with remaining capacity should have a unit change less than jurisdiction provided capacity

In [None]:
parcels_w_remaining.loc[parcels_w_remaining.unit_change<parcels_w_remaining.capacity_jur]

In [None]:
# parcels_w_remaining.loc[parcels_w_remaining.capacity_assigned<=0]
# parcels_w_remaining.loc[parcels_w_remaining.unit_change<parcels_w_remaining.capacity_jur]

#### capacity type of parcels with remaining capacity

In [None]:
# Collapse the five SGOA type codes into the single label 'sgoa', in place.
assigned_df.replace(['cc', 'mc', 'tc', 'tco', 'uc'], 'sgoa', inplace=True)
assigned_df.head()

In [None]:
assigned_df.parcel_id = assigned_df.parcel_id.astype(int)
urb_parcel_cap.parcel_id = urb_parcel_cap.parcel_id.astype(int)

In [None]:
# Separate frames for SGOA and ADU capacity, each with its own column name.
sgoa = assigned_df.loc[assigned_df.type == 'sgoa', ['parcel_id', 'du']].copy()
sgoa = sgoa.rename(columns={'du': 'capacity_sgoa'})

adu = assigned_df.loc[assigned_df.type == 'adu', ['parcel_id', 'du']].copy()
adu = adu.rename(columns={'du': 'capacity_adu'})


In [None]:
#cap_df = capacity_df[['parcel_id','cap_jurisdiction_id']].copy()
#cap_df = pd.merge(cap_df,urb_parcel_cap,how='left',on='parcel_id')
# Same per-parcel merge as before, with additional capacity split into
# separate sgoa and adu columns.
cap_df = urb_parcel_cap.merge(sgoa, how='outer', on='parcel_id')
cap_df = cap_df.merge(adu, how='outer', on='parcel_id')
cap_df = cap_df.merge(sched, how='outer', on='parcel_id')
cap_df = cap_df.fillna(0)
cap_df['capacity'] = cap_df['capacity_jur'] + cap_df['capacity_sgoa'] + cap_df['capacity_adu'] + cap_df['capacity_sch']
# cap is a separate, parcel-ordered frame; cap_df keeps its merge order
cap = cap_df.sort_values(by='parcel_id').reset_index(drop=True)

In [None]:
print ("\n Length of capacity df : {:,}".format(len(cap)))
print("\n Total number of UNIQUE parcels with capacity : {:,}".format(unique_parcels))
print("\n Sum of capacity from df : {:,}".format(cap.capacity.sum())) 
print("\n Total capacity : {:,}".format(total_capacity)) 

In [None]:
cap['is_duplicated'] = cap.parcel_id.isin(duplicated_parcels)

In [None]:
# Rebuild the capacity/units comparison from the cap frame that has the
# sgoa/adu split; missing values on either side become 0.
compare_cap_w_units = unit_by_parcel.merge(cap, on='parcel_id', how='outer')
compare_cap_w_units = compare_cap_w_units.fillna(0)
compare_cap_w_units['remaining_cap'] = compare_cap_w_units.capacity - compare_cap_w_units.unit_change

In [None]:
print("\n Length of df with units : {:,}".format(len(compare_cap_w_units))) 
print("\n Total number of UNIQUE parcels with capacity : {:,}".format(unique_parcels))
print("\n Sum of unit change : {:,}".format(compare_cap_w_units.unit_change.sum()))
print("\n Units needed : {:,}".format(units_needed ))
print("\n Capacity sum : {:,}".format(compare_cap_w_units.capacity.sum()))
print("\n Total capacity : {:,}".format(total_capacity ))

In [None]:
print("\n Length parcels with dups df : {:,}".format(len(parcels_dups)))
print("\n Number of parcels dup using is_duplicate : {:,}".format(all_parcels_w_cap['is_duplicated'].sum()))
print("\n remaining capacity sum : {:,}".format(compare_cap_w_units.remaining_cap.sum()))
print("\n Total capacity minus units needed : {:,}".format(total_capacity - units_needed)) 

In [None]:
print("parcels with negative remaining capacity")
compare_cap_w_units.loc[compare_cap_w_units.remaining_cap<0]

In [None]:
# Parcels that still have capacity left, and how many of them carry ADU or
# SGOA capacity.
has_remaining_cap = compare_cap_w_units.remaining_cap > 0
parcels_w_remaining = compare_cap_w_units.loc[has_remaining_cap]
has_adu_or_sgoa = (parcels_w_remaining.capacity_adu > 0) | (parcels_w_remaining.capacity_sgoa > 0)
print("\n Total number of parcels with remaining capacity : {:,}"
      .format(len(parcels_w_remaining)))
print("\n Total number of parcels with remaining capacity and adu>0 or sgoa>0 : {:,}"
      .format(len(parcels_w_remaining.loc[has_adu_or_sgoa])))

In [None]:
# "Not used": remaining capacity equals full capacity. Every other parcel with
# remaining capacity is counted as partially used.
fully_unused_rows = parcels_w_remaining.loc[
    parcels_w_remaining.remaining_cap == parcels_w_remaining.capacity]
parcels_not_used = len(fully_unused_rows)
partial_used = len(parcels_w_remaining) - parcels_not_used
print("\n Total number of parcels not used : {:,}"
      .format(parcels_not_used))
print("\n Total number of parcels that are partially used : {:,}"
      .format(partial_used))
print("\n Total number of parcels that have remaining capacity : {:,}"
      .format(partial_used + parcels_not_used))

In [None]:
parcels_partially_used = parcels_w_remaining.loc[parcels_w_remaining.capacity>parcels_w_remaining.remaining_cap]
len(parcels_partially_used)

In [None]:
parcels_w_remaining.loc[((parcels_w_remaining.capacity_adu<=0) & (parcels_w_remaining.capacity_sgoa<=0))]

In [None]:
parcels_w_remaining.loc[parcels_w_remaining.unit_change<parcels_w_remaining.capacity_jur]

In [None]:
parcels_w_remaining.head()

In [None]:
sgoa_remaining = parcels_w_remaining.loc[((parcels_w_remaining.capacity_sgoa>0) &\
                                         (parcels_w_remaining.capacity_adu<=0)) ]
len(sgoa_remaining)

In [None]:
adu_remaining = parcels_w_remaining.loc[((parcels_w_remaining.capacity_sgoa<=0) &\
                                         (parcels_w_remaining.capacity_adu>0)) ]
len(adu_remaining)

In [None]:
print("\n Total number of parcels that have remaining capacity : {:,}".\
      format((len(sgoa_remaining) + len(adu_remaining))))
print("\n Total number of parcels that have remaining capacity : {:,}".\
      format((len(parcels_w_remaining))))

In [None]:
sgoaur = sgoa_remaining.remaining_cap.sum()
print("\n Total number of units of remaining from sgoas : {:,}".\
      format((sgoaur )))
aduur = adu_remaining.remaining_cap.sum()
print("\n Total number of units of remaining from adus : {:,}".\
      format((aduur )))
print("\n Total number of units of remaining : {:,}".\
      format((sgoaur + aduur )))
print("\n Total capacity minus units needed : {:,}".format(total_capacity - units_needed)) 

In [None]:
adu_remaining.loc[adu_remaining.capacity_jur>0]

In [None]:
sgoa_remaining_Partial_use = sgoa_remaining.loc[(sgoa_remaining.unit_change>0)]

In [None]:
len(sgoa_remaining_Partial_use)

In [None]:
partial_use_w_capacity_jur = sgoa_remaining_Partial_use.loc[sgoa_remaining_Partial_use.capacity_jur>0]

In [None]:
partial_use_w_capacity_sgoa = sgoa_remaining_Partial_use.loc[sgoa_remaining_Partial_use.capacity_jur==0]

In [None]:
len(partial_use_w_capacity_jur)

In [None]:
print("\n Total number of partial used parcels : {:,}".format(len(sgoa_remaining_Partial_use))) 
print("\n Total number of parcels that have jurisdiction capacity but remaining sgoa capacity : {:,}".\
      format(len(partial_use_w_capacity_jur)))
print("\n Total number of partial used parcels that have only sgoa capacity : {:,}".\
      format(len(partial_use_w_capacity_sgoa)))

In [None]:
# sgoa_remaining_w_capacity_jur

In [None]:
# parcels_partially_used

## Check that sched dev is used up

In [None]:
sched_devs = compare_cap_w_units.loc[compare_cap_w_units.capacity_sch>0]

In [None]:
sched_devs.head()

##### do any of the parcels with sched dev capacity have additional capacity?

In [None]:
# any sched devs with assigned capacity?
sched_devs.loc[sched_devs.capacity_sgoa>0]

In [None]:
sched_devs.loc[sched_devs.capacity_adu>0]

#### do any of the parcels with sched dev capacity have jur feedback capacity?

In [None]:
# any sched devs with jur capacity?
sched_devs.loc[sched_devs.capacity_jur>0]

#### does sched dev capacity from the merged df match the urbansim.scheduled_development_parcel capacity?

In [None]:
print(sched_devs.capacity.sum()) # merged df
print(sched_dev_capacity) #urbansim.scheduled_development_parcel 

#### unit change for sched dev should match capacity

In [None]:
print("\n Unit change for sched dev parcels : {:,}".\
      format(sched_devs.unit_change.sum()))

#### check no remaining capacity for sched dev parcels

In [None]:
sched_devs.loc[sched_devs.remaining_cap>0]

### compare jurisdiction capacity to output of simulation by jurisdiction

### make sure duplicate adus do not get picked

In [None]:
# make sure no adus get picked twice:
# the row count of 'adu' output for the run should equal the count of distinct
# parcel ids in those rows.
# total number adus
adu_sql = '''SELECT count(*)
  FROM [urbansim].[urbansim].[urbansim_lite_output]
  where run_id = %s and capacity_type = 'adu' '''
adu_sql = adu_sql % run_id
adu_use = pd.read_sql(adu_sql,mssql_engine)
# Each query returns a single cell, so read it by position. int(frame.values)
# relied on converting a 2-D array to a scalar, which NumPy 1.25+ deprecates.
adu_row_count = int(adu_use.iloc[0, 0])
print("\n Number of parcels with adu unit change : {:,}".format(adu_row_count))
#22,102
# distinct adus
adu_distinct_sql = ''' SELECT count(distinct([parcel_id]))
  FROM [urbansim].[urbansim].[urbansim_lite_output]
  where run_id = %s and capacity_type = 'adu' '''
adu_distinct_sql  =adu_distinct_sql  % run_id
adu_use_distinct = pd.read_sql(adu_distinct_sql,mssql_engine)
adu_distinct_parcel_count = int(adu_use_distinct.iloc[0, 0])
print("\n Number of distinct parcels with adu unit change : {:,}".format(adu_distinct_parcel_count))
#22,102

In [None]:
# Query the summed unit_change (as hs_change) per jurisdiction name, jur_id,
# simulation year and capacity type for the current run_id, then load the
# result into the dataframe `hs`.
hs_change_sql = '''
    SELECT j.name, i.jur_id,year_simulation,
    sum(unit_change) as hs_change, capacity_type
      FROM urbansim.urbansim.urbansim_lite_output o 
      JOIN [isam].[xpef04].[parcel2015_mgra_jur_cpa] i on i.parcel_id = o.parcel_id
      JOIN urbansim.ref.jurisdiction j on i.jur_id = j.jurisdiction_id
     WHERE run_id =  %s and i = 1
     GROUP BY j.name,i.jur_id,capacity_type,year_simulation
      ORDER BY j.name,i.jur_id''' % run_id
hs = pd.read_sql(sql=hs_change_sql, con=mssql_engine)

In [None]:
# Total hs_change per simulation year, as a two-column frame:
# year_simulation, subregional_simulation.
units_added_by_yr = (
    hs.groupby(["year_simulation"])["hs_change"]
    .sum()
    .to_frame("subregional_simulation")
    .reset_index()
)

## compare units added in subregional to housing unit targets in forecast

In [None]:
# Total hs_change per simulation year.
units_added_by_yr = (
    hs.groupby(["year_simulation"])["hs_change"]
    .sum()
    .to_frame("subregional_simulation")
    .reset_index()
)

# Join the per-year targets with the simulated totals. The default (inner)
# merge keeps only years present in both frames.
housing_units_by_yr = (
    units_needed_df[['yr', 'housing_units_add']]
    .merge(units_added_by_yr, left_on='yr', right_on='year_simulation')
    .astype({'housing_units_add': int})
    .rename(columns={"housing_units_add": "forecast"})
    .drop(columns=['year_simulation'])
    .set_index('yr')
)

# One subplot per column (forecast, subregional_simulation).
axes = housing_units_by_yr.plot(style='.-', subplots=True, figsize=(6, 6))
version_label = str(versions['target_housing_units_version'])
ptitle = 'DRAFT Housing Units By Year \n(version_id=' + version_label + ')'
axes[0].set_title(ptitle, size=16)
axes[0].set_ylabel('housing units')
axes[1].set_ylabel('housing units')


## Housing unit target cumulative sum

In [None]:
# Running totals of the forecast and simulated units, plus the per-year gap
# (forecast minus simulation).
forecast_units = housing_units_by_yr['forecast']
simulated_units = housing_units_by_yr['subregional_simulation']
housing_units_by_yr['cumulative sum hs forecast'] = forecast_units.cumsum()
housing_units_by_yr['cumulative sum simulation'] = simulated_units.cumsum()
housing_units_by_yr['diff'] = forecast_units - simulated_units
housing_units_by_yr

## Housing Units by Capacity Type Plot

#### replace sgoa subtypes with "sgoa"

In [None]:
# Work on a copy of hs in which every cell equal to one of the sgoa subtype
# codes is relabelled "sgoa"; hs itself is left untouched.
sgoa_subtypes = ['cc', 'mc', 'tc', 'tco', 'uc']
hs2 = hs.replace(to_replace=sgoa_subtypes, value='sgoa')

#### group by year simulation and capacity type and sum unit change

In [None]:
# Total hs_change per (year_simulation, capacity_type) pair, as a flat frame
# with the sum in the column units_by_type.
units_added_by_capacity_type_and_yr = (
    hs2.groupby(["year_simulation", "capacity_type"])["hs_change"]
    .sum()
    .to_frame("units_by_type")
    .reset_index()
)

#### pivot to make capacity types as columns: 'sch','jur','adu','sgoa'

In [None]:
# Reshape to one row per simulation year and one column per capacity type.
# pivot() already leaves year_simulation as the index, so no reset/set
# round trip is needed; rename_axis drops the 'capacity_type' columns label.
type_pivot = (
    units_added_by_capacity_type_and_yr
    .pivot(index='year_simulation', columns='capacity_type',
           values='units_by_type')
    .rename_axis(None, axis=1)
    .fillna(0)  # a year with no units of a given type becomes 0
)
# cols = ['sch','jur','adu','sgoa']
cols = ['sch','jur']
# Select the reported columns and cast in one step. This replaces the
# deprecated DataFrame.applymap and avoids assigning into a column slice.
type_pivot = type_pivot[cols].astype(np.int32)

In [None]:
# type_pivot = type_pivot.loc[type_pivot.index.isin(list(range(2017,2023)))]

In [None]:
# Folder (under the current working directory) that receives the saved plots.
dirname = os.path.join(os.getcwd(),'plots')#,datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
# Create it if missing. exist_ok avoids the bare `except:` that previously
# swallowed every error raised by os.stat.
os.makedirs(dirname, exist_ok=True)

In [None]:
# NOTE: the width and fig size are important to show adu. 
# if fig size and width are too small the bar is no longer visible.
# see https://stackoverflow.com/questions/47740478/matplotlib-bar-chart-some-bars-are-not-visible-and-seem-to-be-of-different-width

# Stacked bar chart of housing units added per forecast year, one colour per
# capacity-type column of type_pivot.
plt.style.use('ggplot')
ax = type_pivot.plot(kind='bar',stacked=True,width=0.5,
                     color = ['#1f77b4','#ff7f0e','#2ca02c','#d62728'],
                     figsize=(18, 8))
ax.set_ylabel("housing units added",size=20)
ax.set_xlabel("forecast year",size=20)
ptitle = 'DRAFT Housing Units (' + str(units_added) + ')' + ' By Capacity Type\nversion id = '\
+ str(versions['target_housing_units_version'])
ax.set_title(ptitle,size=20)
plt.legend(loc=1, prop={'size': 16})
plt.tick_params(axis='both', which='major', labelsize=14) 
plt.show()

# Save the figure into the plots folder; the file name is prefixed with the
# run id. os.path.join builds the path instead of concatenating with '//'.
fig = ax.get_figure()
plotname = os.path.join(dirname, str(run_id) + 'barplot_region.png')
fig.savefig(plotname)

### QC: years where "units to add" does not equal "units added"

In [None]:
# Build a renamed copy of housing_units_by_yr and list the years in which
# the simulated units differ from the forecast units.
hu = housing_units_by_yr.rename(columns={
    "forecast": "hu_forecast",
    "subregional_simulation": "hu_subregional",
})
hu['DIFF'] = hu['hu_subregional'] - hu['hu_forecast']  # simulation minus forecast
hu['cum_sum_forecast'] = hu['hu_forecast'].cumsum()
hu['cum_sum_subregional'] = hu['hu_subregional'].cumsum()

mismatched_years = hu.loc[hu['hu_forecast'] != hu['hu_subregional']]
print('\n\nQC: Years that "Units to Add" not equal to "Units Added"')
print(mismatched_years)

### Table: DRAFT Housing Units by Capacity Type (and Percentage of Total Units)

In [None]:
# Units per year by capacity type, with a per-year total column and a final
# 'Total' row holding the column sums.
table_of_units = type_pivot.assign(total_units=type_pivot.sum(axis=1))
# unit_columns = ['sch','jur','adu','sgoa','total_units']
unit_columns = ['sch', 'jur', 'total_units']
table_of_units = table_of_units.astype({col: int for col in unit_columns})
# table_of_units['cumulative units'] = table_of_units['total_units'].cumsum()
column_totals = table_of_units.sum()
table_of_units.loc['Total'] = column_totals
# table_of_units.loc[table_of_units.index=='Total','cumulative units'] = ''
table_of_units

In [None]:
# Share of each year's units contributed by each capacity type, formatted as
# percentage strings (e.g. '12.34%').
percent_of_units = type_pivot.copy()

# Only compute shares for capacity types that are actually columns of
# type_pivot; accessing a missing one (e.g. adu/sgoa when the pivot is
# limited to sch/jur) would raise AttributeError.
capacity_types = [c for c in ('sch', 'jur', 'adu', 'sgoa')
                  if c in percent_of_units.columns]

# Row total over all capacity-type columns (taken before any share column
# is added).
percent_of_units['total_units'] = percent_of_units.sum(axis=1)

share_columns = []
for capacity_type in capacity_types:
    share_column = capacity_type + '_share_per_year'
    share = percent_of_units[capacity_type] / percent_of_units['total_units']
    percent_of_units[share_column] = (share * 100).round(2).astype(str) + '%'
    share_columns.append(share_column)

percent_of_units[share_columns]



### Housing units urbansim.parcel (2017) compared to [isam].[defm].[housing_units] 

In [None]:
# Compare the 2017 housing units summed from urbansim.parcel with the 2017
# figure in [isam].[defm].[housing_units], then report the 2050 total.
du_2017 = capacity_df.du_2017.sum()
print("\n Housing units 2017 from urbansim.parcel: {:,}\n".format(du_2017))
housing_unit_estimates_sql = '''SELECT  [yr],[du]FROM [isam].[defm].[housing_units]'''
housing_unit_estimates = pd.read_sql(housing_unit_estimates_sql, mssql_engine)

# Extract the 2017 estimate once. .item() raises unless exactly one row
# matches, which keeps the strictness of the former int(Series) call without
# relying on that deprecated conversion.
is_2017 = housing_unit_estimates.yr == 2017
du_2017_estimate = int(housing_unit_estimates.loc[is_2017, 'du'].item())

print("\n Housing units 2017 from estimates in defm.housing_units: {:,}\n".\
      format(du_2017_estimate))
print("\n DIFFERENCE in housing units urbansim and defm.housing_units: {:,}\n".\
      format(du_2017_estimate - du_2017))

print("\n Total Housing units 2050: {:,}\n\n (= {:,} du 2017 +  {:,} units needed) \n".\
      format(du_2017 + units_needed,du_2017,units_needed)) 
# housing units du_2017 from urbansim.parcel
# 1,192,748
# housing units from estimates:
# 2015: 1,183,211
# 2016: 1,194,603
# 2017: 1,205,531
# 2018: 1,217,060
In [None]:
### QC HS per year