# QC: Units added = parcel capacity

In [38]:
import os
import sys

In [39]:
# Put the directory two levels above the working directory on sys.path so
# that the utils module (in urbansim) can be imported.
cwd = os.getcwd()
parentdir = os.path.abspath(os.path.join(cwd, os.pardir))
parentdir2 = os.path.abspath(os.path.join(cwd, os.pardir, os.pardir))
sys.path.append(parentdir2)

In [40]:
import pandas as pd
import numpy as np
import utils

In [41]:
from sqlalchemy import create_engine
from database import get_connection_string

In [42]:
%matplotlib inline

In [43]:
# Connect to the database.
# The config path is written with forward slashes: a backslash path inside a
# normal (non-raw) string literal relies on invalid escape sequences such as
# `\.` and `\d`, which Python warns about, and it only resolves on Windows.
db_connection_string = get_connection_string('../../data/config.yml', 'mssql_db')
mssql_engine = create_engine(db_connection_string)

In [44]:
# Read the scenario settings; versions['additional_capacity_version'] is used
# by the additional-capacity query.
# NOTE(review): 'scenario' is presumably the YAML section name - confirm in utils.
scenario_config_path = '../../data/scenario_config.yaml'
versions = utils.yaml_to_dict(scenario_config_path, 'scenario')

# Get Capacity

In [45]:
# Capacity from the parcel table (capacity_2), tagged as 'jur feedback'.
# Only rows with capacity_2 > 0 and either no site_id or site_id 15008 are kept.
parcel_capacity_sql = '''
    SELECT [parcel_id],[capacity_2] as capacity, 'jur feedback' as cap_type
      FROM [urbansim].[urbansim].[parcel]
      WHERE [capacity_2] > 0 and ([site_id] IS NULL or site_id = 15008)'''
capacity_df = pd.read_sql(sql=parcel_capacity_sql, con=mssql_engine)

In [46]:
# Additional capacity (du) per parcel for the configured
# additional_capacity_version, restricted to parcels present in urbansim.parcel.
assigned_parcel_sql = '''
SELECT  a.parcel_id, a.du as capacity, type as cap_type
   FROM [urbansim].[urbansim].[additional_capacity] a
   JOIN urbansim.parcel p on p.parcel_id = a.parcel_id
  WHERE version_id = %s''' % versions['additional_capacity_version']
assigned_df = pd.read_sql(sql=assigned_parcel_sql, con=mssql_engine)

# Recode the individual type codes to the single capacity type 'sgoa'.
# Only exact cell values are matched, so 'tc' does not touch 'tco'.
sgoa_type_codes = ['cc', 'mc', 'tc', 'tco', 'uc']
assigned_df = assigned_df.replace(sgoa_type_codes, 'sgoa')

In [47]:
# Scheduled-development capacity (capacity_3 > 0), tagged as 'sched dev'.
sched_dev_parcel_sql = '''
SELECT parcel_id,[capacity_3] as capacity,'sched dev' as cap_type  
FROM [urbansim].[urbansim].[scheduled_development_parcel]
where capacity_3> 0'''
sched_dev_df = pd.read_sql(sql=sched_dev_parcel_sql, con=mssql_engine)

In [48]:
# Stack the three capacity sources into one long table; a parcel can appear
# once per source, and the original row indexes are kept as-is.
capacity_sources = [capacity_df, sched_dev_df, assigned_df]
df = pd.concat(capacity_sources, sort=True)

In [49]:
# --- Queries: parcel -> (jur_id, cpa_id) lookup plus the name tables ---------
lookup_sql = '''
SELECT parcel_id,jur_id,cpa_id
FROM [isam].[xpef04].[parcel2015_mgra_jur_cpa] 
WHERE  i=1'''
cocpa_names_sql = '''
    SELECT zone as cpa_id, name as cocpa
    FROM data_cafe.ref.geography_zone WHERE geography_type_id = 20'''
cicpa_names_sql = '''
    SELECT zone as cpa_id, name as cicpa
    FROM data_cafe.ref.geography_zone WHERE geography_type_id = 15'''
jur_name_sql = '''SELECT [jurisdiction_id] as jur_id,[name] as jur_name FROM [urbansim].[ref].[jurisdiction]'''

lookup_df = pd.read_sql(lookup_sql, mssql_engine)
cocpa_names = pd.read_sql(cocpa_names_sql, mssql_engine)
cicpa_names = pd.read_sql(cicpa_names_sql, mssql_engine)
jur_name = pd.read_sql(jur_name_sql, mssql_engine)

# --- Attach the names; left joins keep every lookup row ----------------------
for names_df, join_key in ((cocpa_names, 'cpa_id'),
                           (cicpa_names, 'cpa_id'),
                           (jur_name, 'jur_id')):
    lookup_df = pd.merge(lookup_df, names_df, on=join_key, how='left')

for id_column in ('cpa_id', 'jur_id'):
    lookup_df[id_column] = lookup_df[id_column].astype(int)

# --- Combined jurisdiction/CPA key and label ---------------------------------
# Default to the jurisdiction name and id. For jur_id 14 and 19 use the CPA
# instead: the cicpa name for 14, the cocpa name for 19, and cpa_id as the key.
lookup_df['jcpa_name'] = lookup_df['jur_name']
lookup_df['jcpa'] = lookup_df['jur_id']
for cpa_level_jur, cpa_name_column in ((14, 'cicpa'), (19, 'cocpa')):
    in_jur = lookup_df.jur_id == cpa_level_jur
    lookup_df.loc[in_jur, 'jcpa_name'] = lookup_df[cpa_name_column]
    lookup_df.loc[in_jur, 'jcpa'] = lookup_df['cpa_id']

# The helper columns are no longer needed once jcpa / jcpa_name exist.
lookup_df = lookup_df.drop(columns=['cocpa', 'cicpa', 'jur_name', 'cpa_id'])

In [50]:
# Attach jur_id / jcpa / jcpa_name to every capacity row; the left join keeps
# capacity rows whose parcel_id has no lookup match.
df = pd.merge(left=df, right=lookup_df, how='left', on='parcel_id')

In [51]:
# Store parcel_id as an integer column.
parcel_id_as_int = df.parcel_id.astype(int)
df['parcel_id'] = parcel_id_as_int

In [52]:
# df.loc[df.jcpa_name=='Pendleton-De Luz'].sort_values(by='parcel_id')

### Parcels with more than one capacity type

#### i.e. more capacity (sgoa) was added to the parcel than the jurisdiction provided

In [53]:
# Flag every repeat occurrence of a parcel_id (the first occurrence is not flagged).
df['is_duplicated'] = df.duplicated(subset=['parcel_id'])
duplicated_parcels = df[df['is_duplicated']]['parcel_id'].tolist()
print("number of parcels with more than one capacity type (duplicate parcels): {:,}".format(len(duplicated_parcels)))
# Keep all rows, first occurrences included, for the flagged parcels.
parcels_w_multiple_cap_types = df[df['parcel_id'].isin(duplicated_parcels)]

number of parcels with more than one capacity type (duplicate parcels): 49


In [54]:
# One row per parcel (with its jcpa key and name) and one column per capacity
# type, holding the summed capacity. The aggregator is given as the string
# 'sum' rather than np.sum: pandas currently maps np.sum to its own sum, but
# newer versions warn that the callable will be applied directly in future.
parcels_w_mult_cap_type = pd.pivot_table(parcels_w_multiple_cap_types, values='capacity',
                                         index=['parcel_id', 'jcpa', 'jcpa_name'],
                                         columns=['cap_type'], aggfunc='sum')
# A capacity type that a parcel does not have counts as zero capacity.
parcels_w_mult_cap_type = parcels_w_mult_cap_type.fillna(0)
#print("parcels with more than one capacity type:")
#parcels_w_mult_cap_type.sort_values(by='jcpa')

In [55]:
# print("parcels with more than one capacity type:")
# parcels_w_mult_cap_type.sort_values(by='jcpa')

###  Sum capacity over parcels (so each row has a unique parcel id)

In [56]:
# Collapse to one row per (parcel_id, jcpa, jcpa_name), summing capacity over
# the capacity types.
parcel_keys = ["parcel_id", "jcpa", "jcpa_name"]
capacity = df.groupby(parcel_keys)['capacity'].sum().reset_index()

### Check that the capacity sum is the same before and after the group by

In [57]:
# Total capacity in the grouped (one row per parcel) table.
grouped_capacity_total = capacity['capacity'].sum()
print("capacity sum after group by: {:,}".format(grouped_capacity_total))

capacity sum after group by: 491,015.0


In [58]:
# Total capacity in the ungrouped table, for comparison with the grouped total.
ungrouped_capacity_total = df['capacity'].sum()
print("capacity sum before group by: {:,}".format(ungrouped_capacity_total))

capacity sum before group by: 491,015.0


# Get units added

In [59]:
# get max run id from urbansim
run_id_sql = '''
SELECT max(run_id)
  FROM [urbansim].[urbansim].[urbansim_lite_output]
'''
run_id_df = pd.read_sql(run_id_sql, mssql_engine)
# The query returns a single cell. Index it explicitly instead of calling
# int() on the whole 2-D array, which NumPy has deprecated.
run_id = int(run_id_df.values[0][0])
print("\n   Max run id : {:,}".format(run_id))


   Max run id : 411


In [60]:
# run_id = input()

In [61]:
# Net unit change per parcel for the selected run.
output_sql = '''
    SELECT parcel_id,
           sum(unit_change) as unit_change
    FROM urbansim.urbansim.urbansim_lite_output urbansim_lite_output 
    WHERE  run_id =  %s
    GROUP BY parcel_id''' % run_id
output = pd.read_sql(sql=output_sql, con=mssql_engine)

In [62]:
# Total units added across all parcels in the run output.
units_added_total = output['unit_change'].sum()
print("units added sum: {:,}".format(units_added_total))

units added sum: 418,845


In [63]:
# Put units added next to capacity. The left join keeps every capacity parcel;
# parcels with no output row get a missing unit_change.
output_capacity = pd.merge(left=capacity, right=output, on='parcel_id', how='left')

In [64]:
# output_capacity.head()

In [65]:
# Parcels with no output row had no units added, so treat them as 0.
# The result is assigned back to the column: an in-place fillna on a selected
# column is chained assignment, which pandas does not guarantee will update
# the parent frame (and which does nothing under Copy-on-Write).
output_capacity['unit_change'] = output_capacity['unit_change'].fillna(0)

In [66]:
# Capacity left on each parcel after subtracting the units added.
remaining_by_parcel = output_capacity.capacity - output_capacity.unit_change
output_capacity['remaining'] = remaining_by_parcel

In [67]:
# Parcels whose capacity was not exactly used up.
output_capacity[output_capacity.remaining != 0]

Unnamed: 0,parcel_id,jcpa,jcpa_name,capacity,unit_change,remaining
2,168,1442,Uptown,3.0,0.0,3.0
33,297,1404,Downtown,13.0,0.0,13.0
42,319,1404,Downtown,9.0,0.0,9.0
127,614,1469,Southeastern:Southeastern San Diego,4.0,0.0,4.0
128,615,1469,Southeastern:Southeastern San Diego,5.0,0.0,5.0
130,635,1408,Greater Golden Hill,4.0,0.0,4.0
133,665,1408,Greater Golden Hill,8.0,0.0,8.0
135,677,1440,Torrey Pines,1.0,0.0,1.0
138,707,1402,Barrio Logan,7.0,0.0,7.0
148,817,1428,Greater North Park,7.0,0.0,7.0


In [68]:
# Write the parcels with non-zero remaining capacity to a CSV file in the
# working directory.
parcels_with_remaining = output_capacity[output_capacity.remaining != 0]
parcels_with_remaining.to_csv('remaining1.csv')

In [69]:
# Units added on parcels that appear in the capacity table.
output_capacity['unit_change'].sum()

418845.0

In [70]:
# Total capacity in the merged table.
output_capacity['capacity'].sum()

491015.0

In [71]:
# Total capacity left over after the run.
output_capacity['remaining'].sum()

72170.0

In [72]:
#output_capacity.loc[output_capacity.jcpa_name=='Pendleton-De Luz']

# QC Pass/Fail

In [73]:
# Reconcile the parcel-level remainder with the headline totals.
# `remaining` is capacity minus units added, computed only for parcels in the
# grouped capacity table. It should equal (total capacity in df) minus (total
# units added in the run output). The two differ when units were added on
# parcels that are not in the capacity table (remaining comes out too high),
# or when the grouped capacity total is lower than df's total (too low).
# Both directions are mismatches, so test for any difference rather than
# only "less than". The small tolerance absorbs float round-off only.
remaining_units = output_capacity.remaining.sum()
expected_remaining = df.capacity.sum() - output.unit_change.sum()
if abs(remaining_units - expected_remaining) > 1e-6:
    print("for run id : {:,}\n".format(run_id))
    print('\n\nQC Fail: Urbansim capacity does not match units added by parcel\n\n')
    print("Remaining units: {:,}\n".format(remaining_units))
else:
    print("for run id : {:}\n".format(run_id))
    print('\nUrbansim capacity matches units added by parcel\n\n')
    print("Remaining units: {:,}\n".format(remaining_units))
    print('QC Pass\n\n\n')

for run id : 411


Urbansim capacity matches units added by parcel


Remaining units: 72,170.0

QC Pass





In [74]:
# Display the run id that the QC check above was evaluated for.
run_id

411