# QC: Units added = parcel capacity

In [None]:
import os
import sys

In [None]:
# Make the directory two levels above the working directory importable,
# so that the `utils` module living there can be found.
cwd = os.getcwd()
parentdir = os.path.abspath(os.path.join(cwd, os.pardir))
# abspath collapses both ".." components, giving the grandparent of cwd.
parentdir2 = os.path.abspath(os.path.join(cwd, os.pardir, os.pardir))
sys.path.append(parentdir2)

In [None]:
import pandas as pd
import numpy as np
import utils

In [None]:
from sqlalchemy import create_engine
from database import get_connection_string

In [None]:
%matplotlib inline

In [None]:
# connect to database
# The config path is assembled with os.path.join so that no backslashes appear
# in a string literal (sequences such as '\d' or '\c' are invalid escapes) and
# the path resolves with the separator of the running platform.
db_connection_string = get_connection_string(
    os.path.join(os.pardir, os.pardir, 'data', 'config.yml'),
    'mssql_db',
)
# Engine shared by every pd.read_sql call in this notebook.
mssql_engine = create_engine(db_connection_string)

In [None]:
# get versions
# NOTE(review): presumably returns the 'scenario' section of the YAML file as a
# dict — confirm in utils.yaml_to_dict. The result is indexed with
# 'additional_capacity_version' when the additional-capacity query is built.
versions = utils.yaml_to_dict('../../data/scenario_config.yaml', 'scenario')

# Get Capacity

In [None]:
# Parcels with positive capacity_2 that either have no site_id or belong to
# site 15008; every row is labelled with cap_type 'jur feedback'.
parcel_capacity_sql = '''
    SELECT [parcel_id],[capacity_2] as capacity, 'jur feedback' as cap_type
      FROM [urbansim].[urbansim].[parcel]
      WHERE [capacity_2] > 0 and ([site_id] IS NULL or site_id = 15008)'''
capacity_df = pd.read_sql(sql=parcel_capacity_sql, con=mssql_engine)

In [None]:
# Additional-capacity rows for the configured version, limited (inner join) to
# parcel ids that exist in urbansim.parcel.
assigned_parcel_sql = '''
SELECT  a.parcel_id, a.du as capacity, type as cap_type
   FROM [urbansim].[urbansim].[additional_capacity] a
   JOIN urbansim.parcel p on p.parcel_id = a.parcel_id
  WHERE version_id = %s'''
# The version id comes from the scenario config loaded into `versions`.
assigned_parcel_sql = assigned_parcel_sql % versions['additional_capacity_version']
assigned_df = pd.read_sql(assigned_parcel_sql, mssql_engine)
# Relabel the individual type codes as 'sgoa' in a single pass. As before, the
# replacement is a whole-value match applied to every column of the frame.
assigned_df.replace(['cc', 'mc', 'tc', 'tco', 'uc'], 'sgoa', inplace=True)

In [None]:
# Scheduled-development parcels with positive capacity_3, labelled 'sched dev'.
sched_dev_parcel_sql = '''
SELECT parcel_id,[capacity_3] as capacity,'sched dev' as cap_type  
FROM [urbansim].[urbansim].[scheduled_development_parcel]
where capacity_3> 0'''
sched_dev_df = pd.read_sql(sql=sched_dev_parcel_sql, con=mssql_engine)

In [None]:
# Stack the three capacity sources (parcel_id, capacity, cap_type) row-wise.
# The original row labels of each source are kept, so index values repeat.
df = pd.concat(
    [capacity_df, sched_dev_df, assigned_df],
    sort=True,
)

In [None]:
# --- Raw tables -------------------------------------------------------------
# parcel_id -> (jur_id, cpa_id) crosswalk.
lookup_sql = '''
SELECT parcel_id,jur_id,cpa_id
FROM [isam].[xpef04].[parcel2015_mgra_jur_cpa] 
WHERE  i=1'''
lookup_df = pd.read_sql(sql=lookup_sql, con=mssql_engine)

# cpa_id -> name, for geography_type_id 20 (exposed as column 'cocpa').
cocpa_names_sql = '''
    SELECT zone as cpa_id, name as cocpa
    FROM data_cafe.ref.geography_zone WHERE geography_type_id = 20'''
cocpa_names = pd.read_sql(sql=cocpa_names_sql, con=mssql_engine)

# cpa_id -> name, for geography_type_id 15 (exposed as column 'cicpa').
cicpa_names_sql = '''
    SELECT zone as cpa_id, name as cicpa
    FROM data_cafe.ref.geography_zone WHERE geography_type_id = 15'''
cicpa_names = pd.read_sql(sql=cicpa_names_sql, con=mssql_engine)

# jur_id -> jurisdiction name.
jur_name_sql = '''SELECT [jurisdiction_id] as jur_id,[name] as jur_name FROM [urbansim].[ref].[jurisdiction]'''
jur_name = pd.read_sql(sql=jur_name_sql, con=mssql_engine)

# --- Attach the three name columns with successive left joins ----------------
lookup_df = (
    lookup_df
    .merge(cocpa_names, on='cpa_id', how='left')
    .merge(cicpa_names, on='cpa_id', how='left')
    .merge(jur_name, on='jur_id', how='left')
)
lookup_df = lookup_df.astype({'cpa_id': int, 'jur_id': int})

# --- Combined jurisdiction/CPA id and name -----------------------------------
# Default: the jurisdiction's own id and name.
lookup_df['jcpa_name'] = lookup_df['jur_name']
lookup_df['jcpa'] = lookup_df['jur_id']

# Jurisdictions 14 and 19 are reported at CPA level instead: the name comes
# from 'cicpa' (jur 14) or 'cocpa' (jur 19), and the id from cpa_id for both.
jur_is_14 = lookup_df['jur_id'] == 14
jur_is_19 = lookup_df['jur_id'] == 19
lookup_df.loc[jur_is_14, 'jcpa_name'] = lookup_df['cicpa']
lookup_df.loc[jur_is_19, 'jcpa_name'] = lookup_df['cocpa']
lookup_df.loc[jur_is_14 | jur_is_19, 'jcpa'] = lookup_df['cpa_id']

# Only parcel_id, jur_id, jcpa_name and jcpa are kept.
lookup_df = lookup_df.drop(columns=['cocpa', 'cicpa', 'jur_name', 'cpa_id'])

In [None]:
# Attach jur_id / jcpa / jcpa_name to every capacity row; parcels missing from
# the lookup are kept (left join) with NaN in those columns.
df = df.merge(lookup_df, on='parcel_id', how='left')

In [None]:
# Store parcel_id as an integer column.
df = df.astype({'parcel_id': int})

In [None]:
# df.loc[df.jcpa_name=='Pendleton-De Luz'].sort_values(by='parcel_id')

### Parcels with more than one capacity type

#### i.e., more capacity (sgoa) was added to the parcel than the jurisdiction provided

In [None]:
# Flag every repeat occurrence of a parcel_id (the first occurrence stays False).
df['is_duplicated'] = df.duplicated(['parcel_id'])
# Distinct parcel ids that occur more than once. De-duplicating keeps the
# printed figure a count of parcels: a parcel present in three rows is flagged
# twice above and would otherwise be counted twice.
duplicated_parcels = df.loc[df['is_duplicated'], 'parcel_id'].unique().tolist()
print("number of parcels with more than one capacity type (duplicate parcels): {:,}".format(len(duplicated_parcels)))
# All rows (first occurrences included) belonging to the repeated parcels.
# NOTE(review): repetition is detected on parcel_id alone, so two rows with the
# same cap_type for one parcel are selected too — confirm that is intended.
parcels_w_multiple_cap_types = df.loc[df['parcel_id'].isin(duplicated_parcels)]

In [None]:
# One row per (parcel_id, jcpa, jcpa_name), one column per cap_type, holding the
# summed capacity. The string alias 'sum' is used rather than np.sum because
# recent pandas versions emit a FutureWarning when a NumPy callable is passed
# as aggfunc and recommend the string form.
parcels_w_mult_cap_type = pd.pivot_table(parcels_w_multiple_cap_types, values='capacity',
                                         index=['parcel_id','jcpa','jcpa_name'],
                                         columns=['cap_type'], aggfunc='sum')
# A cap_type with no rows for a given parcel comes back as NaN; show it as 0.
parcels_w_mult_cap_type.fillna(0,inplace=True)
print("parcels with more than one capacity type:")
# parcels_w_mult_cap_type.sort_values(by='jcpa')

### Sum capacity over parcels (so each row has a unique parcel id)

In [None]:
# Collapse to one row per (parcel_id, jcpa, jcpa_name) with the summed capacity.
capacity = (
    df.groupby(["parcel_id", "jcpa", "jcpa_name"])["capacity"]
    .sum()
    .reset_index()
)

### Check capacity sum

In [None]:
# Total capacity in the grouped, one-row-per-parcel frame.
print("capacity sum after group by: {:,}".format(capacity["capacity"].sum()))

In [None]:
# Total capacity in the ungrouped frame, for comparison with the grouped total.
print("capacity sum before group by: {:,}".format(df["capacity"].sum()))

In [None]:
# units added

In [None]:
capacity.head()

In [None]:
len(capacity)

In [None]:
# Roll parcel capacity up to one row per (jcpa, jcpa_name).
capacity_by_geo = (
    capacity.groupby(["jcpa", "jcpa_name"])["capacity"]
    .sum()
    .reset_index()
)

In [None]:
capacity_by_geo.head()

In [None]:
len(capacity_by_geo)

In [None]:
# Housing units to add per year for one target version.
# NOTE(review): version_id is hard-coded to 112 here, whereas the
# additional-capacity query takes its version from `versions` — confirm this
# is still the intended target version.
hu_sql = '''
   SELECT  [version_id]
      ,[yr]
      ,[housing_units_add]
  FROM [urbansim].[urbansim].[urbansim_target_housing_units]
  where version_id =112'''
hu = pd.read_sql(sql=hu_sql, con=mssql_engine)

In [None]:
# Subregional control rows for one control set, ordered by year then geo_id.
# NOTE(review): subregional_crtl_id is hard-coded to 211 — confirm it matches
# the scenario being checked.
subregion_sql = '''
  SELECT  [subregional_crtl_id]
      ,[yr]
      ,[geo]
      ,[geo_id]
      ,[control]
      ,[control_type]
      ,[max_units]
      ,[scenario_desc]
  FROM [urbansim].[urbansim].[urbansim_lite_subreg_control]
  where subregional_crtl_id = 211
  order by yr,geo_id'''
subregion = pd.read_sql(sql=subregion_sql, con=mssql_engine)

In [None]:
# Pair each year's housing-unit target with that year's control rows; years
# without control rows are kept (left join).
targets = hu.merge(subregion, on='yr', how='left')

In [None]:
# Units per row = control value times the housing units added that year.
targets = targets.assign(units=targets['control'] * targets['housing_units_add'])

In [None]:
targets.head()

In [None]:
# Sum units over all years for each geo_id; the total is exposed as 'targets'.
targets_by_geo = (
    targets.groupby(["geo_id"])["units"]
    .sum()
    .rename("targets")
    .reset_index()
)

In [None]:
targets_by_geo.head()

In [None]:
len(targets_by_geo)

In [None]:
capacity_by_geo.head()

In [None]:
len(capacity_by_geo)

In [None]:
# Line capacity up against targets per geography (jcpa matched to geo_id).
# The outer join keeps geographies that appear on only one side.
results = capacity_by_geo.merge(
    targets_by_geo,
    left_on='jcpa',
    right_on='geo_id',
    how='outer',
)

In [None]:
# Write the comparison table to the current working directory. The row index
# is included as the first column (the pandas default, spelled out here).
results.to_csv('output2.csv', index=True)