In [None]:
from sqlalchemy import create_engine
from pysandag.database import get_connection_string
import pandas as pd

In [None]:
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Build the SQLAlchemy engine from the connection string that
# get_connection_string returns for the config path and the 'mssql_db' key.
# Forward slashes replace the original '..\data\config.yml', which relied on
# the invalid escape sequences '\d' and '\c' and only resolved on Windows.
db_connection_string = get_connection_string('../data/config.yml', 'mssql_db')
mssql_engine = create_engine(db_connection_string)

## Current simulation 

In [None]:
# Output rows of run_id 1 (the "current" simulation).
current_run_units_added_sql = '''
SELECT  [units_index]
      ,[parcel_id]
      ,[units_added]
      ,[year_simulation]
      ,[run_id]
  FROM [urbansim].[urbansim].[urbansim_lite_output_units]
  where run_id = 1'''
# Drop the bookkeeping columns, order by parcel and year, then index by parcel.
units_added_df = (
    pd.read_sql(current_run_units_added_sql, mssql_engine)
    .drop(['units_index', 'run_id'], axis=1)
    .sort_values(by=['parcel_id', 'year_simulation'])
    .set_index('parcel_id')
)

## Previous simulation 

In [None]:
# Output rows of run_id 2 (the "previous" simulation), shaped like units_added_df.
previous_run_units_sql = '''SELECT  [units_index]
      ,[parcel_id]
      ,[units_added]
      ,[year_simulation]
      ,[run_id]
  FROM [urbansim].[urbansim].[urbansim_lite_output_units]
  where run_id = 2'''
previous_run = (
    pd.read_sql(previous_run_units_sql, mssql_engine)
    .drop(['run_id', 'units_index'], axis=1)
    # NOTE(review): the query above returns neither 'year_built' nor
    # 'residential_units', so this rename appears to be a no-op kept from an
    # earlier source table - confirm before removing.
    .rename(columns={'year_built': 'year_simulation',
                     'residential_units': 'units_added'})
    .sort_values(by=['parcel_id', 'year_simulation'])
    .set_index('parcel_id')
)

## Comparison of simulations

In [None]:
# DataFrame.equals compares the two frames as a whole; it prints True or False.
# print is written in call form so the cell parses on Python 2 and Python 3.
print("\nDo the simulations produce the same results?")
print(previous_run.equals(units_added_df))

## Units added by jurisdiction

In [None]:
# Parcels whose capacity is > 0, one row per parcel. The CTE sums
# residential_units and counts buildings per parcel; the LEFT JOIN plus
# COALESCE reports 0 for parcels that have no rows in the building table.
# partial_build is a constant 0 column. The result is indexed by parcel_id.
parcels_sql = '''
  WITH bldgs_by_parcel AS (SELECT parcel_id, SUM(residential_units) AS residential_units, 
                                  count(building_id) AS num_of_bldgs
                           FROM   urbansim.urbansim.building GROUP BY parcel_id)
  SELECT parcels.parcel_id, parcels.jurisdiction_id, parcels.site_id,
         parcels.capacity, 
         COALESCE(bldgs_by_parcel.residential_units,0) AS residential_units,
         COALESCE(bldgs_by_parcel.num_of_bldgs,0) AS bldgs,
         0 as partial_build
  FROM urbansim.urbansim.parcel parcels
  LEFT JOIN bldgs_by_parcel 
  ON bldgs_by_parcel.parcel_id = parcels.parcel_id
  WHERE parcels.capacity > 0
'''
parcels_df =  pd.read_sql(parcels_sql, mssql_engine,index_col='parcel_id')

In [None]:
# print previous_run.head()
# print units_added_df.head()
# print units_added_df.head()
# print previous_run.tail()
# print units_added_df.tail()

In [None]:
# Move parcel_id from the index back to a column (later cells address it as
# units_added_df.parcel_id). The guard keeps the cell idempotent: an
# unconditional reset_index would add a stray 'index' column on every re-run.
if units_added_df.index.name == 'parcel_id':
    units_added_df.reset_index(inplace=True)

# Attach the parcel attributes (jurisdiction_id, capacity, ...) to every output row.
units_added_w_city = units_added_df.join(parcels_df, on='parcel_id')
print(units_added_w_city.head())

In [None]:
# Units added by the simulation, summed per jurisdiction, as a one-column
# frame indexed by jurisdiction_id.
units_by_city = (
    units_added_w_city
    .groupby('jurisdiction_id')['units_added']
    .sum()
    .to_frame('sr14_no_sched_dev')
)

# print units_by_city

## Compare to series 13

In [None]:
# Series 13 comparison figures, one row per jurisdiction:
#   base_yr_hs_sr13 / capacity_sr13 - sums of hs and cap_hs from
#       sr13_final.capacity (scenario 0, increment 2012), grouped by City;
#   forecast_sr13 - 2050 units minus 2012 units from the demographic
#       warehouse housing fact table (datasource_id 13, units > 0).
# The three CTEs are combined with inner joins, so a jurisdiction missing
# from any one of them is absent from the result. Indexed by jurisdiction_id.
sr13_forecast_sql = ''' 
WITH sr13_capacity AS (
SELECT  City,sum([hs]) as base_yr_hs_sr13
      ,sum([cap_hs]) as capacity_sr13
  FROM [regional_forecast].[sr13_final].[capacity] x
  inner join [regional_forecast].[sr13_final].[mgra13] as y 
  on y.mgra = x.mgra
   where scenario = 0 and increment = 2012 --and y.City = 1
   GROUP BY City
),
sr13_2050 AS (
SELECT  y.jurisdiction_id,yr_id, sum(units) as units2050
FROM [demographic_warehouse].[fact].[housing] as x
inner join [demographic_warehouse].[dim].[mgra_denormalize] as y 
on x.mgra_id = y.mgra_id
where units > 0 and x.datasource_id = 13 and x.yr_id = 2050
group by yr_id,y.jurisdiction_id
),
sr13_2012 AS (
SELECT  y.jurisdiction_id,yr_id, sum(units) as units2012
FROM [demographic_warehouse].[fact].[housing] as x
inner join [demographic_warehouse].[dim].[mgra_denormalize] as y 
on x.mgra_id = y.mgra_id
where units > 0 and x.datasource_id = 13 and x.yr_id = 2012
group by yr_id,y.jurisdiction_id
)
SELECT sr13_2012.jurisdiction_id,base_yr_hs_sr13,capacity_sr13,sr13_2050.units2050 - sr13_2012.units2012 as forecast_sr13
FROM sr13_2012 
JOIN sr13_2050
ON sr13_2012.jurisdiction_id = sr13_2050.jurisdiction_id
JOIN sr13_capacity
ON sr13_capacity.City = sr13_2012.jurisdiction_id
order by sr13_2012 .jurisdiction_id
'''
sr13_forecast =  pd.read_sql(sr13_forecast_sql, mssql_engine,index_col='jurisdiction_id')

In [None]:
# Capacity summed per jurisdiction over parcels that carry a site_id,
# returned as column sr14_sched_dev and indexed by jurisdiction_id.
sched_dev_sql = '''
select jurisdiction_id, sum(capacity) as sr14_sched_dev 
from urbansim.urbansim.parcel
where  site_id is NOT NULL
group by jurisdiction_id
order by jurisdiction_id
'''
sched_dev = pd.read_sql(
    sql=sched_dev_sql,
    con=mssql_engine,
    index_col='jurisdiction_id',
)
# print sched_dev

In [None]:
# Line up the simulated units, the series 13 figures and the scheduled
# development capacity per jurisdiction (left joins on the jurisdiction index).
forecast_compare = units_by_city.join(sr13_forecast).join(sched_dev)

# A jurisdiction with no scheduled-development row gets NaN from the left
# join; treat that as 0 so its forecast (and the diff) does not turn into NaN.
forecast_compare['forecast_sr14'] = forecast_compare['sr14_no_sched_dev'].add(
    forecast_compare['sr14_sched_dev'].fillna(0))
forecast_compare['diff'] = forecast_compare['forecast_sr14'] - forecast_compare['forecast_sr13']
print(forecast_compare[['forecast_sr14', 'forecast_sr13', 'diff']])

In [None]:
# Expose jurisdiction_id as a string column so a 'totals' label can sit next
# to the ids. Guarded so re-running the cell does not reset the index twice.
if 'jurisdiction_id' not in forecast_compare.columns:
    forecast_compare.reset_index(inplace=True)
forecast_compare['jurisdiction_id'] = forecast_compare['jurisdiction_id'].astype(str)

# Build the totals row as its own one-row frame and label it directly. This
# replaces DataFrame.append (removed in pandas 2.0) and the hard-coded row
# label 19, which was only right when there were exactly 19 jurisdictions.
totals_row = pd.DataFrame([forecast_compare.sum(numeric_only=True)])
totals_row = totals_row.reindex(columns=forecast_compare.columns)
totals_row['jurisdiction_id'] = 'totals'

x = pd.concat([forecast_compare, totals_row], ignore_index=True)
x.set_index('jurisdiction_id', inplace=True)

# Convert the displayed columns row by row, downcasting to integers where possible.
cols = ['forecast_sr14', 'forecast_sr13', 'diff']
x[cols] = x[cols].apply(pd.to_numeric, errors='coerce', axis=1, downcast='integer')
# print x.dtypes
print(x[['forecast_sr14', 'forecast_sr13', 'diff']])

## Number of units added in simulation:

In [None]:
# Total units added by each run (previous first, then current).
# Call-form print parses on both Python 2 and Python 3.
print(previous_run.units_added.sum())
print(units_added_df.units_added.sum())

In [None]:
# Rows whose parcel_id occurs more than once in the output, i.e. parcels
# developed over more than one year. keep=False retains every occurrence.
duplicated_parcels = units_added_df[
    units_added_df.duplicated(['parcel_id'], keep=False)
].sort_values(by='parcel_id')
print(duplicated_parcels.head())

# Number of output rows per duplicated parcel, smallest counts first.
# Reassignment replaces the in-place sort; print is in call form for Python 3.
duplicated_parcels_count = pd.DataFrame(
    {'count_parcels': duplicated_parcels.groupby(["parcel_id"]).size()})
duplicated_parcels_count = duplicated_parcels_count.sort_values(by='count_parcels')

In [None]:
# count_sum = rows beyond the first for each duplicated parcel.
duplicated_parcels_count['count_sum'] = duplicated_parcels_count.count_parcels - 1

# All prints use the call form so the cell parses on Python 2 and Python 3.
print('\nNumber of parcels with units built over multiple years:')
print(len(duplicated_parcels_count))
print('\nNumber of extra rows (some parcels have units added in 2 years, some in 3 yrs:')
print(duplicated_parcels_count['count_sum'].sum())
# print duplicated_parcels_count.head()

In [None]:
# Units added per parcel, summed over all simulation years; parcel_id comes
# back as an ordinary column next to total_units_added.
units_by_parcel = (
    units_added_df
    .groupby('parcel_id')['units_added']
    .sum()
    .reset_index(name='total_units_added')
)

In [None]:
# Index the per-parcel totals by parcel_id. The guard makes the cell safe to
# re-run: once parcel_id is the index, a second set_index raises KeyError.
if 'parcel_id' in units_by_parcel.columns:
    units_by_parcel.set_index('parcel_id', inplace=True)
print(units_by_parcel.head())

In [None]:
# Sanity check: output rows minus the repeated-parcel rows should equal the
# number of distinct parcels printed first.
print(len(units_by_parcel))
print(len(units_added_df))
# print len(units_added_df) - duplicated_parcels_count.count_parcels.sum() 
# Rows beyond the first occurrence of each parcel_id. This replaces the
# hard-coded 101, which presumably was this count for one particular run -
# TODO confirm against the run the constant was taken from.
extra_rows = units_added_df.duplicated(['parcel_id']).sum()
print(len(units_added_df) - extra_rows)

In [None]:
# Preview of the per-parcel totals (call-form print for Python 2 and 3).
print(units_by_parcel.head())

In [None]:
# Count parcels_df rows whose parcel_id never appears in units_by_parcel.
# Call-form print parses on both Python 2 and Python 3.
print("Number of parcels with capacity that have no units added:")
print(len(parcels_df[~parcels_df.index.isin(units_by_parcel.index)].reset_index()))

In [None]:
# Count units_by_parcel rows whose parcel_id is absent from parcels_df
# (parcels_df only holds parcels with capacity > 0).
# Call-form print parses on both Python 2 and Python 3.
print("Number of parcels that have units added but no capacity:")
print(len(units_by_parcel[~units_by_parcel.index.isin(parcels_df.index)].reset_index()))

In [None]:
# Preview of the parcel table (call-form print for Python 2 and 3).
print(parcels_df.head())
#print units_by_parcel.head()

In [None]:
# Left join on the parcel_id index: every parcel with capacity is kept, and
# total_units_added is NaN where the simulation added nothing to the parcel.
parcels_plus_capacity = parcels_df.join(units_by_parcel)
print(parcels_plus_capacity.head())

In [None]:
# Capacity minus units added, per parcel.
# NOTE(review): the result is NaN wherever total_units_added is NaN -
# confirm whether such parcels should instead report their full capacity.
parcels_plus_capacity['diff'] = parcels_plus_capacity['capacity'].sub(
    parcels_plus_capacity['total_units_added'])

In [None]:
# Write the parcel/capacity comparison to CSV, relative to the working directory.
# NOTE(review): the config file is read from ../data but this writes to
# ./data - confirm the intended output folder.
parcels_plus_capacity.to_csv(path_or_buf='data/pcap.csv')

In [None]:
# Households (sum of hh) per year for sim_id 1004, years after 2019 only.
# The query has no ORDER BY, so the order of the returned rows is not fixed.
households_sql = '''
  SELECT sum(hh) AS hh,yr
  FROM isam.demographic_output.summary
  WHERE sim_id = 1004 and yr > 2019
  GROUP BY yr
'''

In [None]:
# Single value: residential_units summed over the whole building table,
# with NULL entries counted as 0.
buildings_sql = '''
SELECT  SUM(COALESCE(residential_units,0)) AS residential_units
FROM urbansim.urbansim.building
'''

In [None]:
# Single unnamed value: capacity summed over parcels that have a site_id and
# capacity > 0.
# NOTE: this rebinds sched_dev_sql, which an earlier cell assigns to a
# different (per-jurisdiction) query; the sched_dev frame built from that
# earlier text is not affected by the rebinding.
sched_dev_sql = '''
SELECT  SUM(COALESCE(capacity,0)) 
FROM urbansim.urbansim.parcel
WHERE site_id is NOT NULL and capacity > 0
'''

In [None]:
# Run the three summary queries: households per year, base-year dwelling
# units, and scheduled-development capacity.
hh_df = pd.read_sql(sql=households_sql, con=mssql_engine)
du_df = pd.read_sql(sql=buildings_sql, con=mssql_engine)
sh_df = pd.read_sql(sql=sched_dev_sql, con=mssql_engine)

In [None]:
# Households in 2050 (first matching row of the per-year table).
hh = hh_df.loc[hh_df.yr == 2050].hh.values[0]

# Each of these frames holds one value from a one-row, one-column query.
# Read it with .iloc[0, 0]: calling int() on the 2-D .values array depends on
# NumPy's deprecated array-to-scalar conversion.
du = int(du_df.iloc[0, 0])
sched_dev_capacity = int(sh_df.iloc[0, 0])

In [None]:
# 2050 households minus what is already covered by base-year units and
# scheduled-development capacity.
units_needed = hh - (du + sched_dev_capacity)

In [None]:
# Summary of the housing-need arithmetic next to what the simulation added.
# Every print uses the call form so the cell parses on Python 2 and Python 3.
print('\nHouseholds 2050:')
print(hh)
print('\nResidential units base year:')
print(du)
print('\nSched dev:')
print(sched_dev_capacity)
print('\nUnits needed = Households 2050 - Residential units base year - Sched dev')
print('\nUnits needed:')
print(units_needed)
print('\nTotal units added:')
print(int(units_added_df.units_added.sum()))

In [None]:
# Year-over-year change in households. hh_df comes from a GROUP BY query with
# no ORDER BY, so its row order is not guaranteed; sort by year first so that
# diff() always subtracts the preceding year. The first year has no
# predecessor and is NaN.
hh_diff = hh_df.set_index('yr').sort_index().diff()
hh_diff.reset_index(inplace=True)
ts = pd.Series(hh_diff['hh'].values, index=hh_diff['yr'])

In [None]:
# Bar chart: new households per year.
ts.plot(kind='bar')

In [None]:
# Bar chart: units added by the simulation, summed per simulation year.
df = units_added_df.groupby('year_simulation')['units_added'].sum()
df.plot(kind='bar')

In [None]:
# Reorder the output rows in place, largest units_added first.
units_added_df.sort_values('units_added', ascending=False, inplace=True)

In [None]:
# First ten rows of the output table (call-form print for Python 2 and 3).
print(units_added_df.head(10))

In [None]:
# All output rows for parcel 9002470 (call-form print for Python 2 and 3).
print(units_added_df.loc[units_added_df.parcel_id == 9002470])

In [None]:
# Bar chart: total households per year.
df = hh_df.groupby('yr')['hh'].sum()
df.plot(kind='bar')