In [None]:
from sqlalchemy import create_engine
from pysandag.database import get_connection_string
import pandas as pd

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Build the SQLAlchemy engine shared by every query in this notebook.
#
# The config path is written with forward slashes. The previous literal,
# '..\data\config.yml', only worked because '\d' and '\c' happen not to be
# recognised escape sequences (newer Python versions warn about that), and the
# backslash separators resolved on Windows only. Forward slashes are accepted
# on Windows as well as on POSIX systems.
#
# NOTE(review): get_connection_string is a pysandag helper; presumably it reads
# the 'mssql_db' entry of the YAML config -- confirm against pysandag.
db_connection_string = get_connection_string('../data/config.yml', 'mssql_db')
mssql_engine = create_engine(db_connection_string)

In [None]:
# Run whose unit allocations are being reviewed. Change this single value to
# point the query at a different simulation run.
CURRENT_RUN_ID = 3

# Rows of urbansim_lite_output_units written by that run. The ':d' format spec
# guarantees that only an integer can be substituted into the SQL text.
current_run_units_added_sql = '''
SELECT  [units_index]
      ,[parcel_id]
      ,[units_added]
      ,[year_simulation]
      ,[run_id]
  FROM [urbansim].[urbansim].[urbansim_lite_output_units]
  where run_id = {run_id:d}'''.format(run_id=CURRENT_RUN_ID)

In [None]:
# Read the current run's allocations, discard the two bookkeeping columns
# (units_index, run_id) and order the rows by parcel.
units_added_df = (
    pd.read_sql(current_run_units_added_sql, mssql_engine)
    .drop(['units_index', 'run_id'], axis=1)
    .sort_values(by='parcel_id')
)

In [None]:
# Baseline run that the current run is compared against. Change this single
# value to compare with a different earlier run.
PREVIOUS_RUN_ID = 2

# Rows of urbansim_lite_output_units written by the baseline run. The ':d'
# format spec guarantees that only an integer can be substituted into the SQL.
previous_run_units_sql = '''SELECT  [units_index]
      ,[parcel_id]
      ,[units_added]
      ,[year_simulation]
      ,[run_id]
  FROM [urbansim].[urbansim].[urbansim_lite_output_units]
  where run_id = {run_id:d}'''.format(run_id=PREVIOUS_RUN_ID)

In [None]:
# Read the baseline run and shape it like units_added_df: drop the bookkeeping
# columns, normalise the column names, order the rows by parcel.
#
# NOTE(review): previous_run_units_sql already selects [year_simulation] and
# [units_added], so the rename below finds no 'year_built' or
# 'residential_units' column and changes nothing. It looks like a leftover
# from an older output schema -- confirm before removing it.
previous_run = (
    pd.read_sql(previous_run_units_sql, mssql_engine)
    .drop(['run_id', 'units_index'], axis=1)
    .rename(columns={'year_built': 'year_simulation',
                     'residential_units': 'units_added'})
    .sort_values(by='parcel_id')
)

In [None]:
def _canonical_rows(frame):
    """Return a copy of ``frame`` sorted on all columns with a fresh 0..n-1 index.

    ``DataFrame.equals`` compares row labels as well as values, so two frames
    must share the same row order and the same index before the comparison
    says anything about their contents.
    """
    return frame.sort_values(by=list(frame.columns)).reset_index(drop=True)


# Both frames were sorted on parcel_id only and still carry the row labels
# assigned when they were read from the database, so comparing them directly
# can report False for identical data (different fetch order, or ties between
# rows of the same parcel). Compare canonicalised copies instead; the original
# frames are left untouched.
print("\nComparison of runs - match?")
print(_canonical_rows(previous_run).equals(_canonical_rows(units_added_df)))

In [None]:
# Parcels that occur in more than one row of the current run, i.e. parcels
# developed over more than one simulation year.
repeat_mask = units_added_df.duplicated(['parcel_id'], keep=False)
duplicated_parcels = units_added_df[repeat_mask].sort_values(by='parcel_id')

# Number of rows per repeated parcel, smallest counts first.
duplicated_parcels_count = (
    duplicated_parcels.groupby(['parcel_id'])
    .size()
    .to_frame('count_parcels')
    .sort_values(by='count_parcels')
)

In [None]:
# duplicated_parcels_count holds one row per repeated parcel (it is built from
# a groupby on parcel_id), so its length is the number of such parcels. The
# value printed before, count_parcels.sum(), was the number of parcel/year
# rows, which overstates the parcel count the label promises.
print('\nNumber of parcels with units built over multiple years:')
print(len(duplicated_parcels_count))

In [None]:
# Total households per year for sim_id 1004, years after 2019.
# ORDER BY is explicit because GROUP BY alone gives no guarantee about row
# order, and the notebook later differences consecutive rows by year.
households_sql = '''
  SELECT sum(hh) AS hh,yr
  FROM isam.demographic_output.summary
  WHERE sim_id = 1004 and yr > 2019
  GROUP BY yr
  ORDER BY yr
'''

In [None]:
# Single-row query: total residential units over every row of
# urbansim.building, counting NULL unit values as 0. The result is reported
# further down as "Residential units base year".
buildings_sql = '''
SELECT  SUM(COALESCE(residential_units,0)) AS residential_units
FROM urbansim.urbansim.building
'''

In [None]:
# Single-row query: total capacity of parcels that have a site_id and a
# positive capacity. The result is reported further down as "Sched dev".
# The aggregate is aliased so the resulting DataFrame column has a real name,
# matching how buildings_sql names its aggregate.
sched_dev_sql = '''
SELECT  SUM(COALESCE(capacity,0)) AS capacity
FROM urbansim.urbansim.parcel
WHERE site_id is NOT NULL and capacity > 0
'''

In [None]:
# Run the three summary queries against the shared engine:
# households per year, total existing residential units, scheduled-development capacity.
hh_df = pd.read_sql(sql=households_sql, con=mssql_engine)
du_df = pd.read_sql(sql=buildings_sql, con=mssql_engine)
sh_df = pd.read_sql(sql=sched_dev_sql, con=mssql_engine)

In [None]:
# Households forecast for 2050. Fail with a readable message when the year is
# missing instead of the bare IndexError that values[0] would raise.
hh_2050 = hh_df.loc[hh_df.yr == 2050, 'hh']
if hh_2050.empty:
    raise ValueError('hh_df contains no row for yr == 2050')
hh = hh_2050.values[0]

# du_df and sh_df each come from a single-aggregate query, so the value of
# interest is the one cell at row 0, column 0. Reading it with iloc avoids
# int() on a 2-D array, which NumPy has deprecated for arrays with ndim > 0.
du = int(du_df.iloc[0, 0])
sched_dev_capacity = int(sh_df.iloc[0, 0])

In [None]:
# Units still required for 2050: forecast households minus the supply already
# accounted for (existing residential units plus scheduled development).
units_needed = hh - (du + sched_dev_capacity)

In [None]:
# Report the inputs and the result of the units_needed calculation next to the
# total number of units the current run actually added.
# print(...) with a single argument produces the same output under Python 2
# and, unlike the print statement, is also valid Python 3.
print('\nHouseholds 2050:')
print(hh)
print('\nResidential units base year:')
print(du)
print('\nSched dev:')
print(sched_dev_capacity)
print('\nUnits needed = Households 2050 - Residential units base year - Sched dev')
print('\nUnits needed:')
print(units_needed)
print('\nTotal units added:')
print(int(units_added_df.units_added.sum()))

In [None]:
# Year-over-year change in total households.
# diff() subtracts the preceding row, so the rows are put in year order first
# rather than relying on the order in which the database returned them.
# The first year has no predecessor and therefore yields NaN.
hh_diff = hh_df.set_index('yr').sort_index().diff().reset_index()

# Same values as a Series indexed by year, ready for plotting.
ts = pd.Series(hh_diff['hh'].values, index=hh_diff['yr'])

In [None]:
# Number of new households by year
# Title and axis labels are set so the figure can be read on its own.
new_hh_ax = ts.plot.bar(title='New households by year')
new_hh_ax.set_xlabel('Year')
new_hh_ax.set_ylabel('New households');

In [None]:
# Number of units added by year
df = units_added_df.groupby(['year_simulation'])['units_added'].sum()
# Title and axis labels are set so the figure can be read on its own.
units_ax = df.plot.bar(title='Units added by simulation year')
units_ax.set_xlabel('Simulation year')
units_ax.set_ylabel('Units added');

In [None]:
# Reorder units_added_df in place so the largest single allocations come first.
units_added_df.sort_values('units_added', ascending=False, inplace=True)

In [None]:
# First ten rows of units_added_df in its current order.
# print(...) replaces the Python 2 print statement; output is unchanged.
print(units_added_df.head(10))

In [None]:
# Spot check: every row of the current run for parcel 9002470.
# print(...) replaces the Python 2 print statement; output is unchanged.
print(units_added_df.loc[units_added_df.parcel_id == 9002470])

In [None]:
# Number of Total households by year
# NOTE: this rebinds df, which held the units-added series in an earlier cell.
df = hh_df.groupby(['yr'])['hh'].sum()
# Title and axis labels are set so the figure can be read on its own.
total_hh_ax = df.plot.bar(title='Total households by year')
total_hh_ax.set_xlabel('Year')
total_hh_ax.set_ylabel('Households');