In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
%matplotlib inline
from sqlalchemy import create_engine
from pysandag.database import get_connection_string

# Build the SQLAlchemy engine from the connection settings stored under the
# 'mssql_db' key of the project config file.
# A raw string keeps the relative path byte-for-byte identical while avoiding
# the invalid "\d" and "\c" escape sequences that newer Python versions warn about.
db_connection_string = get_connection_string(r'..\data\config.yml', 'mssql_db')
mssql_engine = create_engine(db_connection_string)

### Get max run id

In [None]:
# Look up the highest run_id present in the simulation output table.
run_id_sql = '''
SELECT max(run_id)
  FROM [urbansim].[urbansim].[urbansim_lite_output]
'''
run_id_df = pd.read_sql(run_id_sql, mssql_engine)
# The result is a single cell; pull it out explicitly instead of calling int()
# on the whole (1, 1) array, which NumPy has deprecated.
latest_run = run_id_df.iloc[0, 0]
if pd.isnull(latest_run):
    # max() over an empty table comes back as NULL; fail with a clear message.
    raise ValueError('urbansim_lite_output has no run_id values; nothing to analyze')
run_id = int(latest_run)

In [None]:
# Show the run id that the run-specific queries below are filtered on.
run_id

### 1. Get output of simulation

##### urbansim.urbansim.urbansim_lite_output (unit change by year, grouped by jurisdiction)

In [None]:
# Housing-unit change summed per jurisdiction and simulation year, restricted
# to the run selected above (run_id is substituted into the WHERE clause).
hs_change_sql = '''
    SELECT j.name, p.jurisdiction_id, sum(unit_change) as hs_change, year_simulation
      FROM urbansim.urbansim.urbansim_lite_output o 
      JOIN urbansim.urbansim.parcel p on p.parcel_id = o.parcel_id
      JOIN urbansim.ref.jurisdiction j on p.jurisdiction_id = j.jurisdiction_id
     WHERE run_id =  %s
  GROUP BY j.name,p.jurisdiction_id, year_simulation
  ORDER BY j.name,p.jurisdiction_id, year_simulation''' % (run_id,)

In [None]:
# Run the query; rows are indexed by jurisdiction_id.
units_by_jur = pd.read_sql(
    sql=hs_change_sql,
    con=mssql_engine,
    index_col='jurisdiction_id',
)

In [None]:
# Preview the first rows of the per-jurisdiction, per-year unit change.
units_by_jur.head()

In [None]:
# Total simulated housing-unit change for Carlsbad over all simulation years.
units_by_jur.loc[units_by_jur['name'] == 'Carlsbad', 'hs_change'].sum()

### 2. Fill in "0" for units for "missing" years (for plotting) (e.g. Del Mar)

In [None]:
# Del Mar example (jurisdiction_id 4): plot the raw yearly output, before the
# missing years are filled in.
del_mar_before = units_by_jur.loc[4].sort_values('year_simulation')
del_mar_before.plot(
    x='year_simulation',
    y='hs_change',
    style='.-',
    title='NULL values in Del Mar Housing Unit Change',
)

In [None]:
# Preview the frame again before it is reshaped in the next cell.
units_by_jur.head()

In [None]:
# Give every jurisdiction one row per simulation year 2017-2050:
# pivot the years into the row index, add any absent years as zero rows, zero
# out the gaps inside existing years, then return to the long layout indexed by
# jurisdiction_id.
idx = range(2017, 2051)
units_by_jur = (
    units_by_jur
    .set_index(['name', 'year_simulation'], append=True)
    .unstack(['jurisdiction_id', 'name'])
    .reindex(idx, fill_value=0)
    .fillna(0)
    .stack(['jurisdiction_id', 'name'])
    .reset_index()
    .set_index('jurisdiction_id')
)

In [None]:
# Same Del Mar plot after zero-filling, for comparison with the plot above.
del_mar_after = units_by_jur.loc[4].sort_values('year_simulation')
del_mar_after.plot(
    x='year_simulation',
    y='hs_change',
    style='.-',
    title='Replace Null with Zeroes Del Mar Housing Unit Change',
)

### 3. Get capacity

#####  <span style="color:red">!!! NOTE: urbansim.parcel for city and county, parcel_update_2017 for other jurisdictions & sched dev table </span>

##### urbansim.parcel_update_2017 where site_id IS NULL

In [None]:
# Capacity outside scheduled development (site_id IS NULL) for every
# jurisdiction except ids 14 and 19, taken from parcel_update_2017.
parcel_update_2017_sql = '''
    SELECT  p.jurisdiction_id, 
            sum(cap_remaining_new) AS capacity_sans_sched_dev 
       FROM urbansim.urbansim.parcel_update_2017 update2017
       JOIN urbansim.urbansim.parcel p
         ON p.parcel_id = update2017.parcelid_2015
      WHERE cap_remaining_new > 0 and jurisdiction_id NOT IN (14,19) and site_id IS NULL
      GROUP by p.jurisdiction_id
      ORDER BY p.jurisdiction_id
'''
update_2017_df = pd.read_sql(
    sql=parcel_update_2017_sql,
    con=mssql_engine,
    index_col='jurisdiction_id',
)

##### from urbansim.parcel where site_id IS NULL

In [None]:
# Capacity outside scheduled development for the city and county
# (jurisdiction ids 14 and 19), which are read from urbansim.parcel because
# they are not in parcel update 2017.
parcel_city_and_county_sql = '''
    SELECT	p.jurisdiction_id, sum(capacity) AS capacity_sans_sched_dev 
       FROM urbansim.urbansim.parcel p
      WHERE capacity > 0 and jurisdiction_id IN (14,19) and site_id IS NULL
      GROUP by p.jurisdiction_id
      ORDER BY p.jurisdiction_id
'''
city_county_capacity_parcel_df = pd.read_sql(
    sql=parcel_city_and_county_sql,
    con=mssql_engine,
    index_col='jurisdiction_id',
)

Combine (concatenate) the capacity rows from urbansim.parcel_update_2017 and urbansim.parcel

In [None]:
# Stack the two capacity tables row-wise into one frame.
capacity_not_sched = pd.concat(
    objs=[update_2017_df, city_county_capacity_parcel_df],
    axis=0,
)

In [None]:
# Order the combined capacity rows by jurisdiction_id.
capacity_not_sched = capacity_not_sched.sort_index()

In [None]:
# capacity_not_sched

In [None]:
# Capacity contributed by scheduled development: residential units per
# jurisdiction for scenario 1 with yr after 2016.
sched_dev_sql = '''
    SELECT p.jurisdiction_id, sum(res_units) AS capacity_sched_dev
      FROM urbansim.urbansim.scheduled_development_do_not_use s
      JOIN urbansim.urbansim.parcel p on p.parcel_id = s.parcel_id
     WHERE scenario = 1 and yr > 2016
     GROUP by p.jurisdiction_id
    ORDER BY p.jurisdiction_id
'''
sched_dev_df = pd.read_sql(
    sql=sched_dev_sql,
    con=mssql_engine,
    index_col='jurisdiction_id',
)

In [None]:
# Combine scheduled-development capacity with the remaining capacity.
# An outer join keeps jurisdictions that appear in only one of the two sources
# (the default left join silently dropped those without scheduled development),
# and the missing side is counted as 0 so the total below is never NaN.
cap_by_jur = sched_dev_df.join(capacity_not_sched, how='outer').fillna(0)

In [None]:
# Total capacity = scheduled-development capacity + all other capacity.
cap_by_jur['capacity'] = cap_by_jur['capacity_sched_dev'].add(
    cap_by_jur['capacity_sans_sched_dev']
)

In [None]:
# Keep only the combined capacity column (still a DataFrame, not a Series).
cap_by_jur = cap_by_jur.loc[:, ['capacity']]

In [None]:
# cap_by_jur

### 4. Sum units from output of simulation over five year increments

In [None]:
# Label each simulation year with the five-year increment it falls in.
# Bin edges run 2015, 2020, ..., 2050 and each bin is named after its upper
# edge, so the first bin is labelled '2020' and the last '2050'.
bins = range(2015, 2055, 5)
names = [str(upper_edge) for upper_edge in bins[1:]]
units_by_jur['increment'] = pd.cut(
    units_by_jur['year_simulation'],
    bins=bins,
    labels=names,
)

In [None]:
# Sum the yearly unit change into one 'hs_added' value per increment and
# jurisdiction.
# 'increment' is categorical (it comes from pd.cut), so observed=True is needed:
# without it pandas emits every combination of the grouping keys, including
# jurisdiction_id/name pairs that do not exist, which later yields duplicate
# (increment, name) entries when the frame is pivoted.
units_by_jur_inc = (
    units_by_jur
    .groupby(['increment', 'jurisdiction_id', 'name'], observed=True)['hs_change']
    .sum()
    .rename('hs_added')
    .reset_index()
)

### 5. Join output of simulation with capacity

In [None]:
# Attach each jurisdiction's capacity to its per-increment rows, matching on
# jurisdiction_id.
units_by_jur_inc = (
    units_by_jur_inc
    .set_index('jurisdiction_id')
    .join(cap_by_jur, how='left')
)

### 6. Add increment 2017 with units added equal to 0 (for plotting)

In [None]:
# Copy the '2020' rows as a template for the synthetic starting point.
start_year = units_by_jur_inc[units_by_jur_inc['increment'] == '2020'].copy()

In [None]:
# Relabel the template rows as increment '2017' with no units added yet.
start_year = start_year.assign(increment='2017', hs_added=0)

In [None]:
# Put the synthetic 2017 rows ahead of the simulated increments.
units_by_jur_inc = pd.concat(objs=[start_year, units_by_jur_inc], axis=0)

### 7. Pivot so each jurisdiction is a column, each row is an increment and the values are hs unit change

In [None]:
# Wide table of units added: one row per increment, one column per
# jurisdiction name, missing cells set to 0.
units_by_jur_pivot = (
    units_by_jur_inc
    .pivot(index='increment', columns='name', values='hs_added')
    .reset_index()
    .rename_axis(None, axis=1)
    .fillna(0)
    .set_index('increment')
)

### 8. Pivot so each jurisdiction is a column, each row is an increment and the values are capacity

In [None]:
# Wide table of capacity: one row per increment, one column per jurisdiction
# name, missing cells set to 0.
cap_by_jur_pivot = (
    units_by_jur_inc
    .pivot(index='increment', columns='name', values='capacity')
    .reset_index()
    .rename_axis(None, axis=1)
    .fillna(0)
    .set_index('increment')
)

### 9. Calculate cumulative sum of hs unit change by jurisdiction

In [None]:
# Turn per-increment units added into a running total down the increments.
# NOTE: this rebinds units_by_jur_pivot to its own cumulative sum, so running
# the cell a second time without rebuilding the pivot accumulates twice.
units_by_jur_pivot = units_by_jur_pivot.cumsum()

In [None]:
# Display the cumulative units added per jurisdiction and increment.
units_by_jur_pivot 

### 10. Plot results - line plots

In [None]:
# One line-plot panel per jurisdiction: cumulative units added and the capacity
# that remains after subtracting them.
jur_names = units_by_jur.name.unique().tolist()
# Keep the original 10x2 grid, but grow it if there are more than 20
# jurisdictions so indexing the axes can never run past the end.
rows_needed = (len(jur_names) + 1) // 2
fig, axes = plt.subplots(nrows=max(10, rows_needed), ncols=2, figsize=(10, 32))
for ax, jur in zip(axes.flat, jur_names):
    hs_column = jur + '_hs_change'
    cap_column = jur + '_capacity'
    cap_plot_column = jur + '_remaining_cap'
    # join dataframe of units added with dataframe of capacity for this
    # jurisdiction; the suffixes separate the two identically named columns
    df_plot = units_by_jur_pivot[[jur]].join(
        cap_by_jur_pivot[[jur]], lsuffix='_hs_change', rsuffix='_capacity')
    # remaining capacity = starting capacity - units added so far
    df_plot[cap_plot_column] = df_plot[cap_column] - df_plot[hs_column]
    del df_plot[cap_column]
    df_plot.plot(style='.-', ax=ax)
# Write the image once, after every panel is drawn, instead of re-saving the
# whole figure on each loop iteration.
fig.savefig('units_and_capacity_lineplot.png', format='png', dpi=300)

### 10 (continued). Plot results - bar plots

In [None]:
# One bar-plot panel per jurisdiction: cumulative units added and the capacity
# that remains after subtracting them.
jur_names = units_by_jur.name.unique().tolist()
# Keep the original 10x2 grid, but grow it if there are more than 20
# jurisdictions so indexing the axes can never run past the end.
rows_needed = (len(jur_names) + 1) // 2
fig, axes = plt.subplots(nrows=max(10, rows_needed), ncols=2, figsize=(10, 32))
for ax, jur in zip(axes.flat, jur_names):
    hs_column = jur + '_hs_change'
    cap_column = jur + '_capacity'
    cap_plot_column = jur + '_remaining_cap'
    # join dataframe of units added with dataframe of capacity for this
    # jurisdiction; the suffixes separate the two identically named columns
    df_plot = units_by_jur_pivot[[jur]].join(
        cap_by_jur_pivot[[jur]], lsuffix='_hs_change', rsuffix='_capacity')
    # remaining capacity = starting capacity - units added so far
    df_plot[cap_plot_column] = df_plot[cap_column] - df_plot[hs_column]
    del df_plot[cap_column]
    df_plot.plot(style='.-', ax=ax, kind='bar')
# Write the image once, after every panel is drawn, instead of re-saving the
# whole figure on each loop iteration.
fig.savefig('units_and_capacity_barplot.png', format='png', dpi=300)

## Units Added by Scheduled Dev

In [None]:
# Units built per jurisdiction, simulation year and output source for the
# selected run.
units_added_sched_dev_sql = '''
    SELECT p.jurisdiction_id,j.name,year_simulation,o.source,
           sum([unit_change]) as units_built
      FROM [urbansim].[urbansim].[urbansim_lite_output] o
      JOIN [urbansim].[urbansim].[parcel] p on p.parcel_id = o.parcel_id
      JOIN urbansim.ref.jurisdiction j on p.jurisdiction_id = j.jurisdiction_id
     WHERE run_id =  %s 
  GROUP BY p.jurisdiction_id,o.source,j.name,year_simulation
  ORDER BY j.name,o.source,year_simulation
''' % (run_id,)
units_added_sched_dev_df = pd.read_sql(
    sql=units_added_sched_dev_sql,
    con=mssql_engine,
    index_col='jurisdiction_id',
)

In [None]:
# Give every jurisdiction/source pair one row per simulation year 2017-2050:
# pivot the years into the row index, add absent years as zero rows, zero out
# remaining gaps, then return to the long layout indexed by jurisdiction_id.
idx = range(2017, 2051)
units_added_sched_dev_df = (
    units_added_sched_dev_df
    .set_index(['name', 'year_simulation', 'source'], append=True)
    .unstack(['jurisdiction_id', 'name', 'source'])
    .reindex(idx, fill_value=0)
    .fillna(0)
    .stack(['jurisdiction_id', 'name', 'source'])
    .reset_index()
    .set_index('jurisdiction_id')
)

### Add year 2016 with units added equal to 0 (for plotting)

In [None]:
# Copy the 2017 rows as a template for the synthetic starting year.
first_sim_year = units_added_sched_dev_df['year_simulation'] == 2017
start_year = units_added_sched_dev_df[first_sim_year].copy()

In [None]:
# Relabel the template rows as the year before the simulation starts, with no
# units built. The year is an int like every other year_simulation value (the
# frame was reindexed on range(2017, 2051)); the string '2016' used previously
# produced a mixed str/int year axis once these rows were concatenated.
start_year['year_simulation'] = 2016
start_year['units_built'] = 0

In [None]:
# Work on an independent copy so the yearly frame above stays untouched.
units_added_sched_dev_by_increment = units_added_sched_dev_df.copy(deep=True)

In [None]:
# Put the synthetic starting-year rows ahead of the simulated years.
units_added_sched_dev_by_increment = pd.concat(
    objs=[start_year, units_added_sched_dev_by_increment],
    axis=0,
)

In [None]:
# units_by_jur_source = units_added_sched_dev_by_increment.loc[units_added_sched_dev_by_increment.name == 'Carlsbad']

In [None]:
# List the distinct output sources present for this run.
units_added_sched_dev_by_increment.source.unique().tolist()

In [None]:
# Rows whose units came from the 'entire_region' source.
x = units_added_sched_dev_by_increment[
    units_added_sched_dev_by_increment['source'] == 'entire_region'
]

In [None]:
# Jurisdictions that have at least one 'entire_region' row.
x.name.unique()

##### Fill in "entire region" as 0 for jurisdictions that do not have it (e.g. Oceanside)

In [None]:
# Make sure every jurisdiction/year has a row for every source: pivot the
# sources into the row index, zero-fill the combinations that had no output,
# then return to the long layout indexed by jurisdiction_id.
idx = units_added_sched_dev_by_increment['source'].unique().tolist()
units_added_sched_dev_by_increment = (
    units_added_sched_dev_by_increment
    .set_index(['name', 'year_simulation', 'source'], append=True)
    .unstack(['jurisdiction_id', 'name', 'year_simulation'])
    .reindex(idx, fill_value=0)
    .fillna(0)
    .stack(['jurisdiction_id', 'name', 'year_simulation'])
    .reset_index()
    .set_index('jurisdiction_id')
)

#####  <span style="color:red"> might be better to make these as a stacked bar </span>

In [None]:
# One panel per jurisdiction showing cumulative units built, split into
# scheduled development, everything else, and the total.
jur_names = units_added_sched_dev_by_increment.name.unique().tolist()
# Keep the original 10x2 grid, but grow it if there are more than 20
# jurisdictions so indexing the axes can never run past the end.
rows_needed = (len(jur_names) + 1) // 2
fig, axes = plt.subplots(nrows=max(10, rows_needed), ncols=2, figsize=(10, 32))
for ax, jur in zip(axes.flat, jur_names):
    units_by_jur_source = units_added_sched_dev_by_increment.loc[
        units_added_sched_dev_by_increment.name == jur]
    # Rows are years, columns are sources. Zero-fill THIS pivot; the previous
    # code called fillna on the unrelated units_by_jur_pivot by mistake.
    units_by_jur_source_pivot = (
        units_by_jur_source
        .pivot(index='year_simulation', columns='source', values='units_built')
        .rename_axis(None, axis=1)
        .fillna(0)
    )
    # Everything that is not scheduled development.
    units_by_jur_source_pivot['subregional'] = (
        units_by_jur_source_pivot['subregional_control']
        + units_by_jur_source_pivot['entire_region'])
    units_by_jur_source_pivot['total'] = (
        units_by_jur_source_pivot['subregional']
        + units_by_jur_source_pivot['sched_dev'])
    # Prefix the plotted columns with the jurisdiction name so each legend is
    # self-describing, then accumulate over the years.
    df_plot2 = (
        units_by_jur_source_pivot[['subregional', 'sched_dev', 'total']]
        .rename(columns={
            'subregional': jur + '_not_sched_dev',
            'sched_dev': jur + '_sched_dev',
            'total': jur + '_total',
        })
        .cumsum()
    )
    df_plot2.plot(style='.-', ax=ax)

### 11. Double check results for one jurisdiction

In [None]:
# Jurisdiction to double check. jur is the name used for the pivot columns and
# plot titles, jur_id is the id used in the SQL filters and frame lookups, and
# confluence_pg_cap is the starting capacity the plot is compared against.
# NOTE(review): jur and jur_id must refer to the same jurisdiction; nothing
# below verifies that they match.
jur = 'Encinitas'
jur_id = 6
confluence_pg_cap = 2460
# Alternative jurisdiction (swap in for the three values above):
# jur = 'Imperial Beach'
# jur_id = 8
# confluence_pg_cap = 3569

In [None]:
# Heading for the single-jurisdiction check.
heading_text = "{} housing unit change and remaining capacity".format(jur)
print(heading_text)

In [None]:
# Build the units-added / remaining-capacity frame for the chosen jurisdiction
# and plot it.
hs_column = jur + '_hs_change'
cap_column = jur + '_capacity'
cap_plot_column = jur + '_remaining_cap'
df_units_added = units_by_jur_pivot[[jur]]
df_capacity = cap_by_jur_pivot[[jur]]
# The suffixes tell apart the two columns that are both named after jur.
df_plot = df_units_added.join(
    df_capacity,
    lsuffix='_hs_change',
    rsuffix='_capacity',
)
# remaining capacity = starting capacity - cumulative units added
df_plot[cap_plot_column] = df_plot[cap_column] - df_plot[hs_column]
df_plot = df_plot.drop(cap_column, axis=1)
df_plot.plot(style='.-', title=jur)

In [None]:
# Column names of df_plot for the chosen jurisdiction.
remaining_cap, hs_change = jur + '_remaining_cap', jur + '_hs_change'

Capacity and housing unit change shown on plot

In [None]:
# Report the values the plot shows: capacity at the 2017 starting point,
# cumulative unit change by 2050, and their difference.
starting_capacity = int(df_plot.loc['2017', remaining_cap])
units_added_by_2050 = int(df_plot.loc['2050', hs_change])
print("Starting capacity: {}\nHousing unit change: {}\nRemaining:   {}".format(
    starting_capacity,
    units_added_by_2050,
    starting_capacity - units_added_by_2050,
))
print("Remaining capacity {} 2050: {}".format(
    jur,
    int(df_plot.loc['2050', remaining_cap]),
))

Capacity on confluence page Jurisdiction Feedback

In [None]:
# Compare the plotted starting capacity with the figure recorded on the
# jurisdiction feedback page.
plotted_starting_capacity = int(df_plot.loc['2017', remaining_cap])
print("Starting capacity on jur feedback confluence page: {}".format(confluence_pg_cap))
print("Difference between confluence page and plot: {}".format(
    plotted_starting_capacity - confluence_pg_cap
))

Capacity and forecast from database

In [None]:
# Rebuilds the same per-jurisdiction, per-year unit-change query text that was
# built in section 1, for the current run_id.
# NOTE(review): no cell below this one in the visible notebook executes
# hs_change_sql, so this looks like leftover duplication; confirm before removing.
hs_change_sql = '''
    SELECT j.name, p.jurisdiction_id, sum(unit_change) as hs_change, year_simulation
      FROM urbansim.urbansim.urbansim_lite_output o 
      JOIN urbansim.urbansim.parcel p on p.parcel_id = o.parcel_id
      JOIN urbansim.ref.jurisdiction j on p.jurisdiction_id = j.jurisdiction_id
     WHERE run_id =  %s
  GROUP BY j.name,p.jurisdiction_id, year_simulation
  ORDER BY j.name,p.jurisdiction_id, year_simulation'''
hs_change_sql = hs_change_sql % run_id

In [None]:
# Total simulated units for the chosen jurisdiction and run that did not come
# from scheduled development.
simulation_NOT_sched_dev = '''  
    SELECT  sum(unit_change)
       FROM urbansim.urbansim.urbansim_lite_output o
       JOIN urbansim.parcel p on p.parcel_id = o.parcel_id
      WHERE jurisdiction_id = %s and source != 'sched_dev' and run_id =  %s'''
simulation_NOT_sched_dev_sql = simulation_NOT_sched_dev % (jur_id, run_id)
sim_NOT_sched_dev_df = pd.read_sql(sql=simulation_NOT_sched_dev_sql, con=mssql_engine)

In [None]:
# Single-value queries used to double check the chosen jurisdiction (jur_id).
# Each result is a one-row, one-column frame.

# Total positive capacity in urbansim.parcel.
jur_capacity_sql =  '''
     SELECT sum(capacity)
       FROM urbansim.parcel
      WHERE jurisdiction_id = %s and capacity > 0'''
jur_capacity_sql = jur_capacity_sql % jur_id
cap_jur_df =  pd.read_sql(jur_capacity_sql,mssql_engine)

# Same, restricted to parcels outside scheduled development (site_id IS NULL).
jur_capacity_sql_no_sched_dev_sql =  '''
     SELECT sum(capacity)
       FROM urbansim.parcel
      WHERE jurisdiction_id = %s and site_id IS NULL and capacity > 0'''
jur_capacity_sql_no_sched_dev_sql =  jur_capacity_sql_no_sched_dev_sql % jur_id
cap_jur_df_no_sched_dev =  pd.read_sql(jur_capacity_sql_no_sched_dev_sql,mssql_engine)

# Residential units in the scheduled development table from 2017 onward.
# NOTE(review): unlike the capacity query in section 3, this one has no
# "scenario = 1" filter; confirm whether that difference is intended.
jur_capacity_sql_just_sched_dev =  '''
     SELECT sum(res_units)
       FROM urbansim.urbansim.scheduled_development_do_not_use s
       JOIN urbansim.parcel p on p.parcel_id = s.parcel_id
      WHERE jurisdiction_id = %s and yr >= 2017'''
jur_capacity_sql_just_sched_dev =  jur_capacity_sql_just_sched_dev % jur_id
cap_jur_just_sched_dev_df =  pd.read_sql(jur_capacity_sql_just_sched_dev,mssql_engine)

# Simulated units for this run that came from scheduled development.
simulation_sched_dev =  '''  
    SELECT  sum(unit_change)
       FROM urbansim.urbansim.urbansim_lite_output o
       JOIN urbansim.parcel p on p.parcel_id = o.parcel_id
      WHERE jurisdiction_id = %s and source='sched_dev' and run_id =  %s'''
simulation_sched_dev_sql = simulation_sched_dev % (jur_id,run_id)
sim_sched_dev_df =  pd.read_sql(simulation_sched_dev_sql,mssql_engine)

# The non-scheduled-development counterpart (sim_NOT_sched_dev_df) is loaded by
# the cell directly above; the identical copy of that query that used to be
# repeated here only ran the same database round trip a second time.

In [None]:
def _single_int(frame):
    """Return the only cell of a one-row, one-column query result as an int.

    A SQL SUM over zero rows comes back as NULL; that is reported as 0 rather
    than raising. Reading the cell with .iloc also avoids calling int() on a
    whole (1, 1) array, which NumPy has deprecated.
    """
    value = frame.iloc[0, 0]
    return 0 if pd.isnull(value) else int(value)


# Pull each scalar out once, then print the side-by-side comparison.
cap_total = _single_int(cap_jur_df)
cap_no_sched_dev = _single_int(cap_jur_df_no_sched_dev)
cap_sched_dev = _single_int(cap_jur_just_sched_dev_df)
forecast_sched_dev = _single_int(sim_sched_dev_df)
forecast_no_sched_dev = _single_int(sim_NOT_sched_dev_df)

print(jur)
print("\nTotal capacity from urbansim.parcel: {}".format(cap_total))
print("Total capacity from urbansim.parcel and sched dev table: {}\n".format(
    cap_no_sched_dev + cap_sched_dev))
print("Capacity sched dev: {}".format(cap_sched_dev))
print("Forecast sched dev: {}\n".format(forecast_sched_dev))

print("Capacity no sched dev: {}".format(cap_no_sched_dev))
print("Forecast no sched dev: {}\n".format(forecast_no_sched_dev))

print("Remaining: {}".format(cap_no_sched_dev - forecast_no_sched_dev))
print("Remaining shown on plot: {}\n".format(int(df_plot.loc['2050'][remaining_cap])))
print("Forecast total: {}".format(forecast_sched_dev + forecast_no_sched_dev))
print("Forecast housing unit change on plot: {}".format(
    int(df_plot.loc['2050'][hs_change])))


In [None]:
# Remaining capacity computed in the database for the chosen jurisdiction:
# per parcel outside scheduled development, capacity minus the units this run
# added from non-scheduled-development sources (parcels with no output count
# as 0 units added), summed over the jurisdiction.
remaining_capacity_sql = ''' 
        WITH parcel_out AS 
            (SELECT  o.parcel_id, sum(unit_change) as units_added
                FROM urbansim.urbansim.urbansim_lite_output o 
                JOIN urbansim.parcel p on p.parcel_id = o.parcel_id
                WHERE jurisdiction_id = %s and source != 'sched_dev' and  run_id =  %s
                GROUP BY o.parcel_id)
        SELECT sum(p.capacity - COALESCE(o.units_added,0))
        FROM urbansim.parcel p
        FULL OUTER JOIN parcel_out o
        ON o.parcel_id = p.parcel_id
        WHERE jurisdiction_id = %s  and site_id IS NULL and p.capacity  > 0 '''
remaining_capacity_sql = remaining_capacity_sql % (jur_id,run_id,jur_id)
# Run the query once; the statement used to be executed twice back to back.
remaining_capacity_df =  pd.read_sql(remaining_capacity_sql,mssql_engine)

In [None]:
# The query returns a single cell; read it with .iloc rather than calling int()
# on the whole (1, 1) array, which NumPy has deprecated.
print("Remaining capacity from db: {}".\
      format(int(remaining_capacity_df.iloc[0, 0])))

In [None]:
# Yearly rows for the chosen jurisdiction, in chronological order.
jur_forecast = units_by_jur.loc[jur_id].sort_values('year_simulation')

In [None]:
# Keep just the year and the unit change for plotting.
df_plot_jur = jur_forecast.loc[:, ['year_simulation', 'hs_change']]

In [None]:
# Use the simulation year as the x-axis index.
df_plot_jur = df_plot_jur.set_index('year_simulation')

### Housing unit change by year

In [None]:
# Yearly housing-unit change for the chosen jurisdiction.
df_plot_jur.plot(title=jur, style='.-')

### Housing unit change cumulative sum

In [None]:
# Convert the yearly change into a running total over the years.
# NOTE: this rebinds df_plot_jur to its own cumulative sum, so re-running the
# cell without rebuilding df_plot_jur accumulates twice.
df_plot_jur = df_plot_jur.cumsum()

In [None]:
# Cumulative housing-unit change by year for the chosen jurisdiction.
df_plot_jur.plot(title=jur, style='.-')

### Housing unit change cumulative sum by increment

In [None]:
# Cumulative units added and remaining capacity by five-year increment.
df_plot.plot(title=jur, style='.-')

#### Housing units by source: sched dev, subregional control, entire region (remaining)

In [None]:
# Units built per jurisdiction and source for the selected run. The inner
# query groups by year as well; the outer query sums those yearly rows.
unit_type_by_year = '''
    SELECT x.jurisdiction_id, sum(x.units_built) as units_by_type, x.source
    FROM(SELECT jurisdiction_id,
        sum([unit_change]) as units_built,
        [year_simulation],
        o.[source]
        FROM [urbansim].[urbansim].[urbansim_lite_output] o
        join [urbansim].[urbansim].[parcel] p on p.parcel_id = o.parcel_id
        where run_id = %s
        GROUP BY o.source,jurisdiction_id,year_simulation) as x
        group by x.jurisdiction_id, x.source
        order by x.jurisdiction_id, x.source
''' % (run_id,)
unit_type_by_year_df = pd.read_sql(sql=unit_type_by_year, con=mssql_engine)

In [None]:
# Wide table: one row per source, one column per jurisdiction_id.
unit_type_by_year_pivot = (
    unit_type_by_year_df
    .pivot(index='source', columns='jurisdiction_id', values='units_by_type')
    .reset_index()
    .rename_axis(None, axis=1)
    .set_index('source')
)

In [None]:
# unit_type_by_year_pivot

In [None]:
# One bar-chart panel per jurisdiction showing units built by source.
plot_jur_ids = unit_type_by_year_df.jurisdiction_id.unique().tolist()
# Keep the original 10x2 grid, but grow it if there are more than 20
# jurisdictions.
rows_needed = (len(plot_jur_ids) + 1) // 2
fig, axes = plt.subplots(nrows=max(10, rows_needed), ncols=2, figsize=(10, 32))
# Pair each jurisdiction with the next free panel. The panels used to be
# picked with axes.flat[jurisdiction_id], which left the first panel empty and
# would raise IndexError for any id of 20 or more.
for ax, plot_jur_id in zip(axes.flat, plot_jur_ids):
    jur_plot = unit_type_by_year_pivot[[plot_jur_id]]
    jur_plot.plot(style='.-', ax=ax, rot=0, kind='bar')
