# QA of Scheduled Developments in Forecast

#### Source data: demographic warehouse <br> preliminary 2021 RTP forecast (datasource id 30) & vintage 2018 estimates (datasource id 27)

### Method:

#### 1. Check for scheduled developments that are already built or partially built at the start of the forecast<br>&emsp; by finding MGRAs that have both: <br> &emsp;&emsp; a unit change from 2017 to 2018 in the vintage 2018 estimates, and scheduled developments

### Steps:

##### set up python environment

In [None]:
# set up python environment
import numpy as np
import os 
import sys
import pandas as pd
from sqlalchemy import create_engine
import xlsxwriter
from matplotlib import pyplot as plt
from pandas.plotting import table 
%matplotlib inline
# connect to database
# SQLAlchemy URL using the mssql+pyodbc dialect; by the usual URL layout the host is
# `sql2014a8` and the database is `ws`, reached through the
# "SQL Server Native Client 11.0" ODBC driver.
# NOTE(review): no credentials appear in the URL -- presumably integrated Windows
# authentication is relied on; confirm.
db_connection_string = 'mssql+pyodbc://sql2014a8/ws?driver=SQL+Server+Native+Client+11.0'
# engine handed to the pd.read_sql calls in the cells below
mssql_engine = create_engine(db_connection_string)

##### get all MGRAs w sched dev AND unit increase 2017 to 2018 (source: estimates 2018, datasource 27)

In [None]:
# Find every MGRA that contains a scheduled development parcel AND gained housing
# units between 2017 and 2018 in the 2018 estimates (datasource 27).
#
# Note: use isam.xpef23.parcel_du_xref_post2017 as crosswalk from parcel to mgra
# to be consistent w demographic warehouse. DO NOT use urbansim.parcel
# Use urbansim.scheduled_development_parcel to get site ids.  DO NOT use urbansim.parcel
# since some parcels are missing sched dev site ids
#
# Outputs used by later cells:
#   mgras    -- one row per flagged MGRA, sorted by 2017->2018 unit change (largest first)
#   mgralist -- the flagged MGRA ids as Python ints
#   mgrastr  -- the same ids comma-joined, for interpolation into SQL "IN (...)" clauses
sql_query = '''
WITH estimates2018_yr2018 AS (
        SELECT mgra,sum(units) as estimates_units2018, geozone
          FROM demographic_warehouse.fact.housing
          JOIN demographic_warehouse.dim.mgra 
            ON mgra.mgra_id = housing.mgra_id
         WHERE datasource_id = 27 AND geotype = 'region' AND yr_id = 2018 AND 
               mgra IN (
                    SELECT DISTINCT mgra
                      FROM urbansim.urbansim.scheduled_development_parcel sched_dev
                      JOIN isam.xpef23.parcel_du_xref_post2017 xpef23
                        ON sched_dev.parcel_id =  xpef23.parcel_id)  
      GROUP BY yr_id, mgra, datasource_id, geozone),
     estimates2018_yr2017 AS (
        SELECT mgra,sum(units) as estimates_units2017
          FROM demographic_warehouse.fact.housing
          JOIN demographic_warehouse.dim.mgra 
            ON mgra.mgra_id = housing.mgra_id
         WHERE datasource_id = 27 AND geotype = 'region' AND  yr_id = 2017 AND 
               mgra IN (
                    SELECT DISTINCT mgra
                      FROM urbansim.urbansim.scheduled_development_parcel sched_dev
                      JOIN isam.xpef23.parcel_du_xref_post2017 xpef23
                        ON sched_dev.parcel_id =  xpef23.parcel_id) 
      GROUP BY yr_id, mgra, datasource_id, geozone),
     estimates2018_yr2016 AS (
        SELECT mgra,sum(units) as estimates_units2016
          FROM demographic_warehouse.fact.housing
          JOIN demographic_warehouse.dim.mgra 
            ON mgra.mgra_id = housing.mgra_id
         WHERE datasource_id = 27 AND geotype = 'region' AND  yr_id = 2016 AND 
               mgra IN (
                    SELECT DISTINCT mgra
                      FROM urbansim.urbansim.scheduled_development_parcel sched_dev
                      JOIN isam.xpef23.parcel_du_xref_post2017 xpef23
                        ON sched_dev.parcel_id =  xpef23.parcel_id) 
      GROUP BY yr_id, mgra, datasource_id, geozone)
SELECT estimates2018_yr2018.mgra, estimates_units2016,estimates_units2017, estimates_units2018,
       estimates_units2018-estimates_units2017 as estimates_unit_change_2017_to_2018
  FROM estimates2018_yr2018
  JOIN estimates2018_yr2017
    ON estimates2018_yr2018.mgra = estimates2018_yr2017.mgra
  JOIN estimates2018_yr2016
    ON estimates2018_yr2018.mgra = estimates2018_yr2016.mgra
 WHERE estimates_units2018 > estimates_units2017
'''
mgras = pd.read_sql(sql_query, mssql_engine)

# Fail here with a clear message: an empty result would make mgrastr '' and every
# later query would then die on an invalid "mgra IN ()" clause.
if mgras.empty:
    raise ValueError('No MGRAs have both scheduled development and a unit increase '
                     'from 2017 to 2018 in the estimates; nothing to QA.')

# largest 2017->2018 gains first, with a clean 0..n-1 index
mgras = (mgras.sort_values(by='estimates_unit_change_2017_to_2018', ascending=False)
              .reset_index(drop=True))

# get list of the MGRAs as string for query
mgras['mgra'] = mgras['mgra'].astype('int64')
mgralist = mgras['mgra'].tolist()
mgrastr = ','.join(map(str, mgralist))


##### MGRAs w/ scheduled development and unit increase 2017 to 2018 (source: estimates 2018 datasource 27)

In [None]:
# Report how many MGRAs were flagged, their combined 2017->2018 unit gain, and their ids.
mgra_count = len(mgralist)
total_unit_increase = mgras['estimates_unit_change_2017_to_2018'].sum()
print('\n\nCount of MGRAs: (with sched dev & unit increase 2017 to 2018 based on estimates): ', mgra_count)
print('\n\nTotal unit increase for those MGRAs: ', total_unit_increase)
print('\nMgras are:', mgrastr)

##### get sched dev site ids for each MGRA (w unit change from 2017 to 2018)

In [None]:
# Look up the scheduled development site ids located in each flagged MGRA
# (parcel -> mgra via the xpef23 crosswalk) and attach them to `mgras`.
sql_query = '''
SELECT mgra,site_id 
FROM urbansim.urbansim.scheduled_development_parcel sched_dev
JOIN isam.xpef23.parcel_du_xref_post2017 xpef23
ON sched_dev.parcel_id =  xpef23.parcel_id
WHERE mgra IN ({})'''.format(mgrastr) 
sites = pd.read_sql(sql_query, mssql_engine)
sites['site_id'] = sites['site_id'].astype('int64')
sites['mgra'] = sites['mgra'].astype('int64')

# Collapse to one row per MGRA with its distinct site ids as a comma-separated string.
# The ids are sorted numerically so the text is identical from run to run; joining a
# bare set() of strings yields an arbitrary order that changes between interpreter sessions.
sites_by_mgra = sites.groupby('mgra', as_index=False).agg(
    lambda ids: ', '.join(str(site) for site in sorted(set(ids))))
mgra_site = pd.merge(mgras, sites_by_mgra, on='mgra')

In [None]:
# mgra_site.style

##### for each of these MGRAs, get the capacity from each source: jurisdiction-provided, scheduled development, and ADU

In [None]:
# Sum the three capacity columns of urbansim.vi_capacity per flagged MGRA
# (parcels are mapped to MGRAs through the xpef23 crosswalk).
sql_query = '''
    SELECT  mgra,
            sum([capacity_2]) as [jurisdiction provided capacity],
            sum([capacity_3]) as [scheduled development capacity],
            sum([capacity_ADU]) as [ADU capacity]
       FROM urbansim.urbansim.vi_capacity
       JOIN isam.xpef23.parcel_du_xref_post2017 
         ON parcel_du_xref_post2017.parcel_id = vi_capacity.parcel_id
      WHERE mgra IN ({})'''
sql_query = sql_query.format(mgrastr) + " GROUP BY mgra"
all_capacity = pd.read_sql(sql_query, mssql_engine)

# total capacity = jurisdiction provided + scheduled development + ADU
jur_capacity = all_capacity['jurisdiction provided capacity']
sched_capacity = all_capacity['scheduled development capacity']
adu_capacity = all_capacity['ADU capacity']
all_capacity['total_capacity'] = jur_capacity.add(sched_capacity).add(adu_capacity)

##### Get unit change in forecast

In [None]:
# Forecast (datasource 30) housing units per flagged MGRA in 2016 and 2050,
# plus the difference between the two, from the demographic warehouse.
# The statement is assembled in three pieces because the MGRA list is
# interpolated into each of the two CTEs.
sql_query = '''
With dw2050 AS (
SELECT mgra,sum([units]) as forecast_units2050,geozone
  FROM [demographic_warehouse].[fact].[housing]
   JOIN demographic_warehouse.dim.mgra on mgra.mgra_id = housing.mgra_id
  WHERE datasource_id = 30 and geotype = 'region'  and yr_id = 2050 and mgra IN ({})'''.format(mgrastr)
sql_query += '''
  GROUP by yr_id,mgra,datasource_id,geozone),
  dw2016 AS (
  SELECT mgra,sum([units]) as forecast_units2016
  FROM [demographic_warehouse].[fact].[housing]
  JOIN demographic_warehouse.dim.mgra on mgra.mgra_id = housing.mgra_id
  WHERE datasource_id = 30 and geotype = 'region' and  yr_id = 2016 and mgra IN ({})'''.format(mgrastr)
sql_query += '''
  GROUP by yr_id,mgra)
  SELECT dw2050.mgra,forecast_units2016,forecast_units2050,forecast_units2050-forecast_units2016 as unit_change_forecast
  FROM dw2050
  JOIN dw2016
  ON dw2016.mgra = dw2050.mgra
'''
demographic_warehouse_df = pd.read_sql(sql_query, mssql_engine)

In [None]:
# Put forecast unit change and capacity side by side for each MGRA, then compute
# how much capacity is left after subtracting the forecast growth.
capacity_forecast = demographic_warehouse_df.merge(all_capacity, on='mgra')
capacity_forecast['capacity_minus_forecast'] = (
    capacity_forecast['total_capacity'].sub(capacity_forecast['unit_change_forecast'])
)

In [None]:
# capacity_forecast[['mgra','unit_change_forecast','total_capacity','capacity_minus_forecast']].style

In [None]:
# Attach the estimates columns and site ids to the capacity/forecast table (inner join on mgra).
result = capacity_forecast.merge(mgra_site, on='mgra')

In [None]:
# Column names of the merged table, as a plain list.
cols = list(result.columns)

In [None]:
# Bare expression: the notebook echoes the current column list as the cell output.
cols

In [None]:
# Columns kept in the exported QA table, in presentation order.
cols = [
    # identifiers
    'mgra',
    'site_id',
    # estimates
    'estimates_units2017',
    'estimates_units2018',
    'estimates_unit_change_2017_to_2018',
    # forecast
    'forecast_units2016',
    'forecast_units2050',
    'unit_change_forecast',
    # capacity
    'jurisdiction provided capacity',
    'scheduled development capacity',
    'ADU capacity',
    'total_capacity',
    'capacity_minus_forecast',
]

In [None]:
# Keep only the chosen columns, in the chosen order.
result = result.loc[:, cols]

In [None]:
#result.style

In [None]:
# Write the QA table to sched_dev_forecast_QA.csv in the working directory,
# omitting the DataFrame index column.
result.to_csv('sched_dev_forecast_QA.csv',index=False)

In [None]:
# urbansim_lite output for run id 444, restricted to the flagged MGRAs:
# unit change summed by simulation year, capacity type, MGRA and scheduled development site.
# Parcels without a scheduled development record come back with site_id 0
# (LEFT JOIN + COALESCE).
sql_query = '''
SELECT run_id,mgra,sum([unit_change]) as unit_change,COALESCE(site_id,0) as site_id,
    [year_simulation],[capacity_type]
FROM [urbansim].[urbansim].[urbansim_lite_output] o
JOIN isam.xpef23.parcel_du_xref_post2017 xpef23 
    ON xpef23.parcel_id = o.parcel_id
LEFT JOIN urbansim.urbansim.scheduled_development_parcel sched_dev
    ON sched_dev.parcel_id = o.parcel_id
WHERE run_id = 444 and 
mgra IN ({})'''.format(mgrastr)
sql_query += '''
GROUP BY year_simulation,capacity_type,mgra,run_id,site_id
ORDER by mgra'''
urbansim_out_df = pd.read_sql(sql_query, mssql_engine)

# store both id columns as 64-bit integers
for id_col in ('mgra', 'site_id'):
    urbansim_out_df[id_col] = urbansim_out_df[id_col].astype('int64')
# (site_id 0 is deliberately kept rather than converted to NaN)

In [None]:
# Every year from 2016 through 2050 inclusive; used to reindex the per-year
# pivots so each plot spans the same years.
first_year, last_year = 2016, 2050
idx = pd.Series(range(first_year, last_year + 1))

In [None]:
# Split the urbansim output into independent copies, one per capacity type:
# 'sch', 'jur' and 'adu'.
capacity_type = urbansim_out_df['capacity_type']
urbansim_out_df_sch = urbansim_out_df[capacity_type == 'sch'].copy()
urbansim_out_df_jur = urbansim_out_df[capacity_type == 'jur'].copy()
urbansim_out_df_adu = urbansim_out_df[capacity_type == 'adu'].copy()

In [None]:
# Give rows without a scheduled development site (site_id 0) a readable label per
# capacity type. The result is assigned back to the column: calling
# `.replace(..., inplace=True)` on `df.site_id` is a chained in-place update that
# silently does nothing once pandas Copy-on-Write is active.
urbansim_out_df_jur['site_id'] = urbansim_out_df_jur['site_id'].replace(0, 'jur_provided_cap')
urbansim_out_df_adu['site_id'] = urbansim_out_df_adu['site_id'].replace(0, 'ADU')

In [None]:
# Stack the three capacity-type frames back into one table (sch, then jur, then adu).
capacity_frames = [urbansim_out_df_sch, urbansim_out_df_jur, urbansim_out_df_adu]
mgra_urb = pd.concat(capacity_frames)

In [None]:
# Order the combined urbansim rows by MGRA id.
mgra_urb = mgra_urb.sort_values('mgra')

In [None]:
# Estimates (demographic warehouse datasource 27): housing units per flagged MGRA
# for every year after 2016, ordered by MGRA then year.
sql_query = '''
    SELECT mgra,sum(units) as units,yr_id as year_simulation
          FROM demographic_warehouse.fact.housing
          JOIN demographic_warehouse.dim.mgra 
            ON mgra.mgra_id = housing.mgra_id
         WHERE datasource_id = 27 AND geotype = 'region'  AND yr_id > 2016 AND
               mgra IN ({})'''.format(mgrastr)
sql_query += ''' 
      GROUP BY yr_id, mgra, datasource_id, geozone
      ORDER BY mgra,yr_id'''
estimates_df = pd.read_sql(sql_query, mssql_engine)
# store the MGRA id as a 64-bit integer
estimates_df['mgra'] = estimates_df['mgra'].astype('int64')

In [None]:
# Year-over-year unit change per MGRA: previous row's units within the same MGRA,
# then the difference. Missing values (e.g. the first year of each MGRA, which has
# no previous row) are set to 0.
estimates_df['data_lagged'] = estimates_df.groupby('mgra')['units'].shift()
estimates_df['unit_change'] = estimates_df['units'].sub(estimates_df['data_lagged'])
estimates_df.fillna(value=0, inplace=True)

In [None]:
# estimates_df.to_csv('estimates.csv')

In [None]:
# Forecast (demographic warehouse datasource 30): housing units per flagged MGRA
# for every forecast year, ordered by MGRA then year.
# NOTE(review): this query filters dim.mgra on geotype = 'jurisdiction' while the
# other warehouse queries in this notebook use 'region' -- confirm this is intended.
sql_query = '''
SELECT datasource_id,yr_id as year_simulation,sum([units]) as units,mgra
  FROM [demographic_warehouse].[fact].[housing]
  JOIN demographic_warehouse.dim.mgra on mgra.mgra_id = housing.mgra_id
  WHERE datasource_id = 30 and geotype = 'jurisdiction' AND
  mgra IN ({})'''.format(mgrastr)
sql_query += '''
  GROUP by yr_id,datasource_id,mgra
  ORDER by mgra,yr_id'''
dw_df = pd.read_sql(sql_query, mssql_engine)
# store the MGRA id as a 64-bit integer
dw_df['mgra'] = dw_df['mgra'].astype('int64')

In [None]:
# Units from the previous forecast row of the same MGRA (NaN for each MGRA's first row).
dw_df['data_lagged'] = dw_df.groupby('mgra')['units'].shift()

In [None]:
# Change in units relative to the previous forecast row of the same MGRA.
dw_df['unit_change'] = dw_df['units'].sub(dw_df['data_lagged'])

In [None]:
# Replace missing values (e.g. the lag/change of each MGRA's first row) with 0, in place.
dw_df.fillna(value=0, inplace=True)

In [None]:
# dw_df.to_csv('forecast.csv')

### PLOT RESULTS

In [None]:
# Create a new Excel file (images.xlsx in the working directory) and add a worksheet.
# The plotting loop in a later cell inserts one row of images per MGRA into this sheet.
workbook = xlsxwriter.Workbook('images.xlsx')
worksheet = workbook.add_worksheet()
# Widen the first column to make the text clearer.
worksheet.set_column('A:A', 30)

In [None]:
from io import BytesIO

# Columns of `result` shown in the per-MGRA summary table ('mgra' becomes the header).
summary_cols = ['mgra', 'estimates_unit_change_2017_to_2018', 'jurisdiction provided capacity',
                'scheduled development capacity', 'ADU capacity', 'total_capacity',
                'unit_change_forecast']


def _yearly_unit_change(frame, columns, years):
    """Pivot `frame` to one row per year in `years` and one column per `columns` value.

    Cells hold `unit_change`; years or combinations without a record are 0.
    NOTE(review): DataFrame.pivot raises ValueError when a (year, column) pair occurs
    more than once -- presumably the upstream GROUP BYs prevent that; confirm.
    """
    pivoted = frame.pivot(index='year_simulation', columns=columns, values='unit_change')
    return pivoted.fillna(0).reindex(years, fill_value=0)


def _insert_figure(sheet, fig, cell):
    """Render `fig` to an in-memory PNG, embed it in `sheet` at `cell`, then close `fig`."""
    imgdata = BytesIO()
    fig.savefig(imgdata, format="png")
    imgdata.seek(0)
    sheet.insert_image(cell, "", {'image_data': imgdata})
    plt.close(fig)


def _plot_unit_change(sheet, pivoted, title, xlabel, ymax, cell, **plot_kwargs):
    """Line-plot `pivoted` on a 0..`ymax` y axis and embed the chart in `sheet` at `cell`."""
    ax = pivoted.plot(title=title, style='.-', **plot_kwargs)
    ax.set_ylim(0, ymax)
    ax.set_ylabel('unit change')
    ax.set_xlabel(xlabel)
    _insert_figure(sheet, ax.get_figure(), cell)


# One band of 21 worksheet rows per MGRA, holding four images:
#   column B: estimates chart, L: urbansim chart, V: forecast chart, X: summary table.
counter = 2
for i in mgra_urb.mgra.unique():
    df = mgra_urb[mgra_urb['mgra'] == i]
    # renaming 'mgra' sets the legend title of the forecast / estimates charts
    df2 = dw_df[dw_df['mgra'] == i].rename(columns={'mgra': 'forecast for mgra'})
    df3 = estimates_df[estimates_df['mgra'] == i].rename(columns={'mgra': 'estimates for mgra'})

    # Shared y-axis upper limit for the three charts. Series.max() skips NaN (an
    # empty frame yields NaN), unlike the builtin max(), whose result then depends
    # on argument order. Fall back to 1 when there is no positive value at all.
    ylimmax = pd.Series([df2.unit_change.max(),
                         df.unit_change.max(),
                         df3.unit_change.max()]).max()
    if pd.isna(ylimmax) or ylimmax <= 0:
        ylimmax = 1

    df_pivot = _yearly_unit_change(df, 'site_id', idx)
    df2_pivot = _yearly_unit_change(df2, 'forecast for mgra', idx)
    df3_pivot = _yearly_unit_change(df3, 'estimates for mgra', idx)

    # plot table: the summary columns of `result` for this MGRA, transposed so each
    # measure is a row. set_index on the fresh selection avoids an in-place edit of a slice.
    yt = result.loc[result['mgra'] == i, summary_cols].set_index('mgra').T
    fig, ax = plt.subplots(figsize=(12, 2))  # set size frame
    ax.xaxis.set_visible(False)  # hide the x axis
    ax.yaxis.set_visible(False)  # hide the y axis
    ax.set_frame_on(False)  # no visible frame
    # one width per column of the table actually drawn
    tabla = table(ax, yt, loc='upper right', colWidths=[0.17] * len(yt.columns))
    tabla.auto_set_font_size(False)  # Activate set fontsize manually
    tabla.set_fontsize(12)  # if ++fontsize is necessary ++colWidths
    tabla.scale(1.2, 1.2)  # change size table
    _insert_figure(worksheet, fig, 'X' + str(counter))

    _plot_unit_change(worksheet, df_pivot, 'Urbansim for MGRA ' + str(i),
                      'urbansim year', ylimmax, 'L' + str(counter))
    _plot_unit_change(worksheet, df2_pivot, 'Demographic Warehouse for MGRA ' + str(i),
                      'forecast increment', ylimmax, 'V' + str(counter), color='black')
    _plot_unit_change(worksheet, df3_pivot, 'Estimates for MGRA ' + str(i),
                      'estimates 2017-2018', ylimmax, 'B' + str(counter), color='red')

    counter = counter + 21

In [None]:
# Close the workbook; XlsxWriter writes images.xlsx (with the inserted images) at this point.
workbook.close()