In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.backends.backend_pdf import PdfPages
from pysandag.database import get_connection_string
from sqlalchemy import create_engine
%matplotlib inline

In [None]:
# Build the SQLAlchemy engine from the 'mssql_db' entry of the project config file.
# The path is a raw string: '\d' and '\c' are not valid escape sequences, so the
# plain literal triggers an invalid-escape warning on newer Python versions.
# The raw literal produces exactly the same characters.
db_connection_string = get_connection_string(r'..\data\config.yml', 'mssql_db')
mssql_engine = create_engine(db_connection_string)

## Get subregional simulation output

In [None]:
# Look up the highest run_id in the urbansim output table.
run_id_sql = '''
SELECT max(run_id)
  FROM [urbansim].[urbansim].[urbansim_lite_output]
'''
run_id_df = pd.read_sql(run_id_sql, mssql_engine)
# max() over an empty table returns NULL; fail with a clear message instead of
# an opaque conversion error.
if run_id_df.isnull().values.any():
    raise ValueError("urbansim_lite_output returned no run_id (is the table empty?)")
# Read the single cell explicitly: int() on a (1, 1) array relies on an
# array-to-scalar conversion that NumPy has deprecated.
run_id = int(run_id_df.iloc[0, 0])
print("\n   Max run id : {:,}".format(run_id))

# Parcel-level unit changes for that run, with jurisdiction name and the
# parcel's mgra / luz ids attached.
hs_change_sql = '''
    SELECT o.parcel_id, j.name,  p.cap_jurisdiction_id, p.jurisdiction_id, p.mgra_id, p.luz_id,
    unit_change as hs_change, source, year_simulation
      FROM urbansim.urbansim.urbansim_lite_output o 
      JOIN urbansim.urbansim.parcel p on p.parcel_id = o.parcel_id
      JOIN urbansim.ref.jurisdiction j on p.cap_jurisdiction_id = j.jurisdiction_id
     WHERE run_id =  %s
  ORDER BY j.name,p.jurisdiction_id, year_simulation'''
# run_id is an int produced by the query above, so interpolating it into the
# SQL text cannot inject anything.
hs_change_sql = hs_change_sql % run_id
hs = pd.read_sql(hs_change_sql,mssql_engine)
print("\n   Units added: {:,}".format(int(hs.hs_change.sum())))

In [None]:
# Preview the first rows of the simulation output.
hs.head()

## Get SR13

In [None]:
# Series 13 housing stock (hs) summed per mgra, site and increment for scenario 0.
sr13_sql = '''
select x.mgra, sum([hs]) AS hs, increment, city, cpa, x.luz as luz_id,site
from [regional_forecast].[sr13_final].[capacity] x
join [regional_forecast].[sr13_final].[mgra13] y
on x.mgra = y.mgra
where scenario = 0 
group by x.mgra, site, increment, y.city,y.cpa,x.luz
order by x.mgra, increment'''
sr13_df = pd.read_sql(sr13_sql, mssql_engine)

# Row-count diagnostics. Because the query also groups by site, one
# mgra/increment pair can appear on more than one row; those extra rows are
# what `duplicated_df` captures.
lendf = len(sr13_df)
nmgra = sr13_df['mgra'].nunique(dropna=False)
nincrements = sr13_df['increment'].nunique(dropna=False)
duplicated_df = sr13_df.loc[sr13_df.duplicated(subset=['mgra', 'increment'])]
numdup = len(duplicated_df)
mi = nmgra * nincrements
nodups = lendf - numdup

In [None]:
# Print the SR13 row-count diagnostics; if every mgra has every increment,
# "increments * mgras" equals the de-duplicated row count.
sr13_checks = [
    ("\n   Number of increments: {:,}", nincrements),
    ("\n   Number of mgras: {:,}", nmgra),
    ("\nincrements * mgras: {:,}", mi),
    ("\nsr13 dataframe minus duplicates: {:,}", nodups),
    ("\nsr13 dataframe length: {:,}", lendf),
    ("\n   Number of duplicates: {:,}  (mgra w site id and not site id)", numdup),
]
for template, value in sr13_checks:
    print(template.format(value))

## QC: check output against the jurisdiction feedback Confluence page (valid since all capacity is used)

https://sandag.atlassian.net/wiki/spaces/LUM/pages/101679105/Jurisdictional+Feedback

In [None]:
# Total simulated unit change per capacity jurisdiction, indexed by
# cap_jurisdiction_id, for comparison with the feedback page.
jur_cap_df = (
    hs.groupby('cap_jurisdiction_id')['hs_change']
    .sum()
    .to_frame(name='units_change')
)

## Get total dwelling units in the region and sum by jurisdiction and cpa

#### note using cap jurisdiction id

In [None]:
# Existing dwelling units: every parcel with du > 0, plus its geography ids.
# NOTE(review): this query uses the two-part name `urbansim.parcel` while the
# simulation query uses `urbansim.urbansim.parcel`; confirm both resolve to the
# same table under the connection's default database.
du_sql = '''
    SELECT parcel_id, mgra_id, luz_id, p.jurisdiction_id, cap_jurisdiction_id, j.name, du, capacity 
        FROM urbansim.parcel p
        LEFT JOIN urbansim.ref.jurisdiction j on p.cap_jurisdiction_id = j.jurisdiction_id
        WHERE du > 0'''
du = pd.read_sql(du_sql,mssql_engine)
# Where there is no cap jurisdiction id, fall back to the parcel's jurisdiction id.
# Assign the result back: an in-place fillna on the column accessor is a chained
# assignment, which warns in pandas 2.x and is a no-op under Copy-on-Write.
du['cap_jurisdiction_id'] = du['cap_jurisdiction_id'].fillna(du['jurisdiction_id'])
print("\n   Dwelling Units: {:,}".format(int(du.du.sum())))

### get luz names

In [None]:
# Lookup table mapping each luz_id to its name.
# NOTE(review): geography_type_id = 64 is presumably the LUZ geography -- confirm
# against data_cafe.ref.
luz_names_sql = '''
    SELECT zone as luz_id, name as luz
    FROM data_cafe.ref.geography_zone WHERE geography_type_id = 64'''
luz_names = pd.read_sql(luz_names_sql, mssql_engine)   

In [None]:
# Number of distinct LUZ ids in the name lookup.
len(luz_names.luz_id.unique())

In [None]:
# Smallest LUZ id in the lookup.
luz_names.luz_id.min()

In [None]:
# Largest LUZ id in the lookup.
luz_names.luz_id.max()

### rename dataframes

In [None]:
# Work on copies so the frames loaded from the database (hs, du) stay unmodified.
units = hs.copy()
dus = du.copy()

### sum dwelling units by LUZ (n=229)

In [None]:
# Existing dwelling units per LUZ: a single integer `du` column indexed by
# luz_id in ascending order.
du_sr14_geo_df = (
    dus.groupby('luz_id')['du']
    .sum()
    .astype(int)
    .to_frame()
    .sort_index()
)
print("\n Total residential dwelling units after groupby: {:,}".format(int(du_sr14_geo_df.du.sum())))
print("\n Total number of luz: {:,}\n".format(len(du_sr14_geo_df.index.unique())))

### add zero for counts to luz with NA for dwelling units 

In [None]:
# Complete list of LUZ ids, 1 through 229 (the range end is exclusive).
idx = range(1,230)
len(idx)

In [None]:
# Add a du = 0 row for every id in idx that has no dwelling units; reindex also
# drops any luz_id that is not in idx.
du_sr14_geo_df = du_sr14_geo_df.reindex(idx, fill_value=0)
# Belt and braces: reindex already filled the new rows with 0.
du_sr14_geo_df.fillna(0,inplace=True)

In [None]:
# List the LUZ that have no existing dwelling units.
du_sr14_geo_df.loc[du_sr14_geo_df.du==0]

### sum hs change in simulation by luz (n=229)

In [None]:
# Attach LUZ names to the simulation output. The outer join keeps LUZ that have
# no simulated units; they appear as rows with NaN output columns.
unitsluz = units.merge(luz_names[['luz_id', 'luz']], on='luz_id', how='outer')

In [None]:
# Number of distinct LUZ ids after the outer merge.
len(unitsluz.luz_id.unique())

In [None]:
# Replace every NaN in the merged frame with 0. This affects all columns, so
# LUZ without simulation output end up with hs_change = 0 and
# year_simulation = 0 (and 0 in the other output columns as well).
unitsluz.fillna(0,inplace=True)

In [None]:
# Check for remaining nulls in hs_change. Because this runs after the fillna
# above, the result is always empty.
unitsluz.loc[unitsluz.hs_change.isnull()]

In [None]:
# Simulated unit change per LUZ and simulation year: indexed by luz_id, with the
# LUZ name in `geo` and integer `year_simulation` / `hs_sum` columns.
sr14_geo_df = (
    unitsluz.groupby(['luz_id', 'luz', 'year_simulation'])['hs_change']
    .sum()
    .reset_index(name='hs_sum')
    .rename(columns={'luz': 'geo'})
    .sort_values(by='luz_id')
    .set_index('luz_id')
    .astype({'hs_sum': int, 'year_simulation': int})
)
print("\n Total housing unit change after groupby: {:,}".format(int(sr14_geo_df.hs_sum.sum())))
print("\n Total number of luz: {:,}\n".format(len(sr14_geo_df.index.unique())))

In [None]:
# sr14_geo_df.head()

In [None]:
# Rows with year_simulation == 0 presumably come from the fillna(0) applied to
# unitsluz (LUZ with no simulation output). Move them to 2017 so they fall
# inside the 2017-2050 year range used when the series is padded below.
sr14_geo_df.loc[sr14_geo_df['year_simulation']== 0, 'year_simulation'] = 2017

In [None]:
# sr14_geo_df.loc[sr14_geo_df.hs_sum==0]

## Fill in "0" for units for "missing" simulation years (for plotting) (e.g. Del Mar)

In [None]:
# Inspect the per-year rows for LUZ id 43.
sr14_geo_df.loc[43]

In [None]:
# "Before" picture: plot one LUZ (id 34) prior to padding the missing years.
# before example
# del mar #34
before = sr14_geo_df.loc[34].sort_values(by='year_simulation')
# del_mar_before.head()
# The title uses the LUZ name taken from the first row of the selection.
title_name = 'NULL values in ' + before.geo.values[0] + ' Housing Unit Change'
before.plot(x='year_simulation',y='hs_sum',style='.-',title=title_name)

In [None]:
# Pad every LUZ to one row per simulation year 2017-2050 (range end exclusive).
# This cell is not idempotent: it changes the index layout of sr14_geo_df, so
# re-running it requires re-running the cells that build sr14_geo_df first.
idx = range(2017,2051)
# Index becomes (luz_id, geo, year_simulation).
sr14_geo_df.set_index(['geo','year_simulation'],append=True,inplace=True)
# Go wide: one row per year, one column per (luz_id, geo) pair.
sr14_geo_df = sr14_geo_df.unstack(['luz_id','geo'])
# Keep exactly the years in idx; years absent from the data are added as zero rows.
sr14_geo_df = sr14_geo_df.reindex(idx, fill_value=0)
# Cells left NaN by unstack (a LUZ with no row for an existing year) become 0.
sr14_geo_df.fillna(0,inplace=True)
# Back to long format, then restore luz_id as the only index level.
sr14_geo_df = sr14_geo_df.stack(['luz_id','geo'])
sr14_geo_df.reset_index(inplace=True)
sr14_geo_df.set_index('luz_id',inplace=True)

In [None]:
# "After" picture: the same LUZ (id 34) once missing years are filled with zeros.
after = sr14_geo_df.loc[34].sort_values(by='year_simulation')
title_name = 'Replace Null with Zeroes ' + after.geo.values[0] + ' Housing Unit Change'
after.plot(x='year_simulation',y='hs_sum',style='.-',title=title_name)

In [None]:
# Number of distinct LUZ names in the padded simulation frame.
len(sr14_geo_df.geo.unique())

In [None]:
# Number of LUZ rows in the dwelling-unit frame, for comparison.
len(du_sr14_geo_df)

In [None]:
# Number of distinct simulation years after padding.
len(sr14_geo_df.year_simulation.unique())

In [None]:
# Expected row count if every LUZ has every year ...
len(sr14_geo_df.year_simulation.unique()) * len(sr14_geo_df.geo.unique())

In [None]:
# ... compared with the actual row count.
len(sr14_geo_df)

## Sum units from output of simulation over five year increments

In [None]:
# Five-year bin edges 2015, 2020, ..., 2050 (range end is exclusive).
bins = range(2015,2055,5)
# One label per bin, named after the bin's upper edge: '2020' ... '2050'.
names = [str(x) for x in range(2020,2055,5)]
# pd.cut uses right-closed intervals by default, so e.g. years 2016-2020 fall in
# the bin labelled '2020'. The resulting `increment` column is categorical.
sr14_geo_df['increment'] = pd.cut(sr14_geo_df.year_simulation, bins, labels=names)

In [None]:
# Units added per LUZ within each five-year increment.
# `increment` is categorical (created by pd.cut). With the historical default
# observed=False, a multi-key groupby that includes a categorical expands the
# result to the cartesian product of all key values, which would create rows
# for luz_id/geo pairs that do not exist and break the later pivot on
# (increment, geo). observed=True keeps only the combinations present in the data.
sr14_increment = (
    sr14_geo_df
    .groupby(['increment', 'luz_id', 'geo'], observed=True)['hs_sum']
    .sum()
    .reset_index(name='hs_increment')
)

In [None]:
# Preview the per-increment totals.
sr14_increment.head()

## Cumulative sum units added by increment

In [None]:
# Running total of units added per LUZ across increments. Rows are already in
# increment order within each LUZ because the groupby that built sr14_increment
# sorts by its keys, with increment first.
# GroupBy.cumsum() returns a Series aligned to the original rows. The previous
# apply(lambda x: x.cumsum()) prepends the group key to the index on
# pandas >= 2.0, so the assignment no longer lines up.
sr14_increment['hs_cumulative'] = sr14_increment.groupby('geo')['hs_increment'].cumsum()
sr14_increment.set_index('luz_id',inplace=True)

In [None]:
# Spot-check the increment and cumulative totals for the LUZ named 'Del Mar'.
sr14_increment.loc[sr14_increment.geo=='Del Mar']

## Add increment 2016 with units added equal to zero for baseline du (for plotting)

In [None]:
# Preview the padded per-year frame.
sr14_geo_df.head()

In [None]:
# One row per LUZ, taken from the 2017 rows; only the LUZ name (and the luz_id
# index) is needed to build the baseline rows.
start_year = sr14_geo_df[['year_simulation','geo']].loc[sr14_geo_df.year_simulation==2017].copy()

In [None]:
# Number of baseline rows.
len(start_year)

In [None]:
# Turn the 2017 rows into a '2016' baseline increment with zero units added.
start_year['increment'] = '2016'
start_year['hs_cumulative'] = 0
del start_year['year_simulation']

# start_year['year_simulation'] = 'baseline'

In [None]:
# Append the baseline rows. start_year has no hs_increment column, so that
# column is NaN on the '2016' rows.
sr14_increment = pd.concat([sr14_increment,start_year])

In [None]:
# Preview the combined frame.
sr14_increment.head()

## sum sr14 by source - fill NA with "0"

In [None]:
# Simulated unit change per LUZ and source: indexed by luz_id, with the LUZ
# name in `geo` and an integer `hs_sum` column.
sr14_source = (
    unitsluz.groupby(['source', 'luz_id', 'luz'])['hs_change']
    .sum()
    .reset_index(name='hs_sum')
    .rename(columns={'luz': 'geo'})
    .sort_values(by='luz_id')
    .set_index('luz_id')
    .astype({'hs_sum': int})
)
print("\n Total housing unit change after groupby: {:,}".format(int(sr14_source.hs_sum.sum())))
print("\n Total number of luz: {:,}\n".format(len(sr14_source.index.unique())))

In [None]:
# Preview the per-source totals.
# sr14_source.source.unique()
sr14_source.head()

In [None]:
# Give every LUZ a row for every source that occurs in the data, with 0 where
# the LUZ has no units from that source.
# `idx` is only used by the commented-out reindex below; assigning it here
# overwrites the year range stored in `idx` by an earlier cell.
idx = range(1,4)
# Index becomes (luz_id, geo, source).
sr14_source.set_index(['geo','source'],append=True,inplace=True)
# Go wide: one row per source, one column per (luz_id, geo) pair.
sr14_source = sr14_source.unstack(['luz_id','geo'])
# sr14_source = sr14_source.reindex(idx, fill_value=0)
# Missing LUZ/source combinations are NaN after unstack; zero them.
sr14_source.fillna(0,inplace=True)
# Back to long format, then restore luz_id as the only index level.
sr14_source = sr14_source.stack(['luz_id','geo'])
sr14_source.reset_index(inplace=True)
sr14_source.set_index('luz_id',inplace=True)

#### get sched development totals

In [None]:
# Keep only the rows whose source equals the string '1' (scheduled development,
# per the heading above).
# NOTE(review): this compares against a string. If `source` is numeric the
# filter matches nothing and the plot cell's `.iloc[0]` lookup will fail --
# confirm the column's dtype.
sr14_source1 =  sr14_source.loc[sr14_source.source=='1'].copy()

## Join simulation output with existing dwelling units

In [None]:
# Row counts of the two frames about to be joined.
len(sr14_increment)

In [None]:
len(du_sr14_geo_df)

In [None]:
# Both frames are indexed by luz_id, so this attaches each LUZ's existing
# dwelling units (`du`) to every one of its increment rows.
sr14 = sr14_increment.join(du_sr14_geo_df)

In [None]:
# Housing stock at each increment = existing units + cumulative units added.
sr14['hs'] = sr14['hs_cumulative'] + sr14['du']

In [None]:
# Preview the joined frame.
sr14.head()

## SR13 aggregate

### add luz name to sr13

In [None]:
# Attach the LUZ name to each SR13 row (inner join on luz_id). Re-running this
# cell without reloading sr13_df would add a second, suffixed `luz` column.
sr13_df = sr13_df.merge(luz_names[['luz_id', 'luz']], on='luz_id')

#### aggregate mgra-level housing to LUZ: group by increment and sum

In [None]:
# Preview the SR13 rows with LUZ names attached.
sr13_df.head()

In [None]:
# SR13 housing stock per LUZ and increment, with the LUZ name in `geo`,
# ordered by luz_id.
sr13_geo_df = (
    sr13_df.groupby(['luz', 'luz_id', 'increment'])['hs']
    .sum()
    .reset_index(name='hs_sum')
    .rename(columns={'luz': 'geo'})
    .sort_values(by='luz_id')
)
sr13_geo_df.head()

In [None]:
# Count the LUZ by counting rows for a single increment (2015); there is one
# row per LUZ per increment.
print("\nNum of geographies (luz) = {:,}\n".format(int(len(sr13_geo_df.loc[sr13_geo_df.increment==2015]))))

##  sr13 pivot so each luz is column and rows are increments

In [None]:
# Wide SR13 table: rows are increments, one column per LUZ name, values are
# housing stock. The columns axis name is cleared.
sr13_geo_df_pivot = (
    sr13_geo_df
    .pivot(index='increment', columns='geo', values='hs_sum')
    .rename_axis(None, axis=1)
)
sr13_geo_df_pivot

##  sr14 pivot so each luz is column and rows are increments

In [None]:
# Wide SR14 table: rows are increments, one column per LUZ name, values are
# housing stock (existing du + cumulative units added); gaps are filled with 0.
sr14_geo_df_pivot = (
    sr14
    .pivot(index='increment', columns='geo', values='hs')
    .rename_axis(None, axis=1)
    .fillna(0)
)
sr14_geo_df_pivot

In [None]:
####  sr13 calculate total change by region and luz

In [None]:
# SR13 total change per LUZ = housing stock in 2050 minus housing stock in 2012.
# Keep only the first and last increments.
total_diff = sr13_geo_df_pivot.loc[[2012,2050],:]
# diff() on the two rows leaves 2050 - 2012 in the 2050 row.
differences = total_diff.diff().loc[[2050]]
differences.rename(index={2050: 'total_change'},inplace=True)
# Summing over the single remaining row turns it into one value per LUZ
# (NaN differences become 0 because sum skips NaN).
totalchange = pd.DataFrame(differences.sum(axis=0))
totalchange.rename(columns={0: 'total_change'},inplace=True)
print("\nTotal Units added sr13: {:,}".format(int(totalchange.total_change.sum())))

In [None]:
# Number of LUZ that will be plotted.
len(sr14_geo_df.geo.unique())

In [None]:
# Order rows by luz_id so the plot loop below visits LUZ in id order.
sr14_geo_df.sort_index(inplace=True)

# plot

In [None]:
# Export one "SR13 vs. draft SR14" housing-stock chart per LUZ: a PNG per LUZ
# under out/luz/ plus a single multi-page PDF under out/.
# Figures are written to disk and then closed, so they are not rendered inline.

# Create the output folders up front ('out/luz' also creates 'out').
os.makedirs('out/luz', exist_ok=True)

# SR14 increments are strings ('2016', '2020', ...) while SR13 increments are
# numbers; convert to int so both series share one numeric x axis instead of
# mixing categorical and numeric positions.
sr14_x = [int(inc) for inc in sr14_geo_df_pivot.index]
sr13_x = sr13_geo_df_pivot.index.tolist()

# The context manager finalizes the PDF even if a LUZ fails part-way through.
with PdfPages("out/sr13_and_draft_sr14_luz_compare.pdf") as pp:
    for j, jur in enumerate(sr14_geo_df.geo.unique().tolist()):
        # Cumulative SR14 units added by 2050 for this LUZ. Select the scalar
        # with .iloc[0]; int() on a one-element Series is deprecated.
        chg = int(sr14_increment.loc[(sr14_increment.increment == '2050') &
                                     (sr14_increment.geo == jur), 'hs_cumulative'].iloc[0])
        # Units from source '1' (scheduled development) for this LUZ.
        sched_dev = int(sr14_source1.loc[sr14_source1['geo'] == jur, 'hs_sum'].iloc[0])
        sr14_luz_id = sr14_geo_df.loc[sr14_geo_df['geo'] == jur].index.values[0]
        sr13_luz_id = sr13_geo_df.loc[sr13_geo_df['geo'] == jur].luz_id.values[0]
        # Label-based lookup instead of positional [0] on a label-indexed Series.
        sr13_chg = int(totalchange.loc[jur, 'total_change'])

        plotlabelsr14 = ('sr14: ' + str(sr14_luz_id) + '.' + jur + '\nchg = ' + str(chg) +
                         '\n(sched dev = ' + str(sched_dev) + ')')
        plotlabelsr13 = 'sr13: ' + str(sr13_luz_id) + '.' + jur + '\nchg = ' + str(sr13_chg)

        fig, ax = plt.subplots(figsize=(6, 4.5))
        ax.plot(sr14_x, sr14_geo_df_pivot[jur].tolist(), 'b-o', label=plotlabelsr14)
        ax.plot(sr13_x, sr13_geo_df_pivot[jur].tolist(), 'r-o', label=plotlabelsr13)
        ax.legend()
        ax.set_ylabel('Housing stock')
        ax.set_xlabel('Increment')
        ax.set_title('Series 13 and Draft Series 14\n   at LUZ')

        plotname = 'out/luz/' + str(j+1) + '_luz.png'
        fig.savefig(plotname, bbox_inches='tight', dpi=600)
        pp.savefig(fig, dpi=300, transparent=True)
        # Release the figure; otherwise every LUZ figure stays in memory.
        plt.close(fig)