# Imports

In [69]:
# Import Libraries
import pandas as pd
import numpy as np
import yaml
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pyodbc
import glob
import copy
import PySimpleGUI as sg
import traceback

# Updated MGRA 13 Data

## MGRA 13 Housing Data

In [187]:
# DSID 43
conn = pyodbc.connect('Driver={ODBC Driver 17 for SQL Server};'
                      'Server=DDAMWSQL16.sandag.org;'
                      'Database=demographic_warehouse;'
                      'Trusted_Connection=yes;')

qry = '''SELECT denorm.mgra_id
      ,housing.[structure_type_id]
	  ,structure.short_name
      ,[units]
      ,[unoccupiable]
      ,[occupied]
      ,[vacancy]
      ,jur.jurisdiction
  FROM [demographic_warehouse].[fact].[housing] AS housing
  LEFT JOIN [demographic_warehouse].[dim].[structure_type] AS structure
  ON housing.structure_type_id = structure.structure_type_id
  LEFT JOIN [demographic_warehouse].[dim].[mgra_denormalize] AS denorm
  ON housing.mgra_id = denorm.mgra_id
  LEFT JOIN [demographic_warehouse].[dim].[mgra_denormalize] AS jur
  ON housing.mgra_id = jur.mgra_id
  WHERE datasource_id = 43 AND yr_id = 2019;'''


old_data_updated = pd.read_sql_query(qry, conn)
old_data_updated

Unnamed: 0,mgra_id,structure_type_id,short_name,units,unoccupiable,occupied,vacancy,jurisdiction
0,1400181501,1,sf,3,1,2,1.0,San Diego
1,1400181501,2,sfmu,7,1,6,1.0,San Diego
2,1400181501,3,mf,0,0,0,0.0,San Diego
3,1400181501,6,sfa,0,0,0,0.0,San Diego
4,1400181501,5,sfd,0,0,0,0.0,San Diego
...,...,...,...,...,...,...,...,...
163891,1401622402,3,mf,0,0,0,0.0,Unincorporated
163892,1401622501,4,mh,0,0,0,0.0,Unincorporated
163893,1401622601,2,sfmu,0,0,0,0.0,Unincorporated
163894,1401622701,4,mh,0,0,0,0.0,Unincorporated


In [188]:
import copy
# Copy the MGRA 13 Data Over
old_data_up = copy.deepcopy(old_data_updated)

# Create a dataframe of just the information we want
df = old_data_up[['jurisdiction', 'short_name', 'units']]

In [198]:
# Pivot the data then group by jurisdiction as we want this at the jurisdiction level
housing_data = pd.concat([df.pivot(columns ='short_name', values ='units'), old_data_up[['jurisdiction']]], axis = 1).groupby('jurisdiction').sum().reset_index()

In [201]:
# Merge the housing data that I created above with the rest of the data to get all of the data together
housing_data.merge(old_data_up[['jurisdiction', 'units', 'unoccupiable', 'occupied', 'vacancy']].groupby('jurisdiction').sum().reset_index(), how='left', on='jurisdiction')

Unnamed: 0,jurisdiction,mf,mh,sf,sfa,sfd,sfmu,units,unoccupiable,occupied,vacancy
0,Carlsbad,13284.0,1317.0,25098.0,0.0,0.0,6683.0,46382,2091,43838,2544.0
1,Chula Vista,29250.0,3855.0,40885.0,0.0,0.0,10137.0,84127,1475,82578,1549.0
2,Coronado,4106.0,0.0,4234.0,0.0,0.0,1248.0,9588,1937,7511,2077.0
3,Del Mar,724.0,0.0,1442.0,0.0,0.0,447.0,2613,442,2083,530.0
4,El Cajon,18581.0,2196.0,13919.0,0.0,0.0,1355.0,36051,599,34993,1058.0
5,Encinitas,4080.0,739.0,16291.0,0.0,0.0,5079.0,26189,941,24681,1508.0
6,Escondido,16730.0,3738.0,24676.0,0.0,0.0,3340.0,48484,984,47325,1159.0
7,Imperial Beach,3997.0,185.0,4397.0,0.0,0.0,1401.0,9980,339,9578,402.0
8,La Mesa,11522.0,303.0,12322.0,0.0,0.0,1640.0,25787,455,25160,627.0
9,Lemon Grove,2009.0,36.0,6462.0,0.0,0.0,610.0,9117,189,8609,508.0


## MGRA13 Population Data
This is still series 13 data. 

In [182]:
# DSID 43
conn = pyodbc.connect('Driver={ODBC Driver 17 for SQL Server};'
                      'Server=DDAMWSQL16.sandag.org;'
                      'Database=demographic_warehouse;'
                      'Trusted_Connection=yes;')

qry = '''SELECT
      [yr_id]
      ,pop.[mgra_id]
      ,[population]
	  ,jur.jurisdiction
  FROM [demographic_warehouse].[fact].[population] AS pop
  LEFT JOIN [demographic_warehouse].[dim].[mgra_denormalize] AS jur
  ON pop.mgra_id = jur.mgra_id
  WHERE datasource_id = 43 AND yr_id = 2019;'''


old_data_updated_pop = pd.read_sql_query(qry, conn)
old_data_updated_pop

Unnamed: 0,yr_id,mgra_id,population,jurisdiction
0,2019,1400181501,0,San Diego
1,2019,1400181501,0,San Diego
2,2019,1400181501,0,San Diego
3,2019,1400181502,0,Unincorporated
4,2019,1400181502,0,Unincorporated
...,...,...,...,...
109259,2019,1402196803,0,Unincorporated
109260,2019,1402196901,0,Unincorporated
109261,2019,1402196902,0,Unincorporated
109262,2019,1402033701,0,Unincorporated


In [184]:
old_data_updated_pop[['population', 'jurisdiction']].groupby('jurisdiction').sum()

Unnamed: 0_level_0,population
jurisdiction,Unnamed: 1_level_1
Carlsbad,113635
Chula Vista,271032
Coronado,23814
Del Mar,4275
El Cajon,104104
Encinitas,62096
Escondido,152391
Imperial Beach,27934
La Mesa,59827
Lemon Grove,26426


# Crosswalk to new MGRA15
Using this crosswalk below I will now be able to update the MGRA13 data with MGRA15 MGRA breakdown and have a new MGRA15 output. The new MGRA15 data did not change any of the underlying data it just update the MGRA breakdown.

In [202]:
mgra_crosswalk = pd.read_csv('T:/socioec/Current_Projects/XPEF39/MGRA13_15_xref/MGRA13_15_p2poly.csv')

In [203]:
mgra_crosswalk

Unnamed: 0,MGRA_13,MGRA_15
0,1,11268
1,2,11268
2,3,5789
3,4,11273
4,5,11270
...,...,...
22997,22998,23176
22998,22999,23244
22999,23000,23245
23000,23001,23246


## MGRA15 Housing Info

In [217]:
conn = pyodbc.connect('Driver={ODBC Driver 17 for SQL Server};'
                      'Server=DDAMWSQL16.sandag.org;'
                      'Database=demographic_warehouse;'
                      'Trusted_Connection=yes;')

qry = '''SELECT denorm.mgra
      ,housing.[structure_type_id]
	  ,structure.short_name
      ,[units]
      ,[unoccupiable]
      ,[occupied]
      ,[vacancy]
  FROM [demographic_warehouse].[fact].[housing] AS housing
  LEFT JOIN [demographic_warehouse].[dim].[structure_type] AS structure
  ON housing.structure_type_id = structure.structure_type_id
  LEFT JOIN [demographic_warehouse].[dim].[mgra_denormalize] AS denorm
  ON housing.mgra_id = denorm.mgra_id
  WHERE datasource_id = 43 AND yr_id = 2019;'''


old_data_updated = pd.read_sql_query(qry, conn)
old_data_updated


Unnamed: 0,mgra,structure_type_id,short_name,units,unoccupiable,occupied,vacancy
0,1,6,sfa,0,0,0,0.0
1,1,6,sfa,0,0,0,0.0
2,2,6,sfa,0,0,0,0.0
3,2,6,sfa,0,0,0,0.0
4,3,6,sfa,0,0,0,0.0
...,...,...,...,...,...,...,...
163891,4734,3,mf,1,0,1,0.0
163892,5919,3,mf,1,0,1,0.0
163893,11209,4,mh,1,0,1,0.0
163894,17931,4,mh,1,0,1,0.0


In [218]:
# Taking the MGRA13 data and using the crosswalk to get the MGRA15 data
old_data_updated = old_data_updated.merge(mgra_crosswalk, how='left', left_on='mgra', right_on='MGRA_13')
old_data_updated

Unnamed: 0,mgra,structure_type_id,short_name,units,unoccupiable,occupied,vacancy,MGRA_13,MGRA_15
0,1,6,sfa,0,0,0,0.0,1,11268
1,1,6,sfa,0,0,0,0.0,1,11268
2,2,6,sfa,0,0,0,0.0,2,11268
3,2,6,sfa,0,0,0,0.0,2,11268
4,3,6,sfa,0,0,0,0.0,3,5789
...,...,...,...,...,...,...,...,...,...
163891,4734,3,mf,1,0,1,0.0,4734,9030
163892,5919,3,mf,1,0,1,0.0,5919,9139
163893,11209,4,mh,1,0,1,0.0,11209,14855
163894,17931,4,mh,1,0,1,0.0,17931,19307


In [226]:
# Performing the same manipulations as I did with the MGRA13 data now just with MGRA15
old_data_up = copy.deepcopy(old_data_updated)
df = old_data_up[['MGRA_15', 'short_name', 'units']]

housing_data = pd.concat([df.pivot(columns ='short_name', values ='units'), old_data_up[['MGRA_15']]], axis = 1).groupby('MGRA_15').sum().reset_index()
housing_data
mgra_15_housing = housing_data.merge(old_data_up[['MGRA_15', 'units', 'unoccupiable', 'occupied', 'vacancy']].groupby('MGRA_15').sum().reset_index(), how='left', on='MGRA_15')
mgra_15_housing

Unnamed: 0,MGRA_15,mf,mh,sf,sfa,sfd,sfmu,units,unoccupiable,occupied,vacancy
0,1,92.0,0.0,145.0,0.0,0.0,10.0,247,2,242,5.0
1,2,56.0,0.0,0.0,0.0,0.0,0.0,56,4,52,4.0
2,3,177.0,0.0,18.0,0.0,0.0,5.0,200,2,195,5.0
3,4,0.0,0.0,4.0,0.0,0.0,0.0,4,1,2,2.0
4,5,0.0,0.0,31.0,0.0,0.0,12.0,43,4,39,4.0
...,...,...,...,...,...,...,...,...,...,...,...
21618,25397,0.0,0.0,91.0,0.0,0.0,13.0,104,0,104,0.0
21619,25413,58.0,0.0,6.0,0.0,0.0,6.0,70,26,44,26.0
21620,25427,0.0,0.0,38.0,0.0,0.0,13.0,51,6,45,6.0
21621,25448,192.0,0.0,189.0,0.0,0.0,96.0,477,20,454,23.0


## MGRA15 Population Data

In [232]:
# DSID 43
conn = pyodbc.connect('Driver={ODBC Driver 17 for SQL Server};'
                      'Server=DDAMWSQL16.sandag.org;'
                      'Database=demographic_warehouse;'
                      'Trusted_Connection=yes;')

qry = '''SELECT
      [yr_id]
      ,pop.[mgra_id]
      ,[population]
      ,denorm.mgra
  FROM [demographic_warehouse].[fact].[population] AS pop
  LEFT JOIN [demographic_warehouse].[dim].[mgra_denormalize] AS denorm
  ON pop.mgra_id = denorm.mgra_id
  WHERE datasource_id = 43 AND yr_id = 2019;'''


old_data_updated_pop = pd.read_sql_query(qry, conn)
old_data_updated_pop

Unnamed: 0,yr_id,mgra_id,population,mgra
0,2019,1401366701,0,13667
1,2019,1401367001,0,13670
2,2019,1401367501,0,13675
3,2019,1401368101,0,13681
4,2019,1401368501,0,13685
...,...,...,...,...
109259,2019,1401366001,0,13660
109260,2019,1401366101,0,13661
109261,2019,1401366201,0,13662
109262,2019,1401366301,0,13663


In [236]:
df = old_data_updated_pop.merge(mgra_crosswalk, how='left', left_on='mgra', right_on='MGRA_13')
df[['MGRA_15', 'population']].groupby('MGRA_15').sum()

Unnamed: 0_level_0,population
MGRA_15,Unnamed: 1_level_1
1,760
2,83
3,559
4,3
5,105
...,...
25397,390
25413,103
25427,109
25448,1141


# Checking Raw Files

In [237]:
mgra_13_raw = pd.read_csv("T:/socioec/Current_Projects/XPEF39/abm_csv/mgra13_based_input2019_01.csv")

In [240]:
mgra_13_raw['hs'].sum()

1204818

In [241]:
mgra_15_raw = pd.read_csv("T:/socioec/Current_Projects/XPEF39/abm_csv_mgra15/mgra15_based_input2019.csv")

In [244]:
mgra_15_raw['hs'].sum()

1204471.0

# Region Level Diff Between MGRA13 and MGRA15
This is a request from Purva to see the region level difference between MGRA13 and MGRA15.

In [247]:
mgra_13_region = pd.read_csv('C:/Users/cra/San Diego Association of Governments/SANDAG QA QC - Documents/Projects/2022/2022-58 2019 Base Year Forecast Output QC/data/MGRA13 Updated Data/mgra13_update_region_ind_QA.csv')

In [248]:
mgra_15_region = pd.read_csv('C:/Users/cra/San Diego Association of Governments/SANDAG QA QC - Documents/Projects/2022/2022-58 2019 Base Year Forecast Output QC/data/MGRA15 Data/mgra15_region_ind_QA.csv')

In [255]:
a = list(mgra_13_region.columns)
b = list(mgra_15_region.columns)

list(set(a) & set(b))

['collegeenroll',
 'emp_ag',
 '55 to 59',
 'parkarea',
 'hstallsoth',
 'emp_state_local_gov_blue',
 '35 to 39',
 'emp_whsle_whs',
 '75 to 79',
 'adultschenrl',
 'pseudomsa',
 'mparkcost',
 'hh_sf',
 'i10',
 'hh_mf',
 'i2',
 'hh_mh',
 'emp_religious',
 'hs',
 'emp_personal_svcs_office',
 'ech_dist',
 'hhp',
 'emp_const_non_bldg_prod',
 'i1',
 'hs_sf',
 'midpriceroom',
 'effective_acres',
 'mstallssam',
 'retempden',
 'emp_const_bldg_office',
 'empdenbin',
 'zip09',
 'hparkcost',
 'emp_fed_non_mil',
 'hotelroomtotal',
 'othercollegeenroll',
 'emp_mfg_office',
 'Pacific Islander',
 'i6',
 'emp_pvt_hh',
 'mstallsoth',
 'i5',
 'vacancy_rate',
 'i3',
 'acres',
 '20 to 24',
 'emp_const_bldg_prod',
 'gq_mil',
 '85 and Older',
 'luxuryroom',
 'emp_pvt_ed_post_k12_oth',
 '60 and 61',
 'dstallsoth',
 'vacancy',
 'empden',
 '5 to 9',
 'emp_total',
 'dparkcost',
 '15 to 17',
 'elem_population',
 'emp_amusement',
 'i7',
 'emp_pvt_ed_k12',
 'hh',
 'Female',
 'Other',
 'openspaceparkpreserve',
 'emp_f

In [256]:
mgra_13_region_clean = mgra_13_region[list(set(a) & set(b))]
mgra_15_region_clean = mgra_15_region[list(set(a) & set(b))]

In [269]:
df = mgra_13_region_clean.T.reset_index().merge(mgra_15_region_clean.T.reset_index(), how='left', on='index')
df.columns = ['category', 'MGRA13', 'MGRA15']


df['Diff'] = df['MGRA13'] - df['MGRA15']

df

Unnamed: 0,category,MGRA13,MGRA15,Diff
0,collegeenroll,213518.0,213518.0,0.0
1,emp_ag,9905.0,10000.0,-95.0
2,55 to 59,209873.0,210308.0,-435.0
3,parkarea,88810.0,88810.0,0.0
4,hstallsoth,144650.0,144650.0,0.0
...,...,...,...,...
134,emp_public_ed,0.0,0.0,0.0
135,62 to 64,113581.0,113816.0,-235.0
136,25 to 29,209843.0,210274.0,-431.0
137,emp_health,215961.0,215988.0,-27.0


In [270]:
# df.to_csv('C:/Users/cra/San Diego Association of Governments/SANDAG QA QC - Documents/Projects/2022/2022-58 2019 Base Year Forecast Output QC/data/DIFF/mgra_13_minus_15.csv')