In [21]:
import os 
import pandas as pd
import numpy as np
import glob
import pyodbc
import gc

# Census Redistricting Data

## Ethnicity Data

### Grabbing Data From SQL

In [22]:
# Load the census redistricting tract-level counts from the `estimates` database.
# The query text is kept in a separate .sql file and read in before connecting.
# NOTE(review): this path points at a "2023-024 Estimates 2022" folder, while the
# other paths in this notebook use "2023-023 Estimates 2022" -- confirm it is intended.
with open(r'C:\Users\cra\OneDrive - San Diego Association of Governments\QA_Repository\2023\2023-024 Estimates 2022\sql_queries\ct_population.sql', 'r') as sql_file:
    sql_query = sql_file.read()

conn = pyodbc.connect('Driver={ODBC Driver 17 for SQL Server};'
                    'Server=DDAMWSQL16.sandag.org;'
                    'Database=estimates;'
                    'Trusted_Connection=yes;')
try:
    census_redistricting_ct_data = pd.read_sql_query(sql_query, conn)
finally:
    # Release the database connection even if the query fails; no later visible
    # cell uses `conn`, so it does not need to stay open.
    conn.close()

census_redistricting_ct_data

Unnamed: 0,census_tract,Hispanic,"Non-Hispanic, American Indian or Alaska Native","Non-Hispanic, Asian","Non-Hispanic, Black","Non-Hispanic, Hawaiian or Pacific Islander","Non-Hispanic, Other","Non-Hispanic, Two or More Races","Non-Hispanic, White",Total Population
0,100.0,280,4,129,28,1,11,154,2290,2908
1,201.0,262,2,105,25,6,3,91,1500,2001
2,202.0,746,7,266,75,6,42,220,3063,4441
3,301.0,350,12,153,49,0,7,100,1473,2155
4,302.0,495,9,176,91,9,20,149,1853,2811
...,...,...,...,...,...,...,...,...,...,...
732,21900.0,1925,15,141,116,17,14,36,347,2625
733,22000.0,2511,15,1402,164,49,16,92,260,4526
734,22101.0,306,5,218,58,4,6,150,1867,2631
735,22102.0,1713,16,782,113,12,46,345,3982,7024


### Grabbing Internal Data

In [23]:
# Load the internal estimates extract, keep only the rows whose yr_id is 2020,
# then drop yr_id since it is constant after the filter.
# NOTE(review): the variable is named "housing" but the file read here is the
# ethnicity extract -- the name is kept because later cells reference it.
ct_housing_2022_01_data = (
    pd.read_csv(r'C:\Users\cra\San Diego Association of Governments\SANDAG QA QC - Documents\Projects\2023\2023-023 Estimates 2022\Data\ethnicity\census_tract_ethnicity_est_2022_01_ind_QA.csv')
    .loc[lambda frame: frame['yr_id'] == 2020]
    .drop(columns='yr_id')
)
ct_housing_2022_01_data

Unnamed: 0,census_tract,Hispanic,"Non-Hispanic, American Indian or Alaska Native","Non-Hispanic, Asian","Non-Hispanic, Black","Non-Hispanic, Hawaiian or Pacific Islander","Non-Hispanic, Other","Non-Hispanic, Two or More Races","Non-Hispanic, White"
0,100,529,0,120,0,0,0,54,2429
3,201,84,0,145,2,0,0,145,1773
6,202,604,0,311,11,0,2,120,3216
9,301,564,0,84,40,0,0,104,1592
12,302,496,1,67,112,4,6,42,2334
...,...,...,...,...,...,...,...,...,...
2193,21800,311,0,48,0,0,0,54,1657
2196,21900,1592,113,604,1114,39,427,250,4736
2199,22000,1753,0,1314,141,0,4,106,564
2202,22101,138,0,147,47,0,0,226,1406


### Check census tract differences

In [24]:
# Report census tracts that appear in one source but not the other.
# The tract IDs are materialised once and the membership lookups use sets, so the
# check is linear instead of rebuilding a list for every element.
# The redistricting IDs are floats (e.g. 100.0) and the estimate IDs are ints
# (e.g. 100); equal int/float values hash and compare equal, so they still match.
redistricting_tracts = list(census_redistricting_ct_data['census_tract'])
estimate_tracts = list(ct_housing_2022_01_data['census_tract'])
redistricting_tract_set = set(redistricting_tracts)
estimate_tract_set = set(estimate_tracts)

in_cr_not_in_est = [x for x in redistricting_tracts if x not in estimate_tract_set]
print(f"The following CT are in census redistricting but not in estimates: {in_cr_not_in_est}")

in_est_not_in_cr = [x for x in estimate_tracts if x not in redistricting_tract_set]
print(f"The following CT are in estimates but not in census redistricting: {in_est_not_in_cr}")

The following CT are in census redistricting but not in estimates: [990100.0]
The following CT are in estimamtes but not in census redistricting: []


### Create the diff

In [25]:
# Index the redistricting frame by census tract so it can be aligned by label with
# the estimates frame. The guard makes re-running this cell a no-op: once
# 'census_tract' is the index it is no longer a column, and an unconditional
# set_index would raise KeyError.
if census_redistricting_ct_data.index.name != 'census_tract':
    census_redistricting_ct_data = census_redistricting_ct_data.set_index('census_tract')
census_redistricting_ct_data

Unnamed: 0_level_0,Hispanic,"Non-Hispanic, American Indian or Alaska Native","Non-Hispanic, Asian","Non-Hispanic, Black","Non-Hispanic, Hawaiian or Pacific Islander","Non-Hispanic, Other","Non-Hispanic, Two or More Races","Non-Hispanic, White",Total Population
census_tract,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
100.0,280,4,129,28,1,11,154,2290,2908
201.0,262,2,105,25,6,3,91,1500,2001
202.0,746,7,266,75,6,42,220,3063,4441
301.0,350,12,153,49,0,7,100,1473,2155
302.0,495,9,176,91,9,20,149,1853,2811
...,...,...,...,...,...,...,...,...,...
21900.0,1925,15,141,116,17,14,36,347,2625
22000.0,2511,15,1402,164,49,16,92,260,4526
22101.0,306,5,218,58,4,6,150,1867,2631
22102.0,1713,16,782,113,12,46,345,3982,7024


In [26]:
# Index the estimates frame by census tract so it can be aligned by label with the
# redistricting frame. The guard makes re-running this cell a no-op: once
# 'census_tract' is the index it is no longer a column, and an unconditional
# set_index would raise KeyError.
if ct_housing_2022_01_data.index.name != 'census_tract':
    ct_housing_2022_01_data = ct_housing_2022_01_data.set_index('census_tract')
ct_housing_2022_01_data

Unnamed: 0_level_0,Hispanic,"Non-Hispanic, American Indian or Alaska Native","Non-Hispanic, Asian","Non-Hispanic, Black","Non-Hispanic, Hawaiian or Pacific Islander","Non-Hispanic, Other","Non-Hispanic, Two or More Races","Non-Hispanic, White"
census_tract,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
100,529,0,120,0,0,0,54,2429
201,84,0,145,2,0,0,145,1773
202,604,0,311,11,0,2,120,3216
301,564,0,84,40,0,0,104,1592
302,496,1,67,112,4,6,42,2334
...,...,...,...,...,...,...,...,...
21800,311,0,48,0,0,0,54,1657
21900,1592,113,604,1114,39,427,250,4736
22000,1753,0,1314,141,0,4,106,564
22101,138,0,147,47,0,0,226,1406


In [27]:
# Restrict the redistricting frame to the tracts present in the estimates frame,
# in the estimates frame's row order (all columns kept). A tract missing from the
# redistricting frame makes .loc raise KeyError.
census_redistricting_ct_data = census_redistricting_ct_data.loc[
    ct_housing_2022_01_data.index, :
]
census_redistricting_ct_data

Unnamed: 0_level_0,Hispanic,"Non-Hispanic, American Indian or Alaska Native","Non-Hispanic, Asian","Non-Hispanic, Black","Non-Hispanic, Hawaiian or Pacific Islander","Non-Hispanic, Other","Non-Hispanic, Two or More Races","Non-Hispanic, White",Total Population
census_tract,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
100,280,4,129,28,1,11,154,2290,2908
201,262,2,105,25,6,3,91,1500,2001
202,746,7,266,75,6,42,220,3063,4441
301,350,12,153,49,0,7,100,1473,2155
302,495,9,176,91,9,20,149,1853,2811
...,...,...,...,...,...,...,...,...,...
21800,233,0,46,13,0,5,96,1584,1980
21900,1925,15,141,116,17,14,36,347,2625
22000,2511,15,1402,164,49,16,92,260,4526
22101,306,5,218,58,4,6,150,1867,2631


In [28]:
# Estimates minus census redistricting, aligned on census tract and column name.
# 'Total Population' exists only in the redistricting frame, so the subtraction
# yields an all-NaN column for it; that column is dropped.
diff = (
    ct_housing_2022_01_data
    .sub(census_redistricting_ct_data)
    .drop(columns='Total Population')
)
diff

Unnamed: 0_level_0,Hispanic,"Non-Hispanic, American Indian or Alaska Native","Non-Hispanic, Asian","Non-Hispanic, Black","Non-Hispanic, Hawaiian or Pacific Islander","Non-Hispanic, Other","Non-Hispanic, Two or More Races","Non-Hispanic, White"
census_tract,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
100,249,-4,-9,-28,-1,-11,-100,139
201,-178,-2,40,-23,-6,-3,54,273
202,-142,-7,45,-64,-6,-40,-100,153
301,214,-12,-69,-9,0,-7,4,119
302,1,-8,-109,21,-5,-14,-107,481
...,...,...,...,...,...,...,...,...
21800,78,0,2,-13,0,-5,-42,73
21900,-333,98,463,998,22,413,214,4389
22000,-758,-15,-88,-23,-49,-12,14,304
22101,-168,-5,-71,-11,-4,-6,76,-461


In [29]:
# Write the tract-level comparison workbook (xlsxwriter engine), one sheet per frame.
# The context manager saves and closes the workbook on exit, including when a
# write raises; ExcelWriter.save() is deprecated and no longer exists in pandas 2.x.
with pd.ExcelWriter(r'C:\Users\cra\San Diego Association of Governments\SANDAG QA QC - Documents\Projects\2023\2023-023 Estimates 2022\Results\census_redistricting_data\census_tract_diff_2022_01_est_minus_census_redistricting_ethnicity.xlsx', engine='xlsxwriter') as writer:
    # reset_index() turns census_tract back into a regular column so it is written
    # to the sheet even though index=False.
    ct_housing_2022_01_data.reset_index().to_excel(writer, sheet_name='Estimates Data', index=False)
    census_redistricting_ct_data.reset_index().to_excel(writer, sheet_name='Census Redistricting Data', index=False)
    diff.reset_index().to_excel(writer, sheet_name='Diff', index=False)

### Region

In [30]:
# Write the region-level workbook: each sheet holds a single row of column totals
# summed over all census tracts (pandas picks the default Excel engine here).
# The context manager saves and closes the workbook on exit, including when a
# write raises; ExcelWriter.save() is deprecated and no longer exists in pandas 2.x.
region_sheets = {
    'Estimates Data': ct_housing_2022_01_data,
    'Census Redistricting Data': census_redistricting_ct_data,
    'Diff': diff,
}

with pd.ExcelWriter(r'C:\Users\cra\San Diego Association of Governments\SANDAG QA QC - Documents\Projects\2023\2023-023 Estimates 2022\Results\census_redistricting_data\region_diff_2022_01_est_minus_census_redistricting_ethnicity.xlsx') as writer:
    for sheet_name, tract_frame in region_sheets.items():
        # The trailing reset_index() adds a leading 'index' column (value 0) to the
        # single totals row; it is kept so the existing sheet layout is unchanged.
        region_totals = pd.DataFrame(tract_frame.reset_index(drop=True).sum(axis=0)).T.reset_index()
        region_totals.to_excel(writer, sheet_name=sheet_name, index=False)

In [31]:
# Re-display the tract-level estimates frame for reference before the spot checks below.
ct_housing_2022_01_data

Unnamed: 0_level_0,Hispanic,"Non-Hispanic, American Indian or Alaska Native","Non-Hispanic, Asian","Non-Hispanic, Black","Non-Hispanic, Hawaiian or Pacific Islander","Non-Hispanic, Other","Non-Hispanic, Two or More Races","Non-Hispanic, White"
census_tract,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
100,529,0,120,0,0,0,54,2429
201,84,0,145,2,0,0,145,1773
202,604,0,311,11,0,2,120,3216
301,564,0,84,40,0,0,104,1592
302,496,1,67,112,4,6,42,2334
...,...,...,...,...,...,...,...,...
21800,311,0,48,0,0,0,54,1657
21900,1592,113,604,1114,39,427,250,4736
22000,1753,0,1314,141,0,4,106,564
22101,138,0,147,47,0,0,226,1406


In [32]:
# Region-wide Hispanic total from the estimates frame. Uses the vectorised
# Series.sum() rather than Python's builtin sum(); int() keeps the displayed
# value a plain Python integer.
int(ct_housing_2022_01_data['Hispanic'].sum())

1087686

In [33]:
# Region-wide Hispanic total from the census redistricting frame. Uses the
# vectorised Series.sum() rather than Python's builtin sum(); int() keeps the
# displayed value a plain Python integer.
int(census_redistricting_ct_data['Hispanic'].sum())

1119629

In [34]:
# Estimates minus census redistricting for the region-wide Hispanic total; this
# should equal the 'Hispanic' entry of the summed diff frame. Uses the vectorised
# Series.sum() rather than Python's builtin sum().
int(ct_housing_2022_01_data['Hispanic'].sum() - census_redistricting_ct_data['Hispanic'].sum())

-31943

In [35]:
# Region-wide totals of the tract-level diff, shown as a single-row frame
# (the same shape that is written to the 'Diff' sheet of the region workbook).
(
    diff
    .reset_index(drop=True)
    .sum(axis=0)
    .to_frame()
    .T
    .reset_index()
)

Unnamed: 0,index,Hispanic,"Non-Hispanic, American Indian or Alaska Native","Non-Hispanic, Asian","Non-Hispanic, Black","Non-Hispanic, Hawaiian or Pacific Islander","Non-Hispanic, Other","Non-Hispanic, Two or More Races","Non-Hispanic, White"
0,0,-31943,-1378,-12010,10494,-1025,-5117,-22358,109350


In [36]:
# Region-wide totals of the estimates frame, shown as a single-row frame
# (the same shape that is written to the 'Estimates Data' sheet of the region workbook).
(
    ct_housing_2022_01_data
    .reset_index(drop=True)
    .sum(axis=0)
    .to_frame()
    .T
    .reset_index()
)

Unnamed: 0,index,Hispanic,"Non-Hispanic, American Indian or Alaska Native","Non-Hispanic, Asian","Non-Hispanic, Black","Non-Hispanic, Hawaiian or Pacific Islander","Non-Hispanic, Other","Non-Hispanic, Two or More Races","Non-Hispanic, White"
0,0,1087686,11463,388579,155508,11966,13008,131514,1531555


: 