In [1]:
import pandas as pd
import os
import sys
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt
import shutil
from osgeo import ogr
from collections import OrderedDict
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.ticker import MaxNLocator
import boto3


sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_csv_from_directory, upload_csv_aws, delete_items
from scripts.utils.cal_crai_calculations import calculate_equal_weighted_index, add_census_tracts
from scripts.utils.cal_crai_plotting import index_plot, plot_region_domain, plot_hazard_score

In [2]:
# Pull the Cal-CRAI score csv from AWS.
# The three values below are handed positionally to the project helper
# pull_csv_from_directory (bucket, remote directory, local output folder).
output_folder = 'cal_crai_score_file'
aws_dir = '3_fair_data/index_data/calcrai_score'
bucket_name = 'ca-climate-index'

pull_csv_from_directory(
    bucket_name,
    aws_dir,
    output_folder,
    search_zipped=False,
    print_name=False,
)

Metric data retrieved from 3_fair_data/index_data/calcrai_score.


In [3]:
# Read the Cal-CRAI score table from the local folder
# (presumably written there by the pull_csv_from_directory call above — verify).
calcrai_score_csv = r'cal_crai_score_file/calcrai_score.csv'
calcrai_score = pd.read_csv(calcrai_score_csv)

In [4]:
# Where the crosswalk file lives in S3 and what to call it locally.
bucket_name = 'ca-climate-index'
# Despite its name, `directory` is the full object key of a single csv file.
directory = '0_map_data/crosswalk_data/final_cci_project_indicators_and_climate_risk_with_contextual_columns.csv'
local_crosswalk_csv = 'final_cci_project_indicators_and_climate_risk_with_contextual_columns.csv'

# Create the S3 client used for the download
s3_client = boto3.client('s3')

# Download the object into the current working directory, with progress messages
print('Pulling file')
s3_client.download_file(bucket_name, directory, local_crosswalk_csv)
print('File pulled')

Pulling file
File pulled


In [5]:
# Load the CCI project crosswalk csv that the S3 download cell saved locally.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk. Chunked inference is what produces mixed-type
# columns and the warning echoed in this cell's saved output
# (presumably a DtypeWarning — confirm against a fresh run).
crosswalk_data = pd.read_csv(
    'final_cci_project_indicators_and_climate_risk_with_contextual_columns.csv',
    low_memory=False,
)

  crosswalk_data = pd.read_csv('final_cci_project_indicators_and_climate_risk_with_contextual_columns.csv')


In [6]:
# Keep only the projects that have a Climate_Risk_Mitigation entry.
has_mitigation_entry = crosswalk_data['Climate_Risk_Mitigation'].notna()
climate_mitigation_cci_data = crosswalk_data[has_mitigation_entry]

In [7]:
# Print every column name on its own line to see which fields are available.
for col in list(climate_mitigation_cci_data.columns):
    print(col)

Project IDNumber
Reporting Cycle Name
Agency Name
Program Name
Program Description
Sub Program Name
Record Type
Project Name
Project Type
Project Description
SECTOR
CATEGORY
ACTION
Census Tract
Address
Lat Long
Senate
District
Assembly
District
County
Total Project Cost
Total Program GGRFFunding
Project Life Years
Total Project GHGReductions
Annual Project GHGReductions
Project Count
Fiscal Year Funding Project
Is Benefit Disadvantaged Communities
Disadvantaged Community Criteria
Disadvantaged Community Need
Disadvantaged Community Census Tracts
Total GGRFDisadvantaged Community Funding
Disadvantaged Community Benefits Description
Funding Benefiting Disadvantaged Communities
Estimated Num Vehicles In Service
Funding Within Disadvantage Communities
Other Project Benefits Description
VMTReductions
Number Of Housing Units
Number Of Affordable Housing Units
Estimated Number Of Trees To Be Planted
Energy Cost Savings
Estimated Energy Saved KWH
Estimated Energy Saved Therms
Estimated Water S

In [8]:
# Candidate columns for locating each project geographically.
spatial_columns = ['Lat Long', 'Census Tract', 'County']

# Narrow the frame down to just those columns
inspect_spatial_columns = climate_mitigation_cci_data[spatial_columns]

# Report how many rows are populated in each spatial column
print("Non-NaN value counts for selected spatial columns:")
for column, non_nan_count in inspect_spatial_columns.notna().sum().items():
    print(f"{column}: {non_nan_count} non-NaN values")

# Preview the first rows of the selected columns
print("\nFirst 5 rows of selected spatial columns:")
spatial_preview = inspect_spatial_columns.head()
print(spatial_preview)


Non-NaN value counts for selected spatial columns:
Lat Long: 5279 non-NaN values
Census Tract: 11554 non-NaN values
County: 16822 non-NaN values

First 5 rows of selected spatial columns:
                                     Lat Long  Census Tract      County
15688                      36.18133,-118.3087           NaN      Tulare
15689  40.36507,-120.95021;40.2403,-120.91051           NaN      Lassen
15690  38.05462,-121.71795;38.0414,-121.77348           NaN  Sacramento
15691                     39.30251,-120.11346           NaN      Placer
15692                      40.3321,-121.47321           NaN      Tehama


## Drop all rows without a valid Census Tract

In [9]:
# Keep only the rows whose 'Census Tract' is populated.
has_census_tract = climate_mitigation_cci_data['Census Tract'].notna()
climate_mitigation_cci_data = climate_mitigation_cci_data[has_census_tract]


In [10]:
# GEOID handling
# Build the census-tract GEOID as text:
#   1. cast the tract to str,
#   2. strip the trailing '.0' left behind when the tract was parsed as a float,
#   3. left-pad with zeros to 11 characters (2-digit state + 3-digit county +
#      6-digit tract), which restores the leading '0' lost when the tract was
#      parsed as a number.
# zfill pads to a fixed width, so a tract that already carries its leading zero
# does not get a second one.
geoid_as_text = (
    climate_mitigation_cci_data['Census Tract']
    .astype(str)
    .str.replace(r'\.0+$', '', regex=True)
    .str.zfill(11)
)

# .assign returns a new frame instead of writing a column into a filtered slice
# of crosswalk_data, which avoids pandas' SettingWithCopyWarning.
climate_mitigation_cci_data = climate_mitigation_cci_data.assign(GEOID=geoid_as_text)

In [11]:
# Columns carried forward: the GEOID join key, cost/count figures, the project
# classification and description fields, and the two climate labelling columns.
keep_columns = [
    'GEOID', 'Total Project Cost', 'Project Count',
    'SECTOR', 'CATEGORY', 'ACTION',
    'Project Type', 'Project Description', 'Sub Program Name',
    'Other Project Benefits Description',
    'Disadvantaged Community Benefits Description',
    'Is Low Income Communities',
    'Climate  Adaptation',
    'Climate_Risk_Mitigation',
]

subset_climate_mitigation_cci = climate_mitigation_cci_data[keep_columns]

# Row count of the subset, followed by a preview of its first rows
print(subset_climate_mitigation_cci.shape[0])
subset_climate_mitigation_cci.head()

11554


Unnamed: 0,GEOID,Total Project Cost,Project Count,SECTOR,CATEGORY,ACTION,Project Type,Project Description,Sub Program Name,Other Project Benefits Description,Disadvantaged Community Benefits Description,Is Low Income Communities,Climate Adaptation,Climate_Risk_Mitigation
25097,6001401700,5563.0,1.0,Renewable Energy and Energy Efficiency,Energy Efficiency Upgrades and Rooftop Solar,Residential Installation,,Provides single-family and small multi-family ...,Single-Family Energy Efficiency and Solar PV,"Energy savings, employment, training, improved...",Project provides energy efficiency upgrades an...,1.0,,extreme heat mitigation
25098,6001402500,12237.0,3.0,Renewable Energy and Energy Efficiency,Energy Efficiency Upgrades and Rooftop Solar,Residential Installation,,Provides single-family and small multi-family ...,Single-Family Energy Efficiency and Solar PV,"Energy savings, employment, training, improved...",Project provides energy efficiency upgrades an...,1.0,,extreme heat mitigation
25099,6001406000,8584.0,1.0,Renewable Energy and Energy Efficiency,Energy Efficiency Upgrades and Rooftop Solar,Residential Installation,,Provides single-family and small multi-family ...,Single-Family Energy Efficiency and Solar PV,"Energy savings, employment, training, improved...",Project provides energy efficiency upgrades an...,1.0,,extreme heat mitigation
25100,6001406100,18124.0,2.0,Renewable Energy and Energy Efficiency,Energy Efficiency Upgrades and Rooftop Solar,Residential Installation,,Provides single-family and small multi-family ...,Single-Family Energy Efficiency and Solar PV,"Energy savings, employment, training, improved...",Project provides energy efficiency upgrades an...,1.0,,extreme heat mitigation
25101,6001407200,5028.0,1.0,Renewable Energy and Energy Efficiency,Energy Efficiency Upgrades and Rooftop Solar,Residential Installation,,Provides single-family and small multi-family ...,Single-Family Energy Efficiency and Solar PV,"Energy savings, employment, training, improved...",Project provides energy efficiency upgrades an...,1.0,,extreme heat mitigation


In [12]:
# GEOID handling
# Normalise the Cal-CRAI GEOID to an 11-character, zero-padded string so it
# matches the GEOID key built for the CCI data:
#   - cast to str and strip a trailing '.0' (present if the column was read as float),
#   - left-pad with zeros to 11 characters (2-digit state + 3-digit county + 6-digit tract).
# zfill is idempotent: re-running this cell leaves an already padded GEOID
# untouched, whereas unconditionally prepending '0' would add one more zero on
# every run and silently break the merge on GEOID.
calcrai_score['GEOID'] = (
    calcrai_score['GEOID']
    .astype(str)
    .str.replace(r'\.0+$', '', regex=True)
    .str.zfill(11)
)

# Only the join key and the two score columns are needed downstream
keep_columns = [
    'GEOID',
    'hazard_score',
    'calcrai_score'
]

subset_calcrai_score = calcrai_score[keep_columns]

## Merge CCI data (that have valid Climate Risk Mitigation entries) with Cal-CRAI
* Drop any rows that don't have a Cal-CRAI score

In [42]:
# Attach the Cal-CRAI scores to every CCI project row via the GEOID key
# (left join: every CCI row is kept, unmatched rows get NaN scores).
cci_calcrai_merged = subset_climate_mitigation_cci.merge(
    subset_calcrai_score,
    how='left',
    on='GEOID',
)

# Discard the project rows that ended up without a Cal-CRAI score
has_calcrai_score = cci_calcrai_merged['calcrai_score'].notna()
cci_calcrai_merged_hazard_calcrai = cci_calcrai_merged[has_calcrai_score]
cci_calcrai_merged_hazard_calcrai

Unnamed: 0,GEOID,Total Project Cost,Project Count,SECTOR,CATEGORY,ACTION,Project Type,Project Description,Sub Program Name,Other Project Benefits Description,Disadvantaged Community Benefits Description,Is Low Income Communities,Climate Adaptation,Climate_Risk_Mitigation,hazard_score,calcrai_score
0,06001401700,5563.0,1.0,Renewable Energy and Energy Efficiency,Energy Efficiency Upgrades and Rooftop Solar,Residential Installation,,Provides single-family and small multi-family ...,Single-Family Energy Efficiency and Solar PV,"Energy savings, employment, training, improved...",Project provides energy efficiency upgrades an...,1.0,,extreme heat mitigation,0.686870,0.382663
1,06001402500,12237.0,3.0,Renewable Energy and Energy Efficiency,Energy Efficiency Upgrades and Rooftop Solar,Residential Installation,,Provides single-family and small multi-family ...,Single-Family Energy Efficiency and Solar PV,"Energy savings, employment, training, improved...",Project provides energy efficiency upgrades an...,1.0,,extreme heat mitigation,0.662431,0.364412
2,06001406000,8584.0,1.0,Renewable Energy and Energy Efficiency,Energy Efficiency Upgrades and Rooftop Solar,Residential Installation,,Provides single-family and small multi-family ...,Single-Family Energy Efficiency and Solar PV,"Energy savings, employment, training, improved...",Project provides energy efficiency upgrades an...,1.0,,extreme heat mitigation,0.688563,0.351280
3,06001406100,18124.0,2.0,Renewable Energy and Energy Efficiency,Energy Efficiency Upgrades and Rooftop Solar,Residential Installation,,Provides single-family and small multi-family ...,Single-Family Energy Efficiency and Solar PV,"Energy savings, employment, training, improved...",Project provides energy efficiency upgrades an...,1.0,,extreme heat mitigation,0.839269,0.316310
4,06001407200,5028.0,1.0,Renewable Energy and Energy Efficiency,Energy Efficiency Upgrades and Rooftop Solar,Residential Installation,,Provides single-family and small multi-family ...,Single-Family Energy Efficiency and Solar PV,"Energy savings, employment, training, improved...",Project provides energy efficiency upgrades an...,1.0,,extreme heat mitigation,0.753209,0.351219
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11549,06037461400,1366.0,1.0,Water and Energy Efficiency,Water Efficiency,Incentives,High Efficiency Clothes Washer Commercial All-...,The project will expand the current WeDIP by o...,Water-Energy Grant Program,,,0.0,,drought mitigation,0.400334,2.161602
11550,06037462400,1920.0,8.0,Water and Energy Efficiency,Water Efficiency,Incentives,Lavatory Faucets,The project will expand the current WeDIP by o...,Water-Energy Grant Program,,,0.0,,drought mitigation,0.398406,2.057077
11551,06037461902,1366.0,1.0,Water and Energy Efficiency,Water Efficiency,Incentives,High Efficiency Clothes Washer Commercial All-...,The project will expand the current WeDIP by o...,Water-Energy Grant Program,,,0.0,,drought mitigation,0.348328,2.189421
11552,06037463500,1366.0,1.0,Water and Energy Efficiency,Water Efficiency,Incentives,High Efficiency Clothes Washer Commercial All-...,The project will expand the current WeDIP by o...,Water-Energy Grant Program,,,0.0,,drought mitigation,0.396480,2.126560


In [44]:
# One groupby object serves both the per-tract row count and the per-tract mean scores
rows_by_geoid = cci_calcrai_merged_hazard_calcrai.groupby('GEOID')

# Number of merged project rows in each census tract
rows_per_geoid = rows_by_geoid.size().reset_index(name='Count')

# Mean hazard and Cal-CRAI score for each census tract
aggregated_scores = rows_by_geoid[['hazard_score', 'calcrai_score']].mean().reset_index()

# Combine counts with scores, then order tracts from most to fewest rows
geoid_counts = (
    rows_per_geoid
    .merge(aggregated_scores, on='GEOID')
    .sort_values(by='Count', ascending=False)
)

# Summary: number of tracts, the two median scores, and the 50 tracts with the most rows
print('Number of unique census tracts after merging CCI Climate Mitigation & Cal-CRAI (non nan climate mitigation or Cal-CRAI score columns):',
      len(geoid_counts))
print()
print('Median value of Cal-CRAI Score within subset (the higher, the more community capacity and overall resiliency):',
      geoid_counts['calcrai_score'].median())
print()
print('Median value of Climate Hazard Score within subset (the higher, the more at risk, 0-1 scale):',
      geoid_counts['hazard_score'].median())
print()
print(geoid_counts.head(50))

Number of unique census tracts after merging CCI Climate Mitigation & Cal-CRAI (non nan climate mitigation or Cal-CRAI score columns): 2731

Median value of Cal-CRAI Score within subset (the higher, the more community capacity and overall resiliency): 0.9185559004450764

Median value of Climate Hazard Score within subset (the higher, the more at risk, 0-1 scale): 0.42953871994077614

            GEOID  Count  hazard_score  calcrai_score
1172  06037543322     64      0.486640       1.844328
1170  06037543306     42      0.401251       3.192024
1168  06037543304     39      0.485234       1.704851
931   06037461902     39      0.348328       2.189421
1181  06037543801     32      0.533652       1.444490
1336  06037602700     29      0.485170       1.772201
934   06037462100     29      0.400354       2.062663
922   06037460900     26      0.401174       1.940916
1302  06037600912     25      0.430352       1.932576
1174  06037543501     24      0.460947       1.928550
1173  06037543400  

In [21]:
# Tally the merged project rows per GEOID, largest tally first.
# NOTE(review): this rebinds `geoid_counts` to a counts-only frame, replacing
# the version with hazard_score / calcrai_score columns built in the preceding
# cell — confirm that is intended before relying on those columns further down.
rows_per_geoid = cci_calcrai_merged_hazard_calcrai.groupby('GEOID').size()
geoid_counts = rows_per_geoid.reset_index(name='Count')
geoid_counts = geoid_counts.sort_values(by='Count', ascending=False)

# Show the tracts with the most rows
print(geoid_counts.head())


            GEOID  Count
1172  06037543322     64
1170  06037543306     42
1168  06037543304     39
931   06037461902     39
1181  06037543801     32
