# Sensitivity Stats Calculation

We will look at the sensitivity of the zonal statistics and the correlation with the PurpleAir based on weight for the surfaces and sigma of traffic dispersion.

In [1]:
# Import Libraries

import os

# Analysis

import numpy as np
import numpy.ma as ma # Masking
import pandas as pd
import geopandas as gpd
import rasterio


import matplotlib.pyplot as plt
import contextily

In [2]:
# Statistics that should be observed
# To be stored as a csv

holc = gpd.read_file(os.path.join(os.getcwd(), '..', '1_Data_IO', 'Data', 'holc.geojson'))
classes = np.unique(holc.HSG_SCALE)

stats_df_columns = ['Hazard_Index_Filename',
                    'Sigma', 'I_weight', 'T_weight', 'PurpleAir_RMSE_mpls',
                    ]

for c in classes:
    stat_names = [c + '_IndexMean', c + '_IndexMean_stdev']
    stats_df_columns += stat_names

stats_df = pd.DataFrame(columns = stats_df_columns,
                       )

In [3]:
# Load Interpolated PurpleAir

purpleair_path = os.path.join(os.getcwd(), '..', '4_Validate_Index_Surface', 'PurpleAir_Interpolation_Normalized.tif')

rasterio_rast = rasterio.open(purpleair_path)

PurpleAir = rasterio_rast.read(1)

rasterio_rast.close()

In [4]:
# Clip PurpleAir to just Minneapolis

# Boundary

mpls_path = os.path.join(os.getcwd(), '..', '1_Data_IO', 'Data', 'mpls_boundary.geojson')
mpls = gpd.read_file(mpls_path)

# Bounds

extent = mpls.geometry.total_bounds # minx, miny, maxx, maxy

minx = extent[0]
miny = extent[1]
maxx = extent[2]
maxy = extent[3]

# Load numpy template raster

raster_path = os.path.join(os.getcwd(), '..', '2_Model_Pollutant_Exposure', 'template.npy')

raster = np.load(raster_path)

# Find indices of raster within bounds

# in_bounds_x = np.logical_and(raster[0]>(minx-50), raster[0]<(maxx+50))
# in_bounds_y = np.logical_and(raster[1]>(miny-50), raster[1]<(maxy+50))
# in_bounds = np.logical_and(in_bounds_x, in_bounds_y)

in_bounds_x = np.logical_and(raster[0]>(minx), raster[0]<(maxx))
in_bounds_y = np.logical_and(raster[1]>(miny), raster[1]<(maxy))
in_bounds = np.logical_and(in_bounds_x, in_bounds_y)

# Select these from PurpleAir

PurpleAir_clipped = ma.array(PurpleAir.T, mask = np.invert(in_bounds))


In [5]:
# Iterate through to get stats

# Initialize

# Hazard_Index_Filenames

indices_path = os.path.join(os.getcwd(), 'Hazard_Indices')
index_filenames = os.listdir(indices_path)

# Paths to Zonal Stats
zonalstats_path = os.path.join(os.getcwd(), 'Zonal_Stats')

for i, index_filename in enumerate(index_filenames):
    
    # Get parameter information
    
    stats_df.loc[i, 'Hazard_Index_Filename'] = index_filename
    stats_df.loc[i, 'Sigma'] = index_filename.split('_')[0][:-3]
    stats_df.loc[i, 'I_weight'] = index_filename.split('_')[1].split('-')[0][:-1]
    stats_df.loc[i, 'T_weight'] = index_filename.split('_')[1].split('-')[1][:-1]
    
    # Compare with Purple Air
    index_path = os.path.join(indices_path, index_filename)
    with rasterio.open(index_path) as rast:
        hazard_index = rast.read(1)
        
    dif = PurpleAir_clipped - ma.array(hazard_index.T, mask = np.invert(in_bounds))
    rmse = np.sqrt(np.sum(dif**2))
    stats_df.loc[i, 'PurpleAir_RMSE_mpls'] = rmse
    
    # Get mean of each zonal type
    zonalstats_path = os.path.join(os.getcwd(), 'Zonal_Stats', index_filename[:-4] + '_Zonal_Stats.geojson')
    zonalstats = gpd.read_file(zonalstats_path)
    
    for c in classes:      
        
        mean = zonalstats[zonalstats.HSG_SCALE == c]['mean'].mean()
        std = zonalstats[zonalstats.HSG_SCALE == c]['mean'].std()
        stat_names = [c + '_IndexMean', c + '_IndexMean_stdev']
        stats_df.loc[i, stat_names] = [mean, std]

In [6]:
stats_df.head()

Unnamed: 0,Hazard_Index_Filename,Sigma,I_weight,T_weight,PurpleAir_RMSE_mpls,Best_IndexMean,Best_IndexMean_stdev,Business and Industrial_IndexMean,Business and Industrial_IndexMean_stdev,Definitely Declining_IndexMean,...,Open Water_IndexMean,Open Water_IndexMean_stdev,Park / Open Space_IndexMean,Park / Open Space_IndexMean_stdev,Still Desirable_IndexMean,Still Desirable_IndexMean_stdev,Uncertain_IndexMean,Uncertain_IndexMean_stdev,Undeveloped_IndexMean,Undeveloped_IndexMean_stdev
0,11.0sig_45I-55T_HazardIndex.tif,11.0,45,55,131.511031,0.094737,0.071565,0.222827,0.102698,0.158828,...,0.130855,0.083641,0.12664,0.103722,0.110532,0.087112,0.079678,0.058457,0.111997,0.093125
1,4.0sig_55I-44T_HazardIndex.tif,4.0,55,44,152.036742,0.030929,0.041456,0.104039,0.083201,0.063758,...,0.04532,0.052595,0.058675,0.065462,0.036886,0.04874,0.029416,0.026347,0.050125,0.063522
2,11.5sig_60I-40T_HazardIndex.tif,11.5,60,40,138.280668,0.072301,0.05408,0.180188,0.090245,0.122768,...,0.102285,0.063215,0.096108,0.07726,0.084628,0.065283,0.063616,0.046431,0.084801,0.070045
3,10.5sig_60I-40T_HazardIndex.tif,10.5,60,40,139.943639,0.066728,0.053004,0.170706,0.089503,0.115814,...,0.095331,0.061449,0.091536,0.076368,0.078427,0.063288,0.058546,0.04496,0.080649,0.06946
4,9.5sig_50I-50T_HazardIndex.tif,9.5,50,50,137.208071,0.075594,0.062951,0.18819,0.095793,0.131781,...,0.106361,0.072936,0.10668,0.09241,0.088694,0.075226,0.063903,0.050785,0.093915,0.083758


In [7]:
for column in stats_df.columns[1:]:
    stats_df[column] = pd.to_numeric(stats_df[column])
stats_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 145 entries, 0 to 144
Data columns (total 23 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   Hazard_Index_Filename                    145 non-null    object 
 1   Sigma                                    145 non-null    float64
 2   I_weight                                 145 non-null    int64  
 3   T_weight                                 145 non-null    int64  
 4   PurpleAir_RMSE_mpls                      145 non-null    float64
 5   Best_IndexMean                           145 non-null    float64
 6   Best_IndexMean_stdev                     145 non-null    float64
 7   Business and Industrial_IndexMean        145 non-null    float64
 8   Business and Industrial_IndexMean_stdev  145 non-null    float64
 9   Definitely Declining_IndexMean           145 non-null    float64
 10  Definitely Declining_IndexMean_stdev     145 non-n

In [8]:
# Save

stats_path = os.path.join(os.getcwd(), 'Sensitivity_Stats.csv')
stats_df.to_csv(stats_path, index = False)