In [9]:
import pandas as pd
import geopandas as gpd
import os
import boto3
from zipfile import ZipFile
import shutil
import sys
import os
sys.path.append(os.path.expanduser('../../'))
from scripts.utils.write_metadata import (
    append_metadata
)

In [2]:
# @append_metadata
def reproject_slr_census_blocks(varname='climate_pacific_institute_slr_exposure_susceptibility'):
    """
    This function pulls Pacific Institute sea level rise data from AWS. The csv file is modified for
    reprojection. The census block column was used to create an additional spatial column for census
    tracts as census blocks are at a higher spatial scale.
    
    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    """
    # Initialize the S3 client
    s3_client = boto3.client('s3')
    
    # Bucket name and file paths
    bucket_name = 'ca-climate-index'
    directory = '1_pull_data/climate_risk/sea_level_rise/exposure/projections/pacific_institute/vulnerable_slr_tract_2100.gdb.zip'
    out_directory = '2b_reproject/climate_risk/sea_level_rise/exposure/projections/pacific_institute/'
    
    # Local directory to store the downloaded zip file and extracted contents
    local_directory = 'temp'
    if not os.path.exists(local_directory):
        os.makedirs(local_directory)
    
    # Download the zip file
    print(f'Pulling vulnerable census blocks to SLR data from S3 bucket: {directory}')
    local_zip_file_path = os.path.join(local_directory, os.path.basename(directory))
    s3_client.download_file(bucket_name, directory, local_zip_file_path)
    
    # Extract the contents of the zip file
    with ZipFile(local_zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(local_directory)
    
    csv_found = False

    for filename in os.listdir(local_directory):
        if filename.endswith('.csv'):
            csv_path = os.path.join(local_directory, filename)
            print("Reading csv file")
            print('')
            slr_vulnerable_data = pd.read_csv(csv_path)
            print('.csv data pulled')
            print('')
            print('Original data:')
            print(slr_vulnerable_data)
            print('Adding Census Tract column to dataset')
            print('')
            slr_vulnerable_data['CensusTract'] = slr_vulnerable_data['CensusBlock'].astype(str).str[:10]
            print('Updated data:')
            print(slr_vulnerable_data)
            print('')
            print('Saving data as new .csv file and uploading to AWS')
            print('')
            
            # Save the modified DataFrame to a CSV file
            output_csv_path = os.path.join(local_directory, 'climate_pacific_institute_slr_exposure_census_vulnerability.csv')
            slr_vulnerable_data.to_csv(output_csv_path, index=False)
            print(f"Reprojected data saved to: {output_csv_path}")

            # Upload the CSV file to S3
            with open(output_csv_path, 'rb') as file:
                s3_client.upload_fileobj(file, bucket_name, os.path.join(out_directory, 'climate_pacific_institute_slr_exposure_census_vulnerability.csv'))

            print(f"Reprojected SLR data sent to S3 bucket: {out_directory}")
            break  # Break out of the loop after processing the first CSV file

    if not csv_found:
        print("No other CSV files found in the directory.")
                

In [3]:
reproject_slr_census_blocks(varname='climate_pacific_institute_slr_exposure_susceptibility')

Pulling vulnerable census blocks to SLR data from S3 bucket: 1_pull_data/climate_risk/sea_level_rise/exposure/projections/pacific_institute/vulnerable_slr_tract_2100.gdb.zip
Reading csv file

.csv data pulled

Original data:
         CensusBlock  CountyFIPS    BlkArea_m2  Perc_2000  Perc_2100
0     60014017001002           1  7.400145e+04   0.013629   0.103142
1     60014017001009           1  1.661440e+04   0.162421   0.818744
2     60014017001011           1  2.034830e+04   0.033993   0.145896
3     60014017002024           1  2.437019e+04   0.007894   0.054463
4     60014017002025           1  1.384990e+04   0.218054   0.323897
...              ...         ...           ...        ...        ...
9415  61110073001115         111  6.546174e+04   0.008397   0.037215
9416  61110073001120         111  2.331707e+04   0.010518   0.024219
9417  61110073001121         111  3.143619e+06   0.000482   0.001053
9418  61110073001124         111  2.357066e+04   0.006471   0.271358
9419  6111007300

# Below: attempt to aggregate from census block to census tract level

In [100]:
census_shp_dir = "s3://ca-climate-index/0_map_data/2021_tiger_census_tract/2021_ca_tract/"
ca_boundaries = gpd.read_file(census_shp_dir)
# we need to reproject the Census Tract shapefile
# to a projection in meters since the original geographic projection
# (ie, lat-lon grid) is not area-preserving and will result
# in incorrect area totals.
ca_boundaries = ca_boundaries.to_crs('epsg:32633')
ca_boundaries = ca_boundaries.set_index("GEOID")
ca_boundaries["TractArea_m2"] = ca_boundaries["geometry"].area

In [101]:
df = pd.read_csv('temp/climate_pacific_institute_slr_exposure_census_vulnerability.csv')
df["Area_m2_2000"] = df["BlkArea_m2"] * df["Perc_2000"]
df["Area_m2_2100"] = df["BlkArea_m2"] * df["Perc_2100"]
df

Unnamed: 0,CensusBlock,CountyFIPS,BlkArea_m2,Perc_2000,Perc_2100,CensusTract,Area_m2_2000,Area_m2_2100
0,60014017001002,1,7.400145e+04,0.013629,0.103142,6001401700,1008.547945,7632.638329
1,60014017001009,1,1.661440e+04,0.162421,0.818744,6001401700,2698.528133,13602.935009
2,60014017001011,1,2.034830e+04,0.033993,0.145896,6001401700,691.697194,2968.732520
3,60014017002024,1,2.437019e+04,0.007894,0.054463,6001401700,192.389518,1327.279091
4,60014017002025,1,1.384990e+04,0.218054,0.323897,6001401700,3020.028033,4485.940436
...,...,...,...,...,...,...,...,...
9415,61110073001115,111,6.546174e+04,0.008397,0.037215,6111007300,549.685270,2436.179073
9416,61110073001120,111,2.331707e+04,0.010518,0.024219,6111007300,245.250031,564.712574
9417,61110073001121,111,3.143619e+06,0.000482,0.001053,6111007300,1515.852993,3310.928496
9418,61110073001124,111,2.357066e+04,0.006471,0.271358,6111007300,152.532491,6396.079857


In [102]:
tract_df = df[["CensusTract","BlkArea_m2","Area_m2_2000","Area_m2_2100"]]
tract_df = tract_df.rename(
    columns={"BlkArea_m2": "TotalBlkArea_m2",
            "CensusTract": "GEOID"})
tract_df["GEOID"] = tract_df["GEOID"].astype('str')
tract_df['GEOID'] = tract_df['GEOID'].str.zfill(11)
tract_df = tract_df.groupby("GEOID").sum()
tract_df.index = tract_df.index.str.zfill(11)
tract_df["TractArea_m2"] = ca_boundaries["TractArea_m2"]
tract_df

Unnamed: 0_level_0,TotalBlkArea_m2,Area_m2_2000,Area_m2_2100,TractArea_m2
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
06001401700,2.690568e+06,3.434571e+05,1.099307e+06,1.615779e+07
06001401900,2.153717e+06,9.125627e+04,5.111162e+05,
06001403200,8.139416e+04,5.945421e+02,3.503238e+04,
06001403300,1.675126e+05,7.313478e+03,1.954954e+04,
06001403400,2.931380e+05,9.715905e+04,1.403762e+05,
...,...,...,...,...
06111004600,1.682438e+07,1.460499e+07,1.598362e+07,3.142910e+07
06111004702,2.262305e+07,1.482112e+07,1.874741e+07,
06111004704,4.552837e+06,2.791642e+05,2.609226e+06,3.034404e+07
06111005600,2.232031e+07,4.485517e+06,9.340494e+06,


In [103]:
old_tracts = gpd.read_file("tr06_d00.shp")
old_tracts = old_tracts.set_crs("epsg:4269")
old_tracts = old_tracts.to_crs("epsg:32633")
old_tracts["TRACT"] = old_tracts["TRACT"].str.zfill(6)
old_tracts["GEOID"] = old_tracts["STATE"]+old_tracts["COUNTY"]+old_tracts["TRACT"]
old_tracts.index = old_tracts["GEOID"]
old_tracts["TractArea_m2"] = old_tracts["geometry"].area
old_tracts

Unnamed: 0_level_0,AREA,PERIMETER,TR06_D00_,TR06_D00_I,STATE,COUNTY,TRACT,NAME,LSAD,LSAD_TRANS,geometry,GEOID,TractArea_m2
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
06093000004,0.058290,1.303878,2,1,06,093,000004,0004,TR,,"POLYGON ((-2994764.972 14395298.412, -3000453....",06093000004,7.170062e+08
06093000003,0.155340,2.227876,3,2,06,093,000003,0003,TR,,"POLYGON ((-3035361.677 14369572.479, -3041882....",06093000003,1.926712e+09
06093000005,0.346458,3.272025,4,3,06,093,000005,0005,TR,,"POLYGON ((-2965258.052 14412987.554, -2968294....",06093000005,4.244273e+09
06093000002,0.148798,2.018557,5,4,06,093,000002,0002,TR,,"POLYGON ((-3052345.042 14359312.534, -3052417....",06093000002,1.850553e+09
06015000202,0.227852,3.327617,6,5,06,015,000202,0002.02,TR,,"POLYGON ((-2885248.707 14460839.830, -2892348....",06015000202,2.771576e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...
06073010111,0.000055,0.033442,7112,7111,06,073,010111,0101.11,TR,,"POLYGON ((-4186750.470 15157814.022, -4186908....",06073010111,9.411602e+05
06073010012,0.000061,0.033368,7113,7112,06,073,010012,0100.12,TR,,"POLYGON ((-4186908.290 15157892.268, -4187862....",06073010012,1.049570e+06
06073010109,0.001243,0.175055,7114,7113,06,073,010109,0101.09,TR,,"POLYGON ((-4185848.873 15158724.893, -4186464....",06073010109,2.128561e+07
06073010013,0.000091,0.044529,7115,7114,06,073,010013,0100.13,TR,,"POLYGON ((-4188161.540 15158098.108, -4188118....",06073010013,1.556902e+06


In [104]:
tract_df["OldTractArea_m2"] = old_tracts["TractArea_m2"]
tract_df

ValueError: cannot reindex on an axis with duplicate labels