## Cal-CRAI Reprojection -- Koordinates Floodplain Data
This notebook processes floodplain data sourced from Koordinates: \
https://koordinates.com/

Data reprojection includes:
* Isolating 100-year floodplain data exclusively
* Clipping data to California boundaries
* Reprojecting data to standardized coordinate reference system (EPSG:4269)

Output is uploaded to the 2b_reproject directory within the AWS S3 bucket

In [None]:
import geopandas as gpd
import os
import matplotlib.pyplot as plt
import boto3
import sys
import xarray as xr
import pandas as pd
from functools import wraps
import dask_geopandas
import re
sys.path.append(os.path.expanduser('../../'))
from scripts.utils.write_metadata import (
    append_metadata
)
import warnings
warnings.filterwarnings("ignore")

In [None]:
# @append_metadata
def reproject_floodplain(shp_fname, ca_boundaries, varname='', export=False, additional_comments='N/A'):
    """
    Reproject 100-year floodplain polygons to the census tract CRS, clip them
    to California, and optionally upload the result to S3.

    Steps:
    (1) read the floodplain data and keep only the 100-year flood entries,
    (2) reproject the data to the CRS of the California census tracts,
    (3) clip to California and dissolve to one entry per census tract, and
    (4) write the result to a temporary GeoPackage and, if requested, send it to S3.

    This function differs from the one in geospatial_reproject.ipynb since it handles the overlapping
    polygons which make up different flood zones.

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in
    ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.

    Parameters
    ----------
    shp_fname: string
        Path or URI of the floodplain dataset, in any form accepted by
        geopandas.read_file (e.g. 'zip+s3://ca-climate-index/.../file.gdb.zip').
        It is also used to derive the S3 destination key.
    ca_boundaries: geopandas.GeoDataFrame
        California census tracts. Must have a CRS and a 'USCB_GEOID' column,
        which is used to dissolve the clipped polygons per tract.
    varname: string
        Label used in printed messages and plot titles, and as the stem of the output
        file name ('{varname}.gpkg').
    export: bool
        If True, uploads the resulting file to S3.
        If False, nothing is uploaded.
    additional_comments: string
        Not used in the function body; presumably consumed by the
        append_metadata decorator when it is enabled -- TODO confirm.

    Returns
    -------
    None

    Script
    ------
    koordinates_reproject.ipynb
    """
    bucket_name = 'ca-climate-index'
    local_fname = f"{varname}.gpkg"

    # read in shapefile of interest from S3 and take a look at it
    gdf = gpd.read_file(shp_fname)
    print("Dropping entries which do not correspond to 100-year flood.")
    # remove entries of unknown flood risk
    # NOTE: dropna() with no arguments drops rows with a missing value in ANY column
    gdf = gdf.dropna()
    # keep only 100-year flood entries
    gdf = gdf[gdf.FloodZone.str.contains("100")]
    # drop unnecessary columns
    gdf = gdf[["geometry","FLD_ZONE","FloodZone"]]

    print(f"Original CRS of {varname}: {gdf.crs}")
    fig, ax = plt.subplots()
    gdf.plot(ax=ax, markersize=1)
    plt.title(f"{varname} on original projection")
    plt.show()

    # reproject the data to the census tract CRS and clip to California
    gdf_reprojected = gdf.to_crs(ca_boundaries.crs)
    print(f"{varname} reprojected from {gdf.crs} to {gdf_reprojected.crs} with geopandas to_crs() function.")

    clipped_gdf = gpd.overlay(gdf_reprojected, ca_boundaries, how='intersection')
    print(f"{varname} clipped to California boundaries via geopandas overlay using the 'intersection' method.")
    clipped_gdf = clipped_gdf.dissolve(by='USCB_GEOID')
    print("All polygons in a given tract have been aggregated to a single entry.")
    display(clipped_gdf)

    # visualize results
    fig, ax = plt.subplots()
    ca_boundaries.plot(ax=ax, color='white', edgecolor='black')
    clipped_gdf.plot(ax=ax, marker='o', color='red', markersize=1)
    plt.title(f"{varname} on new projection")
    plt.show()

    # derive the S3 destination key from the source URI
    source_uri = shp_fname
    if source_uri.endswith('.zip'):
        source_uri = source_uri.replace('zip+', '')
    dest_path = source_uri.replace(f's3://{bucket_name}/', '')
    dest_path = re.sub(r'1_pull_data|2a_subset', '2b_reproject', dest_path)
    # swap only the final path component for the output file name; a plain
    # str.replace on the file name would also touch matching text earlier in
    # the key and breaks when the path ends with '/'
    parent_key, sep, _ = dest_path.rpartition('/')
    dest_path = f"{parent_key}{sep}{local_fname}"

    try:
        # write the reprojected file to disk - still looking for a way around this
        clipped_gdf.to_file(local_fname, driver="GPKG")
        print(f"{local_fname} has been made")

        if export:
            # upload it to S3
            s3_client = boto3.client('s3')
            s3_client.upload_file(local_fname, bucket_name, dest_path)
            print(f"Reprojected data called {varname}.gpkg sent to S3 bucket: {dest_path}")
        else:
            print(f"Export disabled: {local_fname} was not uploaded (destination would be {dest_path})")
    finally:
        # always clean up the temporary local file, even if writing or uploading failed
        if os.path.exists(local_fname):
            os.remove(local_fname)


In [None]:
# path to floodplain shapefile
shp_fname = 'zip+s3://ca-climate-index/1_pull_data/climate_risk/flood/exposure/koordinates/california-fema-100-year-floodplains.gdb.zip'
varname = 'climate_koordinates_floodplain'
# read in CA census tiger file
census_shp_dir = "s3://ca-climate-index/0_map_data/2021_tiger_census_tract/2021_ca_tract/"
ca_boundaries = gpd.read_file(census_shp_dir)
# need to rename columns so we don't have any duplicates in the final geodatabase
# build the old -> new mapping directly so each column is paired with its own
# prefixed name regardless of where the geometry column sits in the frame
uscb_column_map = {
    column: "USCB_" + column
    for column in ca_boundaries.columns
    if column != "geometry"
}
ca_boundaries = ca_boundaries.rename(columns=uscb_column_map)

In [None]:
# run the reprojection for the Koordinates floodplain layer and upload the result
reproject_floodplain(
    shp_fname,
    ca_boundaries,
    varname=varname,
    export=True,
    additional_comments='N/A',
)