## Cal-CRAI Metric Calculation
Domain: Natural Systems \
Indicator: Ecosystem condition

This notebook calculates one metric, sourced from the California Department of Fish and Wildlife:
* Metric 1: Ecoregion Biodiversity Weight score

In [1]:
import pandas as pd
import os
import sys
import boto3
import io
import geopandas as gpd

# suppress pandas purely educational warnings
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

sys.path.append(os.path.expanduser('../../'))
from scripts.utils.file_helpers import pull_gpkg_from_directory, upload_csv_aws
from scripts.utils.write_metadata import append_metadata

In [None]:
# pull the biodiversity GeoPackage (.gpkg) data from the AWS directory below
# NOTE(review): where pull_gpkg_from_directory writes the file locally is not shown here — confirm in scripts/utils/file_helpers.py
bucket_name = 'ca-climate-index'
aws_dir = '2b_reproject/natural_systems/ecosystem_condition/ca_dept_fish_wildlife/species_biodiversity/'

pull_gpkg_from_directory(bucket_name, aws_dir)

In [3]:
# Load the biodiversity layer into a GeoDataFrame
# assumes 'natural_fws_biodiversity.gpkg' is already in the working directory (presumably placed there by the pull step) — TODO confirm
species_biodiversity_data = gpd.read_file('natural_fws_biodiversity.gpkg')

In [None]:
# List the available fields before choosing which ones to keep
species_biodiversity_data.columns

In [None]:
# Preview the raw biodiversity records
species_biodiversity_data

* `SpBioRnkEco` - Ranks of 1-5 assigned to the ecoregionally normalized biodiversity values, with all zero values removed and the remaining values broken into 5 quantiles.
* `SpBioWtEco` - Aggregated total of ecoregionally normalized biodiversity values, including native species richness, rare species richness, and rarity weighted index. The final sum is re-normalized to 0-1 statewide for ease of interpretation. This is the field used to calculate the metric below.


In [None]:
# Keep only the identifying fields, the biodiversity weight score,
# the tract ID used for merging, and the geometry
columns = [
    'Hex_ID',
    'Name',
    'SpBioWtEco',
    'USCB_GEOID',
    'geometry',
]
filtered_species_biodiversity_data = species_biodiversity_data[columns]
filtered_species_biodiversity_data

In [None]:
# Sanity-check the range of the biodiversity weight score (max, min, mean)
biodiversity_weight = species_biodiversity_data['SpBioWtEco']
print(biodiversity_weight.max())
print(biodiversity_weight.min())
print(biodiversity_weight.mean())

In [8]:
# read in the CA census tract / county lookup table
ca_tract_county_path = "s3://ca-climate-index/0_map_data/ca_tracts_county.csv"
ca_tract_county = (
    gpd.read_file(ca_tract_county_path)
    # drop the columns that are not needed for the tract lookup
    .drop(columns=['field_1', 'geometry'])
    # rename the tract column so it matches the merge key in the biodiversity data
    .rename(columns={'TRACT': 'USCB_GEOID'})
)

In [None]:
# Preview the tract / county lookup table
ca_tract_county

In [None]:
# Left join on the tract ID: every row of the tract table is kept,
# including tracts with no matching biodiversity record (those get NaN)
biodiversity_merge = pd.merge(
    ca_tract_county,
    filtered_species_biodiversity_data,
    how='left',
    on='USCB_GEOID',
)
biodiversity_merge

In [None]:
# Rows are treated as duplicates when all of these fields match
# (geometry is deliberately left out of the comparison)
columns_to_check = [
    'USCB_GEOID',
    'COUNTYFP',
    'County',
    'Hex_ID',
    'Name',
    'SpBioWtEco',
]

# Keep the first occurrence of each duplicated record
filtered_species_biodiversity_merge = biodiversity_merge.drop_duplicates(
    subset=columns_to_check,
    keep='first',
)
print(len(filtered_species_biodiversity_merge))

In [None]:
# Spot-check one tract to see the individual records that will be averaged
tract_mask = filtered_species_biodiversity_merge['USCB_GEOID'] == '06001400100'
check_tract = filtered_species_biodiversity_merge[tract_mask]
check_tract


In [13]:
# Group by USCB_GEOID and calculate the mean SpBioWtEco so each tract has a single score
# (only SpBioWtEco is aggregated; the grouping key is restored as a column by reset_index)
processed_species_biodiversity = filtered_species_biodiversity_merge.groupby('USCB_GEOID').agg({
    'SpBioWtEco': 'mean'
}).reset_index()

In [None]:
# Preview the per-tract mean biodiversity weight score
processed_species_biodiversity

In [None]:
# Count the number of NaN SpBioWtEco values in the merged, de-duplicated rows
# (this is counted before the per-tract aggregation, not on the final metric table)
num_nan_SpBioWtEco = filtered_species_biodiversity_merge['SpBioWtEco'].isna().sum()

print(f"Number of NaN values in SpBioWtEco: {num_nan_SpBioWtEco}")


In [16]:
# Save the per-tract metric locally (without the index) so it can be passed to the upload function
processed_species_biodiversity.to_csv('natural_species_biodiversity_metric.csv', index=False)

## Function Call

In [17]:
@append_metadata
def species_biodiversity_upload(input_csv, export=False, varname=''):
    '''
    Uploads the species biodiversity metric to S3 bucket. The metric is:
    
    * Ecoregion Biodiversity Weight score

    Data for this metric was sourced from the California Department of Fish and Wildlife at:
    https://apps.wildlife.ca.gov/ace/

    Methods
    -------
    Relevant data columns were isolated, some were renamed for later merging with California tract data.
    Data was merged to California tract data.
    Duplicate rows based on biodiversity score and location were dropped.
    Data was then grouped by tracts and averaged to get a single score for each tract.
    
    Parameters
    ----------
    input_csv: string
        csv biodiversity metric data 
    export: True/False boolean
        False = will not upload resulting df containing CAL CRAI biodiversity metric to AWS
        True = will upload resulting df containing CAL CRAI biodiversity metric to AWS
    varname: string
        Name of the metric variable. Not used inside this function body;
        presumably consumed by the append_metadata decorator (confirm in
        scripts/utils/write_metadata.py).

    Script
    ------
    natural_species_biodiversity.ipynb

    Note:
    This function assumes users have configured the AWS CLI such that their access key / secret key pair are stored in ~/.aws/credentials.
    See https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html for guidance.
    '''
    print('Data transformation: relevant columns were isolated and renamed.')
    print('Data transformation: biodiversity data were merged with California census tract data.')
    print('Data transformation: duplicate rows based on biodiversity score and location were dropped')
    print('Data transformation: data were then grouped by census tract and averaged so each tract has a single score.')

    if export:
        bucket_name = 'ca-climate-index'
        directory = '3_fair_data/index_data'
        # upload_csv_aws is given a list of file names, so wrap the single csv
        export_filename = [input_csv]
        upload_csv_aws(export_filename, bucket_name, directory)
    else:
        # Nothing was sent to S3 in this branch, so say so explicitly
        # (the previous message incorrectly reported a successful upload).
        print(f'{input_csv} not uploaded to AWS.')

    # NOTE(review): removal of the local csv was disabled in the original code;
    # re-enable if the file should not persist after the upload:
    # if os.path.exists(input_csv):
    #     os.remove(input_csv)

In [18]:
input_csv = 'natural_species_biodiversity_metric.csv'
var = 'natural_fws_biodiversity'

# Pass the metric's variable name rather than the 'test' placeholder, which left `var` unused.
# NOTE(review): this changes the name handed to the append_metadata decorator — confirm this is
# the intended final metadata name before running with export=True.
species_biodiversity_upload(input_csv, export=True, varname=var)