In [1]:
from pathlib import Path
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Define functions

In [2]:
def create_dir(dir):
    """
    Checks if directory exists and if not it will create it

    dir: str
        full path the directory
    """
    path = Path(dir)
    if not path.exists():
        path.mkdir(parents=True, exist_ok=True)

In [3]:
def list_dir_return_list_by_selection(workspace, selection):
    """
    Lists all files in a workspace and return the full path

    workspace: str
        full path the directory
    selection: str
        string used as a filter 
    """
    files = []
    for file in Path(workspace).iterdir():
        if selection in file.name:
            files.append(file.resolve())
    return files

In [4]:
def clean_list_xml(file_list):
    """
    Removes files from the list that end with '.csv'.

    file_list: list
        List of file paths as strings.
    """
    # Convert strings to Path objects
    paths = [Path(file) for file in file_list]
    
    # Filter out files that end with '.csv'
    filtered_files = [file for file in paths if not file.name.endswith('.xml')]
    
    return filtered_files

In [5]:
# surpress warning due to adding model and gwl to dataframe
pd.options.mode.chained_assignment = None

# Load Global Warming Level years

In [6]:
GWLS = ["15", "20", "30", "40"]

GWL_dict = {"ACCESSCM2":    {"15": 2025, "20": 2038, "30": 2055, "40": 2072},
            "ACCESSESM15": {"15": 2027, "20": 2039, "30": 2060, "40": 2078},
            "BCCCSM2MR":   {"15": 2030, "20": 2043, "30": 2065, "40": -888},
            "CanESM5":       {"15": 2012, "20": 2022, "30": 2040, "40": 2054},
            "ECEarth3":     {"15": 2024, "20": 2035, "30": 2057, "40": 2073},
            "FGOALSf3L":   {"15": 2022, "20": 2037, "30": 2060, "40": 2079},
            "FGOALSg3":     {"15": 2022, "20": 2037, "30": 2060, "40": 2079},
            "GFDLESM4":     {"15": 2039, "20": 2052, "30": 2075, "40": -888},
            "INMCM48":     {"15": 2030, "20": 2046, "30": 2069, "40": -888},
            "INMCM50":     {"15": 2030, "20": 2046, "30": 2074, "40": -888},
            "IPSLCM6ALR":  {"15": 2018, "20": 2034, "30": 2050, "40": 2066},
            "MIROC6":        {"15": 2040, "20": 2053, "30": 2076, "40": -888},
            "MPIESM12HR": {"15": 2033, "20": 2049, "30": 2073, "40": -888},
            "MPIESM12LR": {"15": 2033, "20": 2049, "30": 2071, "40": 2091},
            "MRIESM20":    {"15": 2026, "20": 2038, "30": 2064, "40": 2083}, 
            "NorESM2LM":    {"15": 2042, "20": 2056, "30": 2077, "40": -888},
            "NorESM2MM":    {"15": 2039, "20": 2054, "30": 2076, "40": -888}
}

NOT_REACHED = -888

# Set paths and variables

In [7]:
input_dir = r'C:\Users\raf14049\OneDrive - Esri\Documents\CRIS\zonalstats\out'
output_dir = r'C:\Users\raf14049\OneDrive - Esri\Documents\CRIS\globalwarminglevels'

In [8]:
# Pick STAR or LOCA2
model_set = 'STAR'

# pick variable
variable = 'cdd'

# pick ssp scenario(s)
ssp_list = ['_ssp245','_ssp370','_ssp585']

# pick zone
zone = 'TribalAreas'

# Calculate climate state for global warming levels

In [9]:
start_proc_tm = time.time() 

# create path to CSV's
csv_dir_list = Path(input_dir) / model_set / zone

# Define and create the full output path
output_dir_model_set_zone = Path(output_dir) / model_set / zone
create_dir(output_dir_model_set_zone)
print(f"Created [{output_dir_model_set_zone}] for processing")

# Execute the workflow by ssp in a for loop
for ssp in ssp_list:

    # get list of CSV's
    ssp_rasters = list_dir_return_list_by_selection(csv_dir_list, ssp)

    # Strip .xmls from the ssp_rasters list
    ssp_rasters = clean_list_xml(ssp_rasters)
    
    total_recs = len(ssp_rasters)
    print(f"There are [{total_recs}] files in SSP[{ssp.strip('_ssp')}]")
    rec_count = 0

    start_rec_tm = time.time()
    
    # create an empty dataframe
    climate_state_model_row = pd.DataFrame(columns=['GEOID', 'MODEL', 'GWL', 'COUNT', 'AREA', 'VARIABLE',  'MIN', 'MAX', 'MEAN'])
    climate_state_model = pd.DataFrame(columns=['GEOID', 'MODEL', 'GWL', 'COUNT', 'AREA', 'VARIABLE', 'MIN', 'MAX', 'MEAN'])

    # loop over models
    for csv in ssp_rasters:

        rec_count += 1
        start_rec_tm = time.time()

        # Split the file name by underscore and collect the parts for processing
        file_name_parts = csv.name.strip('.csv').split('_')

        # Collect file names parts
        model_set, model, variable = file_name_parts[:3]
        print('Processing:', model_set, model, variable)

        # open .csv's with pathlib - use file_path.read_text()?
        raster_hist = pd.read_csv(Path(csv.parent) / f"{model_set}_{model}_{variable}_historical.csv")
        raster_ssp =  pd.read_csv(csv)
        raster_all = pd.concat([raster_hist, raster_ssp], ignore_index=True)
        
        # loop over GWL years
        for gwl in GWLS:
            
            print('Global Warming Level:', gwl)

            # sort dataframe by geoid
            raster_sorted = raster_all.sort_values(by=['GEOID', 'Standard Time']).reset_index(drop=True)

            # lookup GWL midpoint year
            gwl_year = int(GWL_dict.get(model).get(gwl))

            if gwl_year == NOT_REACHED:
                print("GWL not reached by {model_set}:{model}:{variable}")
                continue

            for geoid in np.unique(raster_all['GEOID']):            
                
                # select all rows with geoid
                raster_geoid = raster_all[raster_all['GEOID'] == geoid]

                # select 20y period around gwl
                raster_geoid_20y = raster_geoid[(raster_geoid['Standard Time'] >= gwl_year-9) & (raster_geoid['Standard Time'] <= gwl_year+10)]

                # add model and gwl to columns
                raster_geoid_20y.loc[:,'MODEL'] = model
                raster_geoid_20y.loc[:,'GWL'] = gwl

                # Group by GEOID and calculate the mean over the 20y period around the gwl year
                climate_state_model_row = raster_geoid_20y.groupby('GEOID').agg(
                                            MODEL = ('MODEL', lambda x: model),
                                            GWL = ('GWL', lambda x: gwl),
                                            COUNT = ('COUNT', 'first'),
                                            AREA = ('AREA', 'first'),
                                            VARIABLE = ('Variable', 'first'),
                                            MIN = ('MIN', 'mean'),
                                            MAX = ('MAX', 'mean'),
                                            MEAN = ('MEAN', 'mean')
                                           ).reset_index()
                
                # add each geoid to the larger dataframe
                climate_state_model = pd.concat([climate_state_model, climate_state_model_row])
    
    # drop index column
    climate_state_model = climate_state_model.reset_index().drop('index', axis=1)
    
    # Group by GWL, YEAR, GEOID, and VARIABLE, and calculate the average for MIN, MAX, and MEAN between models
    climate_state = climate_state_model.groupby(['GEOID', 'GWL', 'COUNT', 'AREA', 'VARIABLE']).agg(
                        MIN=('MIN', 'mean'),
                        MAX=('MAX', 'mean'),
                        MEAN=('MEAN', 'mean')
                    ).reset_index()

    end_red_tm = time.time()
    elapsed_rec_tm = end_red_tm - start_rec_tm
    print(f"[Processed: {os.path.join(csv_dir_list, ':', ssp)} in {elapsed_rec_tm:0,.2f} seconds]")

    # path to save output to
    out_raster_path = Path(output_dir) / model_set / zone / f"{'_'.join([model_set, variable, ssp.strip('_')])}.csv"

    # save the climate state (averaged over models) for each ssp as csv
    climate_state.to_csv(out_raster_path, index=False, header=True)

print("Processing complete.")
end_proc_tm = time.time()

elapsed_proc_tm = end_proc_tm - start_proc_tm

print(f"[Overall Elapsed time: {elapsed_proc_tm/60:0,.2f} minutes]")

Created [C:\Users\raf14049\OneDrive - Esri\Documents\CRIS\globalwarminglevels\STAR\TribalAreas] for processing
There are [2] files in SSP[585]
Processing: STAR ACCESSCM2 cdd
Global Warming Level: 15


  climate_state_model = pd.concat([climate_state_model, climate_state_model_row])


Global Warming Level: 20
Global Warming Level: 30
Global Warming Level: 40
Processing: STAR ACCESSESM15 cdd
Global Warming Level: 15
Global Warming Level: 20
Global Warming Level: 30
Global Warming Level: 40
[Processed: C:\Users\raf14049\OneDrive - Esri\Documents\CRIS\zonalstats\out\STAR\TribalAreas\:\_ssp585 in 10.37 seconds]
Processing complete.
[Overall Elapsed time: 0.35 minutes]
