# Import libraries

In [1]:
import time
import os
import logging
import shutil
import xarray as xr
from xclim.core import units

# Define functions

In [2]:
def create_dir(dir):
    """
    Creates a directory (and any missing parent directories) if it does not exist

    dir: str
        full path to the directory
    """
    # makedirs with exist_ok=True replaces the check-then-create pattern: it is
    # safe when the directory already exists and also builds missing parents,
    # which os.mkdir cannot do.
    # NOTE: the parameter name shadows the builtin `dir`; it is kept so that
    # existing keyword callers keep working.
    os.makedirs(dir, exist_ok=True)

In [None]:
def list_dir_return_list_by_selection(workspace, ssp, model):
    """
    Returns the full paths of the entries in a workspace whose names
    contain both the ssp and the model strings

    workspace: str
        full path to the directory to list
    ssp: str
        substring that must appear in the entry name (e.g. '_ssp245')
    model: str
        substring that must appear in the entry name (e.g. 'ACCESS-CM2')
    """
    return [
        os.path.join(workspace, entry)
        for entry in os.listdir(workspace)
        if ssp in entry and model in entry
    ]

In [4]:
def unit_conversion_temp_star(in_path, variable, f_name, out_path):
    """
    Converts one STAR temperature NetCDF file to degrees Fahrenheit

    The variable is tagged as degrees Celsius before conversion, overriding
    any units attribute stored in the file.

    in_path: str
        full path to the directory containing the input file
    variable: str
        name of the data variable to convert
    f_name: str
        file name without the '.nc' extension
    out_path: str
        full path to the directory the converted file is written to
    """
    # os.path.join keeps the paths valid on every platform, unlike a
    # hard-coded '\\' separator.
    in_file = os.path.join(in_path, f_name + '.nc')
    out_file = os.path.join(out_path, f_name + '.nc')

    # The context manager closes the file handle even if the conversion fails.
    with xr.open_dataset(in_file) as ds:

        # The data variable may be stored with hyphens or with underscores;
        # resolve the name once so the lookup and the encoding key agree.
        name = variable if variable in ds else variable.replace('-', '_')

        ds[name].attrs['units'] = 'degC'

        ds[name] = units.convert_units_to(ds[name], 'degF')

        ds.to_netcdf(out_file, encoding={name: {'zlib': True, 'complevel': 9}})

In [5]:
def unit_conversion_temp_loca2(in_path, variable, f_name, out_path):
    """
    Converts one LOCA2 temperature NetCDF file to degrees Fahrenheit

    The variable is tagged as Kelvin ('degK') before conversion, overriding
    any units attribute stored in the file.

    in_path: str
        full path to the directory containing the input file
    variable: str
        name of the data variable to convert
    f_name: str
        file name without the '.nc' extension
    out_path: str
        full path to the directory the converted file is written to
    """
    # os.path.join keeps the paths valid on every platform, unlike a
    # hard-coded '\\' separator.
    in_file = os.path.join(in_path, f_name + '.nc')
    out_file = os.path.join(out_path, f_name + '.nc')

    # The context manager closes the file handle even if the conversion fails.
    with xr.open_dataset(in_file) as ds:

        # The data variable may be stored with hyphens or with underscores;
        # resolve the name once so the lookup and the encoding key agree.
        name = variable if variable in ds else variable.replace('-', '_')

        ds[name].attrs['units'] = 'degK'

        ds[name] = units.convert_units_to(ds[name], 'degF')

        ds.to_netcdf(out_file, encoding={name: {'zlib': True, 'complevel': 9}})

In [6]:
def unit_conversion_precip(in_path, variable, f_name, out_path):
    """
    Converts one precipitation NetCDF file from millimeters to inches

    The variable is tagged as 'mm' before conversion, overriding any units
    attribute stored in the file.

    in_path: str
        full path to the directory containing the input file
    variable: str
        name of the data variable to convert
    f_name: str
        file name without the '.nc' extension
    out_path: str
        full path to the directory the converted file is written to
    """
    # os.path.join keeps the paths valid on every platform, unlike a
    # hard-coded '\\' separator.
    in_file = os.path.join(in_path, f_name + '.nc')
    out_file = os.path.join(out_path, f_name + '.nc')

    # The context manager closes the file handle even if the conversion fails.
    with xr.open_dataset(in_file) as ds:

        # The data variable may be stored with hyphens or with underscores;
        # resolve the name once so the lookup and the encoding key agree.
        name = variable if variable in ds else variable.replace('-', '_')

        ds[name].attrs['units'] = 'mm'

        ds[name] = units.convert_units_to(ds[name], 'in')

        ds.to_netcdf(out_file, encoding={name: {'zlib': True, 'complevel': 9}})

# Prepare Logger and Open Log File

In [None]:
# Path to your base directory. The log, input and output locations used by
# the rest of this notebook are all built from it with os.path.join.
# Replace the <your-base-path> placeholder before running.
base = r'C:\<your-base-path>\CRIS'

In [None]:
# Timestamp the log file name so every run writes its own log
file_time = time.strftime("%Y%m%d-%H%M%S")

log_file_name = "Output_Log_" + file_time + ".log"

LOG_FILE_PATH = os.path.join(base, 'logs')
LOG_FILE = os.path.join(LOG_FILE_PATH, log_file_name)

# logging.FileHandler does not create missing parent directories, so make
# sure the log folder exists before the handler opens the file.
os.makedirs(LOG_FILE_PATH, exist_ok=True)

# Send every record at INFO level or above to both the log file and the console
logging.basicConfig(level = logging.INFO,
                    format="%(asctime)s:%(levelname)s: %(message)s",
                    handlers=[
                       logging.FileHandler(filename=LOG_FILE),
                       logging.StreamHandler()
                   ])

# Set paths and variables

In [9]:
# Files are read from <base>/data/merge and results are written to
# <base>/data/unitconvert
input_dir = os.path.join(base, 'data', 'merge')
output_dir = os.path.join(base, 'data', 'unitconvert')

# Make sure the output root exists before anything is written to it
create_dir(output_dir)

In [10]:
# Variables counted in days: these files are copied to the output folder
# without any unit conversion.
variables_day = ['pr-days-above-nonzero-90th',
                 'pr-days-above-nonzero-95th',
                 'pr-days-above-nonzero-99th',
                 'pr-days-ge-1in',
                 'pr-days-ge-2in',
                 'pr-days-ge-3in',
                 'pr-days-ge-4in',
                 'tmax-days-ge-85F',
                 'tmax-days-ge-86F',
                 'tmax-days-ge-90F',
                 'tmax-days-ge-95F',
                 'tmax-days-ge-100F',
                 'tmax-days-ge-105F',
                 'tmax-days-ge-110F',
                 'tmax-days-ge-115F',
                 'tmax-days-le-32F',
                 'tmin-days-ge-60F',
                 'tmin-days-ge-70F',
                 'tmin-days-ge-75F',
                 'tmin-days-ge-80F',
                 'tmin-days-ge-85F',
                 'tmin-days-ge-90F',
                 'tmin-days-le-0F',
                 'tmin-days-le-28F',
                 'tmin-days-le-32F'
                 ]

# Degree-day variables: handled like the day-count variables (copied as-is).
variables_degreedays = ['cdd',
                        'hdd']

# Temperature variables: converted to degrees Fahrenheit.
variables_temp = ['tavg',
                  'tmax',
                  'tmax1day',
                  'tmean-jja',
                  'tmin',
                  'tmin1day',
                  'tmin-jja'
                 ]

# Precipitation variables: converted from millimeters to inches.
# NOTE(review): 'prmaz-seasonal' looks like a typo for 'prmax-seasonal' --
# confirm against the actual file/folder names before changing it.
variables_precip = ['pr-above-nonzero-90th',
                    'pr-above-nonzero-95th',
                    'pr-above-nonzero-99th',
                    'pr-annual',
                    'prmax1day',
                    'prmax5day',
                    'prmax10day',
                    'prmax20day',
                    'prmax30day',
                    'prmaz-seasonal',
                    'pr-monthly'
                    ]


In [11]:
# Pick STAR or LOCA2
model_set = 'LOCA2'

# pick model (used as a substring filter on the file names)
model = 'ACCESS-CM2'

# Variable to process; it is also the name of the sub-folder under the model
# set. Variables whose unit is days are copied from input_dir to output_dir
# instead of being converted.
variable = 'tavg'

# Scenario tags; each one is used as a substring filter on the file names
ssp_list = ['_ssp245','_ssp370','_ssp585']

# Convert units

In [None]:
start_proc_tm = time.time()


def _strip_suffix(text, suffix):
    """Return text without the trailing suffix, if present (no-op otherwise)."""
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
    return text


def _ssp_label(tag):
    """Return the scenario number of an SSP tag ('_ssp245' or 'ssp245' -> '245')."""
    tag = tag.lstrip('_')
    return tag[len('ssp'):] if tag.startswith('ssp') else tag


# Directory holding the input NetCDF files for the selected model set and variable
netcdf_dir_list = os.path.join(input_dir, model_set, variable)

# Define the full directory path
output_dir_model_set = os.path.join(output_dir, model_set)

# Create an output folder for the model set
create_dir(output_dir_model_set)
logging.info(f"Created [{output_dir_model_set}] for processing")

# Define the full directory path
output_dir_model_set_variable = os.path.join(output_dir_model_set, variable)

# Create an output folder for the model variable
create_dir(output_dir_model_set_variable)
logging.info(f"Created [{output_dir_model_set_variable}] for processing")

# Temperature conversion depends on the model set the file belongs to
temp_converters = {'STAR': unit_conversion_temp_star,
                   'LOCA2': unit_conversion_temp_loca2}

# Execute the workflow by ssp in a for loop
for ssp in ssp_list:

    listed = list_dir_return_list_by_selection(netcdf_dir_list, ssp, model)

    # Map '.xml' sidecar files onto the NetCDF file they describe, drop
    # duplicates and anything that is not a '.nc' file. str.strip(".xml")
    # would remove a *set of characters* from both ends, not the suffix.
    # Sorting makes the processing order reproducible.
    ssp_rasters = sorted({_strip_suffix(str(item), ".xml") for item in listed})
    ssp_rasters = [item for item in ssp_rasters if item.lower().endswith(".nc")]

    total_recs = len(ssp_rasters)
    logging.info(f"There are [{total_recs}] files in SSP[{_ssp_label(ssp)}]")

    for rec_count, netcdf in enumerate(ssp_rasters, start=1):

        start_rec_tm = time.time()

        file_name = os.path.basename(netcdf)
        file_stem = os.path.splitext(file_name)[0]

        # Split the file name by underscore and collect the parts for processing
        file_name_parts = file_stem.split("_")

        # Expected layout: <model_set>_<model>_<variable>_<ssp>_<start>_<end>
        if len(file_name_parts) != 6:
            logging.warning(f"Skipping {netcdf}: unexpected file name layout")
            continue

        # Use dedicated names so the configured model_set / model / variable
        # and the loop variable ssp are not overwritten by the file name.
        file_model_set, _file_model, file_variable, file_ssp, _start_year, _end_year = file_name_parts
        logging.info(f"[{rec_count}/{total_recs}] Processing {file_model_set}:{_ssp_label(file_ssp)}:{file_name}")

        # if variable has unit 'day', copy to output_dir
        if file_variable in variables_day or file_variable in variables_degreedays:

            output_file = os.path.join(output_dir_model_set_variable, file_name)

            shutil.copy(netcdf, output_file)

            logging.info(f"[Copied {netcdf} to {output_dir_model_set_variable}]")
            continue

        # if variable has unit Celsius or Kelvin, convert unit to Fahrenheit and save to output_dir
        if file_variable in variables_temp:

            converter = temp_converters.get(file_model_set)
            if converter is None:
                logging.warning(f"Skipping {netcdf}: unknown model set [{file_model_set}]")
                continue

        # if variable has unit mm, convert unit to inches and save to output_dir
        elif file_variable in variables_precip:

            converter = unit_conversion_precip

        else:

            logging.warning(f"Skipping {netcdf}: variable [{file_variable}] is not in any variable list")
            continue

        try:

            converter(netcdf_dir_list, file_variable, file_stem, output_dir_model_set_variable)

        except Exception:

            # Keep the batch going, but record the traceback so the failure
            # can be diagnosed from the log.
            logging.exception(f"Failed to process {netcdf}")
            continue

        elapsed_rec_tm = time.time() - start_rec_tm
        logging.info(f"[Converted and saved {netcdf} to {os.path.join(output_dir_model_set_variable, file_name)} in {elapsed_rec_tm:0,.2f} seconds]")

logging.info("Processing complete.")
end_proc_tm = time.time()

elapsed_proc_tm = end_proc_tm - start_proc_tm

logging.info(f"[Overall Elapsed time: {elapsed_proc_tm/60:0,.2f} minutes]")