# Import libraries

In [11]:
import time
import os
import logging
import xarray as xr

# Define functions

In [14]:
def create_dir(dir):
    """
    Creates a directory if it does not exist yet

    Missing parent directories are created as well, and an already existing
    directory is left untouched.

    dir: str
        full path to the directory

    Raises OSError if the directory cannot be created (for example when the
    path already exists as a regular file).
    """
    # exist_ok=True replaces the separate os.path.exists() check, which was
    # open to a race between the check and the creation
    os.makedirs(dir, exist_ok=True)

In [3]:
def list_dir_return_list_by_selection(workspace, selection):
    """
    Returns the full paths of the entries in a workspace whose name contains a filter string

    workspace: str
        full path to the directory
    selection: str
        string used as a filter; an entry is kept when its name contains
        this string (an empty string keeps every entry)
    """
    return [
        os.path.join(workspace, entry)
        for entry in os.listdir(workspace)
        if selection in entry
    ]

In [4]:
def merge_md_rasters_LOCA2(file_historical, file_1, out_file, variable):
    """
    Merges two LOCA2 NetCDF files into a single compressed NetCDF

    Both datasets are reversed along their ``lat`` dimension before they are
    merged. Any failure is logged with its traceback and then swallowed, so
    the function always returns None and never raises to the caller.

    file_historical: str
        full path to the historical NetCDF
    file_1: str
        full path to the NetCDF that is merged with the historical one
    out_file: str
        full path of the merged NetCDF to write
    variable: str
        name of the variable to compress; hyphens are replaced with
        underscores to build the encoding key
    """
    try:
        # The context managers close both source files even when the merge or
        # the write fails, so no file handles are left open
        with xr.open_dataset(file_historical) as ds0, \
                xr.open_dataset(file_1) as ds1:

            # flip datasets upside down
            ds0 = ds0.isel(lat=slice(None, None, -1))
            ds1 = ds1.isel(lat=slice(None, None, -1))

            merge_result = xr.merge([ds0, ds1])

            # complevel 1 - 9. 1 is fastest and compresses least. 9 is slowest and compresses most
            # The write has to happen while the source files are still open
            encoding = {variable.replace('-', '_'): {'zlib': True, 'complevel': 9}}
            merge_result.to_netcdf(out_file, encoding=encoding)

    # except starts if operation above fails
    except Exception:

        # logs the failure at ERROR level together with the traceback
        logging.exception(f"Failed to merge [{file_historical}] and [{file_1}] into [{out_file}]")

In [5]:
def merge_md_rasters_STAR(file_historical, file_1, file_2, out_file, variable):
    """
    Merges three STAR NetCDF files into a single compressed NetCDF

    All datasets are reversed along their ``latitude`` dimension before they
    are merged. Any failure is logged with its traceback and then swallowed,
    so the function always returns None and never raises to the caller.

    file_historical: str
        full path to the historical NetCDF
    file_1: str
        full path to the first NetCDF that is merged with the historical one
    file_2: str
        full path to the second NetCDF that is merged with the historical one
    out_file: str
        full path of the merged NetCDF to write
    variable: str
        name of the variable to compress; hyphens are replaced with
        underscores to build the encoding key
    """
    try:
        # The context managers close all source files even when the merge or
        # the write fails, so no file handles are left open
        with xr.open_dataset(file_historical) as ds0, \
                xr.open_dataset(file_1) as ds1, \
                xr.open_dataset(file_2) as ds2:

            # flip datasets upside down
            ds0 = ds0.isel(latitude=slice(None, None, -1))
            ds1 = ds1.isel(latitude=slice(None, None, -1))
            ds2 = ds2.isel(latitude=slice(None, None, -1))

            merge_result = xr.merge([ds0, ds1, ds2])

            # complevel 1 - 9. 1 is fastest and compresses least. 9 is slowest and compresses most
            # The write has to happen while the source files are still open
            encoding = {variable.replace('-', '_'): {'zlib': True, 'complevel': 9}}
            merge_result.to_netcdf(out_file, encoding=encoding)

    # except starts if operation above fails
    except Exception:

        # logs the failure at ERROR level together with the traceback
        logging.exception(
            f"Failed to merge [{file_historical}], [{file_1}] and [{file_2}] into [{out_file}]"
        )

# Prepare Logger and Open Log File

In [None]:
# Path to your base directory (replace the <your-base-path> placeholder).
# The logs folder and the data/download and data/merge folders are all
# resolved relative to this path.
base = r'C:\<your-base-path>\CRIS'

In [None]:
# Time stamp that makes the log file name unique per run
file_time = time.strftime("%Y%m%d-%H%M%S")

log_file_name = "Output_Log_" + file_time + ".log"

LOG_FILE_PATH = os.path.join(base, 'logs')
LOG_FILE = os.path.join(LOG_FILE_PATH, log_file_name)

# logging.FileHandler opens the log file as soon as it is created and fails
# when the folder is missing, so make sure the logs folder exists first
os.makedirs(LOG_FILE_PATH, exist_ok=True)

# Send every message of level INFO and above to both the log file and the console
logging.basicConfig(level = logging.INFO,
                    format="%(asctime)s:%(levelname)s: %(message)s",
                    handlers=[
                       logging.FileHandler(filename=LOG_FILE),
                       logging.StreamHandler()
                   ])

# Set paths and variables

In [9]:
# Data to be processed must first be downloaded into <base>\data\download\<model_set>\<variable>\<model>\

In [None]:
# Downloaded inputs are read from <base>/data/download and the merged
# outputs are written to <base>/data/merge
input_dir, output_dir = (
    os.path.join(base, 'data', leaf) for leaf in ('download', 'merge')
)

# Make sure the output folder exists
create_dir(output_dir)

In [11]:
# pick STAR or LOCA2
model_set = 'LOCA2'

# pick model
model = 'ACCESS-CM2'

# pick variable
variable = 'tavg'

# Scenario tags, written the way they appear inside the file names, that are
# processed for each model set
if model_set == 'STAR':
    ssp_list = ['_ssp245','_ssp585']
elif model_set == 'LOCA2':
    ssp_list = ['_ssp245','_ssp370','_ssp585']
else:
    # Fail here with a clear message instead of with a NameError on ssp_list
    # once the list is used
    raise ValueError(f"Unsupported model_set [{model_set}]; expected 'STAR' or 'LOCA2'")

# Merge

In [None]:
start_proc_tm = time.time()

# Folder that holds the downloaded NetCDFs of the selected model set, variable and model
netcdf_dir_list = os.path.join(input_dir, model_set, variable, model)

# Get a list of NetCDFs from the downloaded directory.
# Sidecar files (for example *.nc.aux.xml) are left out by testing the suffix:
# str.strip() removes a set of characters from both ends of a string, not a
# suffix, so it cannot be used to drop an extension reliably.
netcdf_list = sorted(
    path
    for path in list_dir_return_list_by_selection(netcdf_dir_list, '')
    if path.lower().endswith('.nc')
)

# Define the full directory path
output_dir_model_set = os.path.join(output_dir, model_set)

# Create an output folder for the model set
create_dir(output_dir_model_set)
logging.info(f"Created [{output_dir_model_set}] for processing")

# Define the full directory path
output_dir_model_set_variable = os.path.join(output_dir_model_set, variable)

# Create an output folder for the model variable
create_dir(output_dir_model_set_variable)
logging.info(f"Created [{output_dir_model_set_variable}] for processing")

# Execute the workflow by ssp in a for loop
for ssp in ssp_list:

    start_rec_tm = time.time()

    # NetCDFs of this scenario; the directory was listed once above, so it is
    # only filtered here
    ssp_rasters = [path for path in netcdf_list if ssp in os.path.basename(path)]

    if not ssp_rasters:
        logging.warning(f"No NetCDF matching [{ssp}] found in [{netcdf_dir_list}]; skipping")
        continue

    # Split the file name (without its extension) by underscore and collect the parts for processing
    file_name_parts = os.path.splitext(os.path.basename(ssp_rasters[0]))[0].split("_")

    # Number of underscore-separated parts each naming scheme is made of:
    #   STAR:  model, variable, ssp, file, start_year, end_year
    #   LOCA2: model, variable, ssp, start_year, end_year
    if model_set == 'STAR':
        expected_part_count = 6
    elif model_set == 'LOCA2':
        expected_part_count = 5
    else:
        # Only the STAR and LOCA2 naming schemes are handled
        continue

    if len(file_name_parts) != expected_part_count:
        logging.error(f"Unexpected file name [{ssp_rasters[0]}] for model set [{model_set}]; skipping")
        continue

    # Dedicated names keep the loop variable and the model / variable
    # settings chosen above from being overwritten
    file_model, file_variable, file_ssp = file_name_parts[:3]
    name_prefix = '_'.join([file_model, file_variable, file_ssp])
    logging.info(f"Processing {os.path.join(netcdf_dir_list, name_prefix)}")

    # Define the output NetCDF filename and path
    out_netcdf = os.path.join(
        output_dir_model_set_variable,
        '_'.join([model_set, file_model, file_variable, file_ssp, '1950_2100.nc']),
    )

    # Merge multidimensional rasters
    # NOTE: the merge functions log their own failures and do not raise, so
    # the "Processed" message below is written even when a merge failed
    if model_set == 'STAR':

        # STAR ships a historical file per scenario plus two future periods
        file_historical = os.path.join(netcdf_dir_list, name_prefix + '_historical_1950_2014.nc')
        file_1 = os.path.join(netcdf_dir_list, name_prefix + '_1_2015_2058.nc')
        file_2 = os.path.join(netcdf_dir_list, name_prefix + '_2_2059_2100.nc')

        merge_md_rasters_STAR(file_historical, file_1, file_2, out_netcdf, file_variable)

    else:

        # LOCA2 shares one historical file (no scenario in its name) across all scenarios
        file_historical = os.path.join(netcdf_dir_list, '_'.join([file_model, file_variable, 'historical_1950_2014.nc']))
        file_1 = os.path.join(netcdf_dir_list, name_prefix + '_2015_2100.nc')

        merge_md_rasters_LOCA2(file_historical, file_1, out_netcdf, file_variable)

    end_red_tm = time.time()
    elapsed_rec_tm = end_red_tm - start_rec_tm
    logging.info(f"[Processed: {os.path.join(netcdf_dir_list, ':', file_ssp)} in {elapsed_rec_tm:0,.2f} seconds]")

logging.info("Processing complete.")
end_proc_tm = time.time()
elapsed_proc_tm = end_proc_tm - start_proc_tm

logging.info(f"[Overall Elapsed time: {elapsed_proc_tm:0,.2f} seconds]")