In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
# Directory holding the user-supplied CSVs (run manager and Slurm parameters).
input_files_path = '../../input'
# Directory that receives the generated files (explicit run list and job script).
intermediate_path = '../../intermediate'
# Run-manager CSV inside `input_files_path`; its stem also names the generated files.
run_manager_file = 'run_manager.csv'

In [3]:
# Read the run-manager table from the input directory and display it.
run_manager_path = os.path.join(input_files_path, run_manager_file)
run_manager_df = pd.read_csv(run_manager_path)
run_manager_df

Unnamed: 0,ESM,ESM_Input_Location,Output_Location,Reference_Dataset,Reference_Input_Location,Variable,Scenario,Ensemble,target_period,application_period,daily,monthly
0,GFDL-ESM4,/rcfs/projects/gcims/data/climate/cmip6/GFDL-ESM4,/rcfs/projects/gcims/data/climate/basd/GFDL-ESM4,W5E5v2,/rcfs/projects/gcims/data/climate/W5E5v2,pr,ssp245,r1i1p1f1,1970-2014,2015-2100,True,True
1,CanESM5,,/rcfs/projects/gcims/data/climate/basd/CanESM5,,,tas,ssp370,,,1950-2014,,
2,,,,,,hurs,,,,,,
3,,,,,,sfcWind,,,,,,
4,,,,,,rsds,,,,,,
5,,,,,,rlds,,,,,,
6,,,,,,tasmin,,,,,,
7,,,,,,tasmax,,,,,,


In [4]:
def remove_nas(x):
    """Return the entries of ``x`` that are not null.

    Parameters
    ----------
    x : array-like supporting boolean-mask indexing (e.g. ``numpy.ndarray``)
        Values to filter.

    Returns
    -------
    Same container type as ``x``, restricted to the positions where
    ``pandas.notnull`` is true.
    """
    keep = pd.notnull(x)
    return x[keep]

In [5]:
def _present(column):
    """Return the non-null entries of one run-manager column as an array."""
    return remove_nas(run_manager_df[column].values)


# Each run-manager column is an independent list of options; blank cells are dropped.
esms = _present('ESM')
esm_input_paths = _present('ESM_Input_Location')
output_paths = _present('Output_Location')
ref_datasets = _present('Reference_Dataset')
ref_datasets_paths = _present('Reference_Input_Location')
variables = _present('Variable')
scenarios = _present('Scenario')
ensembles = _present('Ensemble')
target_periods = _present('target_period')
application_periods = _present('application_period')
daily = _present('daily')
monthly = _present('monthly')

In [15]:
# Display the non-null entries of the run manager's Variable column.
variables

array(['pr', 'tas', 'hurs', 'sfcWind', 'rsds', 'rlds', 'tasmin', 'tasmax'],
      dtype=object)

In [16]:
# When either daily temperature extreme is requested, drop both from the list
# and make sure tas, tasrange and tasskew are present instead.
temperature_extremes = ['tasmax', 'tasmin']
if any(name in variables for name in temperature_extremes):
    remaining = np.setdiff1d(variables, temperature_extremes)
    variables = np.union1d(remaining, ['tas', 'tasrange', 'tasskew'])
variables

array(['hurs', 'pr', 'rlds', 'rsds', 'sfcWind', 'tas', 'tasrange',
       'tasskew'], dtype=object)

In [17]:
# Cross product of every option list: one row per combination, one column per list.
mesh_axes = [
    esms,
    variables,
    scenarios,
    ensembles,
    ref_datasets,
    target_periods,
    application_periods,
]
mesh_grids = np.meshgrid(*mesh_axes)
mesh_array = np.array(mesh_grids).T.reshape(-1, len(mesh_axes))

In [18]:
# Label the combination columns in the same order the option lists were meshed.
mesh_columns = [
    'ESM',
    'Variable',
    'Scenario',
    'Ensemble',
    'Reference_Dataset',
    'target_period',
    'application_period',
]
mesh_df = pd.DataFrame(data=mesh_array, columns=mesh_columns)
mesh_df

Unnamed: 0,ESM,Variable,Scenario,Ensemble,Reference_Dataset,target_period,application_period
0,GFDL-ESM4,hurs,ssp245,r1i1p1f1,W5E5v2,1970-2014,2015-2100
1,GFDL-ESM4,pr,ssp245,r1i1p1f1,W5E5v2,1970-2014,2015-2100
2,GFDL-ESM4,rlds,ssp245,r1i1p1f1,W5E5v2,1970-2014,2015-2100
3,GFDL-ESM4,rsds,ssp245,r1i1p1f1,W5E5v2,1970-2014,2015-2100
4,GFDL-ESM4,sfcWind,ssp245,r1i1p1f1,W5E5v2,1970-2014,2015-2100
...,...,...,...,...,...,...,...
59,CanESM5,rsds,ssp370,r1i1p1f1,W5E5v2,1970-2014,1950-2014
60,CanESM5,sfcWind,ssp370,r1i1p1f1,W5E5v2,1970-2014,1950-2014
61,CanESM5,tas,ssp370,r1i1p1f1,W5E5v2,1970-2014,1950-2014
62,CanESM5,tasrange,ssp370,r1i1p1f1,W5E5v2,1970-2014,1950-2014


In [19]:
# Rebuild the combination table from scratch so this cell can be re-run safely,
# then attach the per-ESM and per-reference-dataset paths and the output flags.
esm_input_lookup = run_manager_df[['ESM', 'ESM_Input_Location']]
reference_input_lookup = run_manager_df[['Reference_Dataset', 'Reference_Input_Location']]
output_lookup = run_manager_df[['ESM', 'Output_Location']]

mesh_df = (
    pd.DataFrame(
        mesh_array,
        columns=['ESM', 'Variable', 'Scenario', 'Ensemble', 'Reference_Dataset',
                 'target_period', 'application_period'],
    )
    # ESM input locations
    .merge(esm_input_lookup, on='ESM', how='inner')
    # Reference dataset input locations
    .merge(reference_input_lookup, on='Reference_Dataset', how='inner')
    # Output paths
    .merge(output_lookup, on='ESM', how='inner')
    # The first non-null daily/monthly flag applies to every row
    .assign(daily=daily[0], monthly=monthly[0])
)
mesh_df

Unnamed: 0,ESM,Variable,Scenario,Ensemble,Reference_Dataset,target_period,application_period,ESM_Input_Location,Reference_Input_Location,Output_Location,daily,monthly
0,GFDL-ESM4,hurs,ssp245,r1i1p1f1,W5E5v2,1970-2014,2015-2100,/rcfs/projects/gcims/data/climate/cmip6/GFDL-ESM4,/rcfs/projects/gcims/data/climate/W5E5v2,/rcfs/projects/gcims/data/climate/basd/GFDL-ESM4,True,True
1,GFDL-ESM4,pr,ssp245,r1i1p1f1,W5E5v2,1970-2014,2015-2100,/rcfs/projects/gcims/data/climate/cmip6/GFDL-ESM4,/rcfs/projects/gcims/data/climate/W5E5v2,/rcfs/projects/gcims/data/climate/basd/GFDL-ESM4,True,True
2,GFDL-ESM4,rlds,ssp245,r1i1p1f1,W5E5v2,1970-2014,2015-2100,/rcfs/projects/gcims/data/climate/cmip6/GFDL-ESM4,/rcfs/projects/gcims/data/climate/W5E5v2,/rcfs/projects/gcims/data/climate/basd/GFDL-ESM4,True,True
3,GFDL-ESM4,rsds,ssp245,r1i1p1f1,W5E5v2,1970-2014,2015-2100,/rcfs/projects/gcims/data/climate/cmip6/GFDL-ESM4,/rcfs/projects/gcims/data/climate/W5E5v2,/rcfs/projects/gcims/data/climate/basd/GFDL-ESM4,True,True
4,GFDL-ESM4,sfcWind,ssp245,r1i1p1f1,W5E5v2,1970-2014,2015-2100,/rcfs/projects/gcims/data/climate/cmip6/GFDL-ESM4,/rcfs/projects/gcims/data/climate/W5E5v2,/rcfs/projects/gcims/data/climate/basd/GFDL-ESM4,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...
59,CanESM5,rsds,ssp370,r1i1p1f1,W5E5v2,1970-2014,1950-2014,,/rcfs/projects/gcims/data/climate/W5E5v2,/rcfs/projects/gcims/data/climate/basd/CanESM5,True,True
60,CanESM5,sfcWind,ssp370,r1i1p1f1,W5E5v2,1970-2014,1950-2014,,/rcfs/projects/gcims/data/climate/W5E5v2,/rcfs/projects/gcims/data/climate/basd/CanESM5,True,True
61,CanESM5,tas,ssp370,r1i1p1f1,W5E5v2,1970-2014,1950-2014,,/rcfs/projects/gcims/data/climate/W5E5v2,/rcfs/projects/gcims/data/climate/basd/CanESM5,True,True
62,CanESM5,tasrange,ssp370,r1i1p1f1,W5E5v2,1970-2014,1950-2014,,/rcfs/projects/gcims/data/climate/W5E5v2,/rcfs/projects/gcims/data/climate/basd/CanESM5,True,True


In [88]:
# Derive the job-script and log file names from the run-manager file's stem.
file_name, _extension = os.path.splitext(run_manager_file)
job_file_name = file_name + '.job'
out_file_name = file_name + '.out'
job_file_name

'run_manager.job'

In [89]:
# Save the fully expanded run list, without the row index, to the intermediate directory.
explicit_list_path = os.path.join(intermediate_path, file_name + '_explicit_list.csv')
mesh_df.to_csv(explicit_list_path, index=False)

In [65]:
# Read the parameter/value table used to fill in the Slurm job header.
slurm_params_path = os.path.join(input_files_path, 'slurm_parameters.csv')
slurm_params = pd.read_csv(slurm_params_path)
slurm_params

Unnamed: 0,parameter,value
0,account,ceds
1,time,10:00:00
2,partition,slurm
3,max_concurrent,3
4,email,<username>@<domain>
5,mail-type,END


In [72]:
def _slurm_value(name):
    """Return the first `value` whose `parameter` equals ``name``.

    Raises IndexError when the parameter is not listed in `slurm_params`.
    """
    matches = slurm_params.loc[slurm_params['parameter'] == name, 'value']
    return matches.values[0]


account = _slurm_value('account')
time = _slurm_value('time')
partition = _slurm_value('partition')
max_concurrent = _slurm_value('max_concurrent')
email = _slurm_value('email')
mail_type = _slurm_value('mail-type')

In [87]:
# Assemble the Slurm array-job script: one array task per row of `mesh_df`,
# with at most `max_concurrent` tasks running at a time. Building the lines
# first means a missing variable fails before the job file is truncated.
job_script_lines = [
    "#!/bin/bash\n\n\n",
    "# Slurm Settings\n",
    f"#SBATCH --account={account}\n",
    f"#SBATCH --partition={partition}\n",
    f"#SBATCH --job-name={job_file_name}\n",
    f"#SBATCH --time={time}\n",
    f"#SBATCH --mail-type={mail_type}\n",
    f"#SBATCH --mail-user={email}\n",
    # NOTE(review): every array task is given the same --output path; confirm
    # whether a per-task pattern (e.g. %A_%a) is wanted so logs do not collide.
    f"#SBATCH --output=.out/{out_file_name}\n",
    f"#SBATCH --array=1-{mesh_df.shape[0]}%{max_concurrent}\n\n\n",
    "# Load Modules\n",
    "module load gcc/11.2.0\n",
    "module load python/miniconda3.9\n",
    "source /share/apps/python/miniconda3.9/etc/profile.d/conda.sh\n\n",
    "# activate conda environment\n",
    "conda activate basd_env\n\n",
    "# Timing\n",
    "start=`date +%s.%N`\n\n",
    "# Run script\n",
    "python ../python/main.py $SLURM_ARRAY_TASK_ID\n\n",
    "# End timing and print runtime\n",
    # Fixed: this previously used `$N`, which the shell expands as an (unset)
    # variable, so the end timestamp lost the nanosecond part that `start` has.
    "end=`date +%s.%N`\n",
    'runtime=$( echo "($end - $start) / 60" | bc -l )\n',
    'echo "Run completed in $runtime minutes"\n',
]

# Write the whole script in one call.
with open(os.path.join(intermediate_path, job_file_name), 'w') as job_file:
    job_file.writelines(job_script_lines)