In [1]:
import os
import time
import pathlib
import yaml
import math
from datetime import datetime

In [2]:
now = datetime.now()
dt_string = now.strftime("%d%B%Y")
dt_string

'14March2022'

In [3]:
startTime = time.time()
group_runs = 200 # Number of output files aggregated together
ngroups = 35  #This is a number >> max number of aggregations in any given oil type
#------------------------------------------------------------
# Global variables
#------------------------------------------------------------
oil_types = ['akns', 'bunker', 'dilbit', 'jet', 'diesel', 'gas', 'other']
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

In [4]:
import math
yaml_file=pathlib.Path('/scratch/rmueller/MIDOSS/Results/MOHID_results_locations_14032022_06:17:23.yaml')
#------------------------------------------------------------
# Output directory
#------------------------------------------------------------
# add date tag to sbatch .list filename
now = datetime.now()
dt_string = now.strftime("%d%B%Y")
#output_list_dir=f'/scratch/rmueller/MIDOSS/Results/{dt_string}' 
output_list_dir='/scratch/rmueller/MIDOSS/Results/ByOilType/14Mar2022'
# create output directory if it doesn't already exist
try:
    os.makedir(output_list_dir)
except:
    print(f'{output_list_dir} already exists')
sbatch_file = pathlib.Path(
    output_list_dir,
    f'aggregate_{group_runs}_oil_spills_{dt_string}.list')
#------------------------------------------------------------
# Load yaml file name with list of output netcdf files to aggregate
#------------------------------------------------------------
with yaml_file.open("rt") as f:
    run_paths = yaml.safe_load(f)   
n_iter={}
for oil in oil_types:
    n_iter[oil]=max(1,math.ceil(len(run_paths[oil])/group_runs))
#------------------------------------------------------------
# Iterate through oil types and aggregate SOILED model output
# in batches of "group_runs" number of files
#------------------------------------------------------------
all_iter = 0
with open(sbatch_file, 'w') as f:
    for oil in oil_types: # loop through oils
        this_iter = 1
        print(f'*** {oil} (Number of iterations: {n_iter[oil]}) ***')
        # loop through specified number of groups
        while (this_iter <= ngroups) & (n_iter[oil]>=this_iter): 
            print(f'Iteration: {this_iter} ')
            first = (this_iter-1) * group_runs 
            last = this_iter*group_runs
            #------------------------------------------------------------
            # write to file the sbatch submission command
            f.write(f"python aggregate_oil_spills.py '{yaml_file}' {oil} {first} {last} '{output_list_dir}'\n")
            this_iter += 1
            all_iter += 1
        executionTime = (time.time() - startTime)
        print(f'Execution time in minutes for {oil}: {executionTime/60:.2f}')
f.close

/scratch/rmueller/MIDOSS/Results/ByOilType/14Mar2022 already exists
*** akns (Number of iterations: 1) ***
Iteration: 1 
Execution time in minutes for akns: 0.09
*** bunker (Number of iterations: 17) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Iteration: 5 
Iteration: 6 
Iteration: 7 
Iteration: 8 
Iteration: 9 
Iteration: 10 
Iteration: 11 
Iteration: 12 
Iteration: 13 
Iteration: 14 
Iteration: 15 
Iteration: 16 
Iteration: 17 
Execution time in minutes for bunker: 0.09
*** dilbit (Number of iterations: 1) ***
Iteration: 1 
Execution time in minutes for dilbit: 0.09
*** jet (Number of iterations: 1) ***
Iteration: 1 
Execution time in minutes for jet: 0.09
*** diesel (Number of iterations: 32) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Iteration: 5 
Iteration: 6 
Iteration: 7 
Iteration: 8 
Iteration: 9 
Iteration: 10 
Iteration: 11 
Iteration: 12 
Iteration: 13 
Iteration: 14 
Iteration: 15 
Iteration: 16 
Iteration: 17 
Iteration: 18 
Iteration: 19 

<function TextIOWrapper.close()>

In [5]:
total_files = 0
for oil in oil_types:
    total_files += len(run_paths[oil])
total_files

9833

In [6]:
yaml_file=pathlib.Path('/scratch/rmueller/MIDOSS/Results/MOHID_results_locationsByMonth_14032022_14:03:39.yaml')
#------------------------------------------------------------
# Output directory
#------------------------------------------------------------
# add date tag to sbatch .list filename
now = datetime.now()
dt_string = now.strftime("%d%B%Y")
output_list_dir=f'/scratch/rmueller/MIDOSS/Results/ByMonth/{dt_string}' 

# create output directory if it doesn't already exist
try:
    os.makedirs(output_list_dir)
except:
    print(f'{output_list_dir} already exists')
sbatch_file = pathlib.Path(
    output_list_dir,
    f'aggregate_{group_runs}_oil_spills_{dt_string}.list')
#------------------------------------------------------------
# Load yaml file name with list of output netcdf files to aggregate
#------------------------------------------------------------
with yaml_file.open("rt") as f:
    run_paths = yaml.safe_load(f)   
n_iter={}
for month in months:
    n_iter[month]=max(1,math.ceil(len(run_paths[month])/group_runs))
#------------------------------------------------------------
# Iterate through oil types and aggregate SOILED model output
# in batches of "group_runs" number of files
#------------------------------------------------------------
all_iter = 0
with open(sbatch_file, 'w') as f:
    for month in months: # loop through oils
        this_iter = 1
        print(f'*** {month} (Number of iterations: {n_iter[month]}) ***')
        # loop through specified number of groups
        while (this_iter <= ngroups) & (n_iter[month]>=this_iter): 
            print(f'Iteration: {this_iter} ')
            first = (this_iter-1) * group_runs 
            last = this_iter*group_runs
            #------------------------------------------------------------
            # write to file the sbatch submission command
            f.write(f"python aggregate_oil_spills.py '{yaml_file}' {month} {first} {last} '{output_list_dir}'\n")
            this_iter += 1
            all_iter += 1
        executionTime = (time.time() - startTime)
        print(f'Execution time in minutes for {month}: {executionTime/60:.2f}')
f.close

*** Jan (Number of iterations: 4) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Execution time in minutes for Jan: 0.17
*** Feb (Number of iterations: 3) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Execution time in minutes for Feb: 0.17
*** Mar (Number of iterations: 4) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Execution time in minutes for Mar: 0.17
*** Apr (Number of iterations: 4) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Execution time in minutes for Apr: 0.17
*** May (Number of iterations: 4) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Execution time in minutes for May: 0.17
*** Jun (Number of iterations: 4) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Execution time in minutes for Jun: 0.17
*** Jul (Number of iterations: 4) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Execution time in minutes for Jul: 0.17
*** Aug (Number of iterations: 4) ***
Iteration: 1 
Iteration: 2 
Iteration:

<function TextIOWrapper.close()>

In [7]:
sbatch_file

PosixPath('/scratch/rmueller/MIDOSS/Results/ByMonth/14March2022/aggregate_200_oil_spills_14March2022.list')

In [8]:
sbatch_file

PosixPath('/scratch/rmueller/MIDOSS/Results/ByMonth/14March2022/aggregate_200_oil_spills_14March2022.list')

In [9]:
total_files = 0
for month in months:
    total_files += len(run_paths[month])
total_files

8434

### Create a secondary file for runs that didn't complete 

##### By month

In [10]:
input_dir='/home/rmueller/projects/def-allen/rmueller/MIDOSS/analysis-rachael/inputs/aggregation/ByMonth/SpillTime_00-30' 
yaml_file=pathlib.Path(input_dir,'MOHID_results_locationsByMonth_14122021_01:09:57.yaml')
output_dir='/scratch/rmueller/MIDOSS/Results/ByMonth/SpillTime_00-30_fillna0'
#output_list_dir=f'/scratch/rmueller/MIDOSS/Results/ByMonth/14December2021' 
sbatch_file = pathlib.Path(
    input_dir,
    f'aggregate_200_oil_spills_ByMonth_fillna0_LONG.list')
with yaml_file.open("rt") as f:
    run_paths = yaml.safe_load(f)   
n_iter={}
for month in months:
    n_iter[month]=max(1,math.ceil(len(run_paths[month])/group_runs))

In [11]:
#------------------------------------------------------------
# Iterate through oil types and aggregate SOILED model output
# in batches of "group_runs" number of files
#------------------------------------------------------------
with open(sbatch_file, 'w') as f:
    for month in months: # loop through oils
        this_iter = 1
        print(f'*** {month} (Number of iterations: {n_iter[month]}) ***')
        # loop through specified number of groups
        while (this_iter <= ngroups) & (n_iter[month]>=this_iter): 
            print(f'Iteration: {this_iter} ')
            first = (this_iter-1) * group_runs 
            last = this_iter*group_runs
            netcdf_file = pathlib.Path(
                    output_dir,
                    f'beaching_{month}_{first}-{last}.nc')
            if os.path.exists(netcdf_file)==False:
                f.write(f"python aggregate_oil_spills.py '{yaml_file}' {month} {first} {last} '{output_list_dir}'\n")
            this_iter += 1
        executionTime = (time.time() - startTime)
        print(f'Execution time in minutes for {month}: {executionTime/60:.2f}')
f.close

*** Jan (Number of iterations: 4) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Execution time in minutes for Jan: 0.26
*** Feb (Number of iterations: 4) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Execution time in minutes for Feb: 0.26
*** Mar (Number of iterations: 4) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Execution time in minutes for Mar: 0.26
*** Apr (Number of iterations: 4) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Execution time in minutes for Apr: 0.26
*** May (Number of iterations: 5) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Iteration: 5 
Execution time in minutes for May: 0.28
*** Jun (Number of iterations: 5) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Iteration: 5 
Execution time in minutes for Jun: 0.28
*** Jul (Number of iterations: 5) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Iteration: 5 
Execution time in minutes for Jul: 0.29
*** Aug (Number of i

<function TextIOWrapper.close()>

##### By oil type

In [12]:
input_dir='/home/rmueller/projects/def-allen/rmueller/MIDOSS/analysis-rachael/inputs/aggregation/ByOil/SpillTime_00-30' 
yaml_file=pathlib.Path(input_dir,'MOHID_results_locations_27112021_12:05:17.yaml')
output_dir='/scratch/rmueller/MIDOSS/Results/ByOilType/SpillTime_00-30_fillna0'
sbatch_file = pathlib.Path(
    input_dir,
    f'aggregate_oil_spills_ByOil_fillna0_LONG2.list')
with yaml_file.open("rt") as f:
    run_paths = yaml.safe_load(f)   
n_iter={}
for oil in oil_types:
    n_iter[oil]=max(1,len(run_paths[oil])/group_runs)


In [13]:
#------------------------------------------------------------
# Iterate through oil types and aggregate SOILED model output
# in batches of "group_runs" number of files
#------------------------------------------------------------
with open(sbatch_file, 'w') as f:
    for oil in oil_types: # loop through oils
        this_iter = 1
        print(f'*** {oil} (Number of iterations: {n_iter[oil]}) ***')
        # loop through specified number of groups
        while (this_iter <= ngroups) & (n_iter[oil]>=this_iter): 
            print(f'Iteration: {this_iter} ')
            first = (this_iter-1) * group_runs 
            last = this_iter*group_runs
            netcdf_file = pathlib.Path(
                    output_dir,
                    f'beaching_{oil}_{first}-{last}.nc')
            if os.path.exists(netcdf_file)==False:
                f.write(f"python aggregate_oil_spills.py '{yaml_file}' {oil} {first} {last} '{output_list_dir}'\n")
            this_iter += 1
        executionTime = (time.time() - startTime)
        print(f'Execution time in minutes for {oil}: {executionTime/60:.2f}')
f.close

*** akns (Number of iterations: 1) ***
Iteration: 1 
Execution time in minutes for akns: 0.37
*** bunker (Number of iterations: 16.74) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Iteration: 5 
Iteration: 6 
Iteration: 7 
Iteration: 8 
Iteration: 9 
Iteration: 10 
Iteration: 11 
Iteration: 12 
Iteration: 13 
Iteration: 14 
Iteration: 15 
Iteration: 16 
Execution time in minutes for bunker: 0.38
*** dilbit (Number of iterations: 1) ***
Iteration: 1 
Execution time in minutes for dilbit: 0.38
*** jet (Number of iterations: 1) ***
Iteration: 1 
Execution time in minutes for jet: 0.38
*** diesel (Number of iterations: 31.195) ***
Iteration: 1 
Iteration: 2 
Iteration: 3 
Iteration: 4 
Iteration: 5 
Iteration: 6 
Iteration: 7 
Iteration: 8 
Iteration: 9 
Iteration: 10 
Iteration: 11 
Iteration: 12 
Iteration: 13 
Iteration: 14 
Iteration: 15 
Iteration: 16 
Iteration: 17 
Iteration: 18 
Iteration: 19 
Iteration: 20 
Iteration: 21 
Iteration: 22 
Iteration: 23 
Iteration: 24 


<function TextIOWrapper.close()>