# Create a .yaml file containing a dictionary of filenames, sorted by oil type and by whether the run is completed or in queue

In [7]:
import numpy as np
import pandas as pd
import os
import pathlib
import xarray as xr
import yaml
from matplotlib import pyplot as plt, cm, colors
from cmocean import cm as cmo
from datetime import datetime
from glob import glob
#from tqdm.notebook import tqdm

%matplotlib inline
plt.rcParams.update({'font.size': 12, 'axes.titlesize': 12})

In [8]:
# Results path: root directory that is searched for the "near-BP_*" run sets
results_dir = '/scratch/dlatorne/MIDOSS/runs/monte-carlo'

In [3]:
from glob import glob
# Every entry directly under results_dir whose name starts with "near-BP_",
# in alphabetical order.
runsets = glob(os.path.join(results_dir, "near-BP_*"))
runsets.sort()

In [4]:
runsets

['/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_10th-100_2021-09-25T162640',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_11th-100_2021-09-25T163107',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_12th-100_2021-09-25T163246',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_13th-100_2021-09-25T163257',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_14th-100_2021-09-25T163437',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_15th-100_2021-09-25T163657',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_15th-100_2021-09-25T164044',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_16th-100_2021-09-25T163910',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_16th-100_2021-09-25T164543',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_17th-100_2021-09-30T145322',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_18th-100_2021-09-30T145416',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_19th-100_2021-09-30T145439',
 '/scratch/dlatorne/MIDOSS/r

In [5]:
# Keep only the first five run sets
runsets_5 = runsets[:5]

In [6]:
runsets_5

['/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_10th-100_2021-09-25T162640',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_11th-100_2021-09-25T163107',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_12th-100_2021-09-25T163246',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_13th-100_2021-09-25T163257',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_14th-100_2021-09-25T163437']

In [21]:
#sorted(glob(os.path.join('/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_10th-100_2021-09-25T162640/results/*')))

In [7]:
# Select the first of the five run sets
runset = runsets_5[0]

In [15]:
# List the run directories inside the selected run set and show the first one.
# A dedicated name is used here: `list` is the builtin type, so calling
# `list.append(list)` on a fresh kernel raises a TypeError.
runset_runs = sorted(glob(os.path.join(runset, 'results', 'near-BP_*')))
runset_runs[0]

'/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_10th-100_2021-09-25T162640/results/near-BP_10th-100-0'

In [20]:
# Gather the run directories (results/near-BP_*) of every selected run set;
# entries are sorted within each run set.
runs = []
for runset in runsets_5:
    runs += sorted(glob(os.path.join(runset, 'results', 'near-BP_*')))

/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_10th-100_2021-09-25T162640
/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_11th-100_2021-09-25T163107
/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_12th-100_2021-09-25T163246
/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_13th-100_2021-09-25T163257
/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_14th-100_2021-09-25T163437


In [24]:
# Gather the Lagrangian*.nc files found in every run directory;
# entries are sorted within each run.
netcdf_files = []
for run in runs:
    netcdf_files += sorted(glob(os.path.join(run, 'Lagrangian*.nc')))

In [26]:
netcdf_files[0]

'/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_10th-100_2021-09-25T162640/results/near-BP_10th-100-0/Lagrangian_bunker-0_near-BP_10th-100-0.nc'

In [28]:
# Pull the oil type out of the first file name, step by step:
#   last path component -> second "_"-separated field -> text before the first "-"
oiltype = netcdf_files[0].rsplit('/', 1)[-1]
oiltype = oiltype.split('_')[1]
oiltype = oiltype.partition('-')[0]
oiltype

'bunker'

In [29]:
# Oil type names that are looked for in the netCDF file paths
oil_types = ['akns', 'bunker', 'dilbit', 'jet', 'diesel', 'gas', 'other']

In [41]:
# Try the selection on the first oil type only:
# file_boolean flags each path that contains the oil type name,
# files keeps the flagged paths in their original order.
file_boolean = {
    oil_types[0]: [oil_types[0] in path for path in netcdf_files]
}
files = {
    oil_types[0]: [
        path
        for path, is_match in zip(netcdf_files, file_boolean[oil_types[0]])
        if is_match
    ]
}

In [42]:
files[oil_types[0]] 

['/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_13th-100_2021-09-25T163257/results/near-BP_13th-100-61/Lagrangian_akns-61_near-BP_13th-100-61.nc',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_14th-100_2021-09-25T163437/results/near-BP_14th-100-21/Lagrangian_akns-21_near-BP_14th-100-21.nc',
 '/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_14th-100_2021-09-25T163437/results/near-BP_14th-100-94/Lagrangian_akns-94_near-BP_14th-100-94.nc']

### Time to pull all this together

In [9]:
def get_MOHID_netcdf_filenames(results_dir, output_dir):
    """Get lists of file paths for netcdf files of model output, grouped by
    oil type, and write them to a timestamped .yaml file.

    The files searched for are
    ``{results_dir}/near-BP_*/results/near-BP_*/Lagrangian*.nc``.
    A file is assigned to an oil type when the oil type name appears in its
    file name; the directory part of the path is ignored so that directory
    names cannot cause false matches.
    NOTE: jet and gas are run as diesel; other is run as bunker.

    :param str results_dir: File path for root directory of run sets.
    On Graham, the filepath is `/scratch/dlatorne/MIDOSS/runs/monte-carlo`

    :param str output_dir: File path for storing
    MOHID_results_locations_{date}.yaml, which contains the file paths found,
    sorted by oil type. The directory must already exist.

    :return: Dictionary of lists of file paths, keyed by oil type, namely:
    akns, bunker, dilbit, jet, diesel, gas and other, plus the key ``all``
    holding the concatenation of the per-oil lists (in the oil type order
    above).
    :rtype: dict

    :raises OSError: If the .yaml file cannot be opened for writing.
    """
    oil_types = [
        'akns',
        'bunker',
        'dilbit',
        'jet',
        'diesel',
        'gas',
        'other'
    ]
    # get list of runsets
    runsets = sorted(glob(os.path.join(results_dir, "near-BP_*")))
    # get list of runs within each runset
    runs = []
    for runset in runsets:
        runs.extend(sorted(glob(os.path.join(runset, 'results', 'near-BP_*'))))
    # get complete list of netcdf files
    netcdf_files = []
    for run in runs:
        netcdf_files.extend(sorted(glob(os.path.join(run, 'Lagrangian*.nc'))))
    # sort file paths by oil type; 'all' is created first so that it is the
    # first key of the dictionary
    files = {'all': []}
    for oil in oil_types:
        # match on the file name only, not on the directories leading to it
        files[oil] = [
            path for path in netcdf_files if oil in os.path.basename(path)
        ]
        files['all'].extend(files[oil])

    # write filenames to .yaml with timestamp ID (ddmmYYYY_HH:MM:SS)
    now = datetime.now()
    dt_string = now.strftime("%d%m%Y_%H:%M:%S")
    out_f = os.path.join(output_dir, f'MOHID_results_locations_{dt_string}.yaml')
    print(out_f)
    with open(out_f, 'w') as output_yaml:
        yaml.safe_dump(files, output_yaml)

    return files

In [10]:
%%time
# Directory in which the timestamped MOHID_results_locations_*.yaml file is written
output_dir = '/home/rmueller/projects/def-allen/rmueller/MIDOSS/Visualization'
# Search results_dir, write the .yaml file and keep the returned dictionary
files = get_MOHID_netcdf_filenames(results_dir, output_dir)

/home/rmueller/projects/def-allen/rmueller/MIDOSS/Visualization/MOHID_results_locations_07102021_15:19:03.yaml
CPU times: user 481 ms, sys: 781 ms, total: 1.26 s
Wall time: 3.77 s


In [55]:
files['akns'][0]

'/scratch/dlatorne/MIDOSS/runs/monte-carlo/near-BP_13th-100_2021-09-25T163257/results/near-BP_13th-100-61/Lagrangian_akns-61_near-BP_13th-100-61.nc'

In [56]:
# Report how many entries were found for each oil type
oil_types = ['akns', 'bunker', 'dilbit', 'jet', 'diesel', 'gas', 'other']
for oil in oil_types:
    print(f'{oil}: {len(files[oil])} completed runs')

akns: 49 completed runs
bunker: 2267 completed runs
dilbit: 1 completed runs
jet: 16 completed runs
diesel: 3969 completed runs
gas: 54 completed runs
other: 48 completed runs


In [1]:
from datetime import date

# Current local calendar date; its string form is YYYY-MM-DD.
today = date.today()
print("Today's date:", str(today))

Today's date: 2021-10-07


In [6]:
from datetime import datetime

# Current local date and time.
now = datetime.now()
print("now =", now)

# Compact timestamp: ddmmYYYY_HH:MM:SS (for example 07102021_14:29:31).
dt_string = now.strftime("%d%m%Y_%H:%M:%S")
print("date and time =", dt_string)

now = 2021-10-07 14:29:31.691427
date and time = 07102021_14:29:31
