# Index of Runs

MOM runs can be identified by the presence of key files such as `diag_table` and `data_table`

Goal: generate a class that represents MOM metadata and a JSON representation

These are the top most directories to search for MOM output.

In [2]:
directoriesToSearch = ['/g/data3/hh5/tmp/cosima', 
                      ]

Walk the directory structure looking for NetCDF files (extention .nc)

In [136]:
%%time
import os
import re
import fnmatch

# search for NetCDF files that reside (perhaps deeply) within
# output* directories
# match the parent and grandparent directory to configuration/experiment
m = re.compile('((.*)/(.*)/(.*)/(output\d+)/(.*\.nc))')

ncfiles = []
for directoryToSearch in directoriesToSearch:
    for root, dirs, filenames in os.walk(directoryToSearch):
        for filename in filenames:
            matched = m.match(os.path.join(root, filename))
            if matched is not None:
                ncfiles.append(matched.groups(0) + (filename,))

CPU times: user 1.77 s, sys: 85 ms, total: 1.86 s
Wall time: 3.3 s


The data directory contains several model __configurations__ (mom01v5 or mom025)

Each configuration contains a number of __experiments__ (KDS75 or KDS75_wind)

Which are each made up of a set of several __runs__ (e.g. output266)

In [145]:
import pandas as pd

df = pd.DataFrame(ncfiles, 
                  columns = ['ncfile', 'rootdir', 'configuration', 'experiment', 'run', 'filename', 'basename' ])

Configurations:

In [157]:
print(df.configuration.unique())

['mom025' 'mom01v5' 'access-om2-025' 'cosima']


Experiments

In [156]:
print((df.configuration +'/'+ df.experiment).unique())

['mom025/mom025_jra_ryf9091_saltunderice' 'mom025/mom025_jra_ryf0304'
 'mom025/mom025_jra_ryf9091' 'mom025/mom025_jra_ryf8485'
 'mom025/mom025_nyf' 'mom025/mom025_nyf_salt' 'mom01v5/KDS75_wind'
 'mom01v5/KDS75_UP' 'mom01v5/GFDL50' 'mom01v5/KDS75'
 'mom01v5/KDS75_saltfluxes' 'mom01v5/KDS75_PI' 'mom01v5/KDS75_salt10days'
 'access-om2-025/025deg_jra55_ryf_spinup1' 'cosima/access-om2-025']


Total number of runs

In [158]:
len((df.configuration +'/'+ df.experiment + '/' + df.run).unique())

1335

In [1]:
import os
import tqdm
import yaml
import f90nml
import csv
import datetime

ModuleNotFoundError: No module named 'f90nml'

In [71]:
output_dirs = []
for projectDir in directoriesToSearch:
    for dirpath, dirnames, filenames in os.walk(projectDir):
        if 'diag_table' in filenames:
            output_dirs.append(dirpath)


File| Description
---- | ----------
input.nml | Principal configuration
diag_table | Diagnostic output management
data_table  | Input and boundary condition data field management
field_table | Initial condition and advection scheme configuration
config.yaml | 
logfile.000000.out |
mom.err | 
mom.out | 
time_stamp.out |


Output files:
---------- |
ice_month.nc |
ocean__0096_184.nc|

ocean__0096_189.nc
ocean__0096_194.nc
ocean__0096_199.nc
ocean__0096_204.nc
ocean__0096_209.nc
ocean__0096_214.nc
ocean__0096_219.nc
ocean__0096_224.nc
ocean__0096_229.nc
ocean__0096_234.nc
ocean__0096_239.nc
ocean__0096_244.nc
ocean__0096_249.nc
ocean__0096_254.nc
ocean__0096_259.nc
ocean__0096_264.nc
ocean__0096_269.nc
ocean_grid.nc
ocean_month.nc
ocean.nc
ocean_scalar.nc

In [151]:
class Run():
    def __init__(self, dirpath):
        self.config = yaml.load(open(os.path.join(dirpath, 'config.yaml')))
        
        self.input = f90nml.read(os.path.join(dirpath, 'input.nml'))
        
        # diag_table
        # http://data1.gfdl.noaa.gov/~nnz/MOM/mom5_pubrel_August2012/src/shared/diag_manager/diag_table.html
        with open(os.path.join(dirpath, 'diag_table'), 'r') as f:
            reader = csv.reader(f)
            self.diag_table = []
            for row in reader:
                if len(row) > 0 and row[0][0] == '#':
                    continue            
                self.diag_table.append([value.replace('"', '').strip() for value in row])
        
        # Global Section
        self.title = self.diag_table[0][0] # first line
        self.base_date = self.diag_table[1][0]
        
        # 1 1 1 0 0 0  => 0001 1 1 0 0 0 for a valid datetime string
        if self.base_date.split()[0] == '1':
            self.base_date = '000' + self.base_date
        
        # year month day hour minute second
        self.base_date = datetime.datetime.strptime(self.base_date, '%Y %m %d %H %M %S')
        
        self.files = {}
        self.fields = {}
        # Files and Fields can be mixed
        for line in self.diag_table[2:]:
            if len(line) == 0:
                continue
                
            try:
                int(line[1]) # Files have integers in field 2
            except:
                file_line = False
            else:
                file_line = True
                    
            if file_line:
                # File Section
                self.files[line[0]] = line
            else:
                # Field Section
                print(line)
                self.fields[line[1]] = line
        
        with open(os.path.join(dirpath, 'data_table'), 'r') as f:
            reader = csv.reader(f)
            self.data_table = []
            for row in reader:
                if row[0][0] == '#':
                    continue            
                self.data_table.append([value.replace('"', '').strip() for value in row])
        
        with open(os.path.join(dirpath, 'field_table'), 'r') as f:
            reader = csv.reader(f)
            self.field_table = []
            for row in reader:
                if len(row) > 0 and row[0][0] == '#':
                    continue            
                self.field_table.append([value.replace('"', '').strip() for value in row])

        lines = open(os.path.join(dirpath, 'time_stamp.out'), 'r').readlines()
        self.time_stamp = [line.strip() for line in lines]


In [152]:
run = Run(output_dirs[0])

['ocean_model', 'geolon_t', 'geolon_t', 'ocean_grid', 'all', '.false.', 'none', '2']
['ocean_model', 'geolat_t', 'geolat_t', 'ocean_grid', 'all', '.false.', 'none', '2']
['ocean_model', 'geolon_c', 'geolon_c', 'ocean_grid', 'all', '.false.', 'none', '2']
['ocean_model', 'geolat_c', 'geolat_c', 'ocean_grid', 'all', '.false.', 'none', '2']
['ocean_model', 'ht', 'ht', 'ocean_grid', 'all', '.false.', 'none', '2']
['ocean_model', 'hu', 'hu', 'ocean_grid', 'all', '.false.', 'none', '2']
['ocean_model', 'area_t', 'area_t', 'ocean_grid', 'all', '.false.', 'none', '2']
['ocean_model', 'area_u', 'area_u', 'ocean_grid', 'all', '.false.', 'none', '2']
['ocean_model', 'kmt', 'kmt', 'ocean_grid', 'all', '.false.', 'none', '2']
['ocean_model', 'kmu', 'kmu', 'ocean_grid', 'all', '.false.', 'none', '2']
['ocean_model', 'drag_coeff', 'drag_coeff', 'ocean_grid', 'all', '.false.', 'none', '2']
['ocean_model', 'geolon_t', 'geolon_t', 'ocean', 'all', '.false.', 'none', '2']
['ocean_model', 'geolat_t', 'geol

In [146]:
run.base_date.isoformat()

'0001-01-01T00:00:00'

In [153]:
run.fields

{'CN': ['ice_model', 'CN', 'CN', 'ice_month', 'all', '.true.', 'none', '2'],
 'HI': ['ice_model', 'HI', 'HI', 'ice_month', 'all', '.true.', 'none', '2'],
 'HS': ['ice_model', 'HS', 'HS', 'ice_month', 'all', '.true.', 'none', '2'],
 'age_global': ['ocean_model',
  'age_global',
  'age_global',
  'ocean',
  'all',
  '.true.',
  'none',
  '2'],
 'area_t': ['ocean_model',
  'area_t',
  'area_t',
  'ocean_grid',
  'all',
  '.false.',
  'none',
  '2'],
 'area_u': ['ocean_model',
  'area_u',
  'area_u',
  'ocean_grid',
  'all',
  '.false.',
  'none',
  '2'],
 'drag_coeff': ['ocean_model',
  'drag_coeff',
  'drag_coeff',
  'ocean_grid',
  'all',
  '.false.',
  'none',
  '2'],
 'eta_global': ['ocean_model',
  'eta_global',
  'eta_global',
  'ocean_scalar',
  'all',
  '.true.',
  'none',
  '2'],
 'geolat_c': ['ocean_model',
  'geolat_c',
  'geolat_c',
  'oceankerg_%4yr_%3dy',
  'all',
  '.false.',
  '-280,80,-81,-30,-1,-1',
  '2'],
 'geolat_t': ['ocean_model',
  'geolat_t',
  'geolat_t',
  'ocea

In [78]:
output_dirs[0]

'/g/data3/hh5/tmp/cosima/mom01v5/KDS75_wind/output165'

In [155]:
run.fields

{'CN': ['ice_model', 'CN', 'CN', 'ice_month', 'all', '.true.', 'none', '2'],
 'HI': ['ice_model', 'HI', 'HI', 'ice_month', 'all', '.true.', 'none', '2'],
 'HS': ['ice_model', 'HS', 'HS', 'ice_month', 'all', '.true.', 'none', '2'],
 'age_global': ['ocean_model',
  'age_global',
  'age_global',
  'ocean',
  'all',
  '.true.',
  'none',
  '2'],
 'area_t': ['ocean_model',
  'area_t',
  'area_t',
  'ocean_grid',
  'all',
  '.false.',
  'none',
  '2'],
 'area_u': ['ocean_model',
  'area_u',
  'area_u',
  'ocean_grid',
  'all',
  '.false.',
  'none',
  '2'],
 'drag_coeff': ['ocean_model',
  'drag_coeff',
  'drag_coeff',
  'ocean_grid',
  'all',
  '.false.',
  'none',
  '2'],
 'eta_global': ['ocean_model',
  'eta_global',
  'eta_global',
  'ocean_scalar',
  'all',
  '.true.',
  'none',
  '2'],
 'geolat_c': ['ocean_model',
  'geolat_c',
  'geolat_c',
  'oceankerg_%4yr_%3dy',
  'all',
  '.false.',
  '-280,80,-81,-30,-1,-1',
  '2'],
 'geolat_t': ['ocean_model',
  'geolat_t',
  'geolat_t',
  'ocea

In [2]:
import tqdm

In [21]:
ls -1 /g/data3/hh5/tmp/cosima/mom01v5/KDS75_PI/output372

[0m[01;32mconfig.yaml[0m*
[01;32mdata_table[0m*
[01;32mdiag_table[0m*
[01;32mfield_table[0m*
ice_month.nc
[01;32minput.nml[0m*
logfile.000000.out
mom.err
mom.out
ocean__0096_184.nc
ocean__0096_189.nc
ocean__0096_194.nc
ocean__0096_199.nc
ocean__0096_204.nc
ocean__0096_209.nc
ocean__0096_214.nc
ocean__0096_219.nc
ocean__0096_224.nc
ocean__0096_229.nc
ocean__0096_234.nc
ocean__0096_239.nc
ocean__0096_244.nc
ocean__0096_249.nc
ocean__0096_254.nc
ocean__0096_259.nc
ocean__0096_264.nc
ocean__0096_269.nc
ocean_grid.nc
ocean_month.nc
ocean.nc
ocean_scalar.nc
time_stamp.out
[m

In [13]:
cd /g/data3/hh5/tmp/cosima/mom01v5/

/g/data3/hh5/tmp/cosima/mom01v5


In [14]:
ls

[0m[01;34mfigures[0m/  [01;34mGFDL50[0m/  [01;34mKDS75[0m/  [01;34mKDS75_PI[0m/  [01;34mKDS75_wind[0m/  MOM01_Diagnostics.ipynb
[m

In [15]:
cd GFDL50/

/g/data3/hh5/tmp/cosima/mom01v5/GFDL50


In [19]:
cat time_stamp.out

  56   1   1   0   0   0  Jan
  56   4   1   0   0   0  Apr
