Extract beaching information

In [114]:
import datetime as dt
from pathlib import Path
import xarray as xr

In [21]:
def lines_that_contain(string, fp):
    return [line for line in fp if string in line]

In [68]:
def get_parameters(direct):
    for myi in direct.glob('Lagrangian*.dat'):
        asstr = str(myi)
        OilType = asstr[asstr.find('gian_') + 5:asstr.find('-')]
        with open(myi, "r") as fp:
            for line in lines_that_contain("POINT_VOLUME", fp):
                if ':' in line:
                    SpillVolume = float(line[line.find(':')+2:-1])
        with open(myi, "r") as fp:
            for line in lines_that_contain("POSITION_COORDINATES", fp):
                if ':' in line:
                    numbers = (line[line.find(':')+2:-1]).split(' ')
                    lon, lat = float(numbers[0]), float(numbers[1])
    for myi in direct.glob('Model*.dat'):
        with open(myi, "r") as fp:
            for line in lines_that_contain("START", fp):
                numbers = (line[line.find(':')+2:-1]).split(' ')
                startdatetime = dt.datetime(int(numbers[0]), int(numbers[1]), int(numbers[2]),
                                   int(numbers[3]), int(numbers[4]), int(numbers[5]))
    return OilType, SpillVolume, lon, lat, startdatetime

In [105]:
def get_beaching_data(direct):
    for myi in direct.glob('Lagrangian*.nc'):
        data = xr.open_dataset(myi)
        BeachTime = data.Beaching_Time
        BeachVolume = data.Beaching_Volume
        grid_y, grid_x = data.grid_y, data.grid_x
        
        ncfile = str(myi)
        filename = f'Beaching{(ncfile[ncfile.find("Lagrangian")+10:])}'
    return BeachTime, BeachVolume, grid_y, grid_x, filename

In [115]:
def prepare_dataset(variables, grid_y, grid_x):

    ds_attrs = {
        'acknowledgements':
            'MOHID output',
        'creator_email':
            'sallen@eoas.ubc.ca',
        'creator_name':
            'Salish Sea MEOPAR Project Contributors',
        'creator_url':
            'https://ubc-moad-docs.readthedocs.org/',
        'institution':
            'UBC EOAS',
        'institution_fullname': (
            'Earth, Ocean & Atmospheric Sciences,'
            ' University of British Columbia'
        ),
        'summary': (
            'Beaching Time and Volume from a Specific Run'
        ),
        'source': (
            'analysis-susan/notebooks/MOHID/SaveBeaching.ipynb'
        ),
        'history': (
            '[{}] File creation.'
            .format(dt.datetime.today().strftime('%Y-%m-%d'))
        )
    }

    da = {}
    for var in ['Beaching_Volume', 'Beaching_Time']:
        da[var] = xr.DataArray(
            data=variables[var],
            name=var,
            dims=('grid_y', 'grid_x'),
            coords={
                'grid_y': grid_y,
                'grid_x': grid_x,
            })
        
    da_attrs = {'OilType': {'units': 'None',
                            'long_name': 'Type of oil spilled and run',
                           },
                'SpillVolume': {'units': 'm3',
                                'long_name': 'Volume of oil initially spilled'},
                'lon': {},
                'lat': {},
                'startdatetime': {'long_name': 'Date and time of Oil Spill'}}
    for var in ['OilType', 'SpillVolume', 'lon', 'lat', 'startdatetime']:
        da[var] = xr.DataArray(
            data=variables[var],
            name=var,
            dims=(),
            attrs=da_attrs[var]
            )

    ds = xr.Dataset(
        data_vars={
            'Beaching_Volume': da['Beaching_Volume'],
            'Beaching_Time': da['Beaching_Time'],
            'OilType': da['OilType'],
            'SpillVolume': da['SpillVolume'],
            'SpillLon': da['lon'],
            'SpillLat': da['lat'],
            'Spilldatetime': da['startdatetime']
        },
        coords={
        'grid_y': grid_y,
                'grid_x': grid_x,
        },
        attrs=ds_attrs
    )

    return ds


In [96]:
def write_out_file(ds, filename):
    encoding = {var: {'zlib': True} for var in ds.data_vars}
    ds.to_netcdf(
        path=filename,
        encoding=encoding,
    )

In [112]:
direct = Path('/data/sallen/results/MIDOSS/test_dir/')
da = {}
da['OilType'], da['SpillVolume'], da['lon'], da['lat'], da['startdatetime'] = get_parameters(direct)
da['Beaching_Time'], da['Beaching_Volume'], grid_y, grid_x, filename = get_beaching_data(direct)
ds = prepare_dataset(da, grid_y, grid_x)
write_out_file(ds, filename)

In [113]:
ds

In [120]:
# test pulling out directories with Path glob
toppath = Path('/results/SalishSea/nowcast-blue.201905/')
for myd in toppath.glob('*'):
    print (myd)
    for myi in myd.glob('namelist*'):
        print (myi)
    stop
    
# works like a dream

/results/SalishSea/nowcast-blue.201905/31dec21
/results/SalishSea/nowcast-blue.201905/31dec21/namelist_cfg
/results/SalishSea/nowcast-blue.201905/31dec21/namelist_ref


NameError: name 'stop' is not defined