Mapping particle tracks from Ocean Parcels unto the Salish Sea Atlantis Boxes. 
Original code written by Bec Gordon & Javier Porobic, CSIRO.
Link to the [SSAM Ocean Parcels Repo](https://bitbucket.csiro.au/users/por07g/repos/ssam_oceanparcels/browse)

In [2]:
import os
import xarray as xr
import numpy as np
import geopandas as gpd
import pandas as pd
from netCDF4 import Dataset
from shapely.geometry import Point

In [3]:
shapefile_name = "/ocean/rlovindeer/Atlantis/ssam_oceanparcels/SalishSea/SalishSea_July172019_2/SalishSea_July172019.shp"
data_df_original = gpd.read_file(shapefile_name)
data_df_original = data_df_original.sort_values(by=['BOX_ID'])
data_df = data_df_original.set_index('BOX_ID')
box_depth = data_df['BOTZ']
box_area = data_df['AREA']
box_volume = box_area * box_depth
surface_volume = box_area * 25

#print(box_volume)

In [35]:
# Ocean Parcels Spill File
inputFileName = 'results/5b_TurnPoint_Dilbit_2019-01-20_10000_OP_D50_wp3.zarr'
num_particles = 10000

In [5]:
scenario = inputFileName.split(sep = '_')

In [6]:
# Oil type properties & spill location selection

Dilbit = {
    "Density": 1011.2, #kg/m^3
    "Naphthalene": 24, #mg/kg oil
    "Phenanthrene": 17,
    "Pyrene": 10,
    "Benzo": 3,
}

BunkerC = {
    "Density": 995.3,
    "Naphthalene": 680,
    "Phenanthrene": 796,
    "Pyrene": 266,
    "Benzo": 56,
}

Diesel = {
    "Density": 831.0,
    "Naphthalene": 3664,
    "Phenanthrene": 1000,
    "Pyrene": 0.000,
    "Benzo": 0.000,
}

Crude = {
    "Density": 884.7,
    "Naphthalene": 654,
    "Phenanthrene": 327,
    "Pyrene": 13,
    "Benzo": 2,
}

fuel_type = {
    "Dilbit" : Dilbit,
    "BunkerC" : BunkerC,
    "Diesel" : Diesel,
    "Crude" : Crude,
}

spill_volume = {
    "5b" : 2000, #m^3 
    "6a" : 15,
    "7a" : 1000,
    "4a" : 500,
}

spill_box_surface_volume = {
    "5b" : (322271112.331102*25), #m^3 area x surface depth
    "6a" : 15,
    "7a" : 1000,
    "4a" : 500,
}

In [7]:
# Calculations of oil per particle in mg/m^3/particle
release_start = scenario[3]
oil_per_particle = (fuel_type[scenario[2]]["Density"] * spill_volume[scenario[0]] / spill_box_surface_volume[scenario[0]]) / num_particles #kg/m3
naph_per_particle = oil_per_particle * fuel_type[scenario[2]]["Naphthalene"] 
phen_per_particle = oil_per_particle * fuel_type[scenario[2]]["Phenanthrene"]
pyrene_per_particle = oil_per_particle * fuel_type[scenario[2]]["Pyrene"]
benzo_per_particle = oil_per_particle * fuel_type[scenario[2]]["Benzo"]
release_start_time = np.datetime64(release_start)

In [8]:
# Calculations of oil mass in mg
oil_mass_kg = (fuel_type[scenario[2]]["Density"] * spill_volume[scenario[0]])
naph_mass_mg = oil_mass_kg * fuel_type[scenario[2]]["Naphthalene"]
phen_mass_mg = oil_mass_kg * fuel_type[scenario[2]]["Phenanthrene"]
pyrene_mass_mg = oil_mass_kg * fuel_type[scenario[2]]["Pyrene"]
benzo_mass_mg = oil_mass_kg * fuel_type[scenario[2]]["Benzo"]
oil_mass_mg = oil_mass_kg * 1e6

print(str(scenario[2])+' spill mass of '+str(oil_mass_mg)+' in mg')
print('Naphthalene spill mass of '+str(naph_mass_mg)+' in mg')
print('Phenanthrene spill mass of '+str(phen_mass_mg)+' in mg')
print('Pyrene spill mass of '+str(pyrene_mass_mg)+' in mg')
print('Benzo spill mass of '+str(benzo_mass_mg)+' in mg')

Dilbit spill mass of 2022400000000.0 in mg
Naphthalene spill mass of 48537600.0 in mg
Phenanthrene spill mass of 34380800.0 in mg
Pyrene spill mass of 20224000.0 in mg
Benzo spill mass of 6067200.0 in mg


In [9]:
# Calculating the surface concentration of each contaminant for the entire spill
oil_full = oil_per_particle * num_particles * 1e6 / spill_box_surface_volume[scenario[0]]
naph_full = naph_per_particle * num_particles
phen_full = phen_per_particle * num_particles
pyrene_full = pyrene_per_particle * num_particles
benzo_full = benzo_per_particle * num_particles

print(str(scenario[2])+' spill concentration of '+str(oil_full)+' in mg/m^3')
print('Naphthalene spill concentration of '+str(naph_full)+' in mg/m^3')
print('Phenanthrene spill concentration of '+str(phen_full)+' in mg/m^3')
print('Pyrene spill concentration of '+str(pyrene_full)+' in mg/m^3')
print('Benzo spill concentration of '+str(benzo_full)+' in mg/m^3')

Dilbit spill concentration of 3.115618565968159e-08 in mg/m^3
Naphthalene spill concentration of 0.006024443165123948 in mg/m^3
Phenanthrene spill concentration of 0.004267313908629464 in mg/m^3
Pyrene spill concentration of 0.0025101846521349783 in mg/m^3
Benzo spill concentration of 0.0007530553956404935 in mg/m^3


In [10]:
numLayers = 7
numSites = data_df.shape[0]
numTargetSites = numSites

#outputDT = 60*60
outputDT = 43100.00

stepsPerDay = int(86400.0/ outputDT)
numStepsPerDT = int(outputDT/3600.0)

debug = False

In [34]:
pfile_nc = xr.open_dataset(str(inputFileName), decode_cf=True)

lon = np.ma.filled(pfile_nc.variables['lon'], np.nan)
lat = np.ma.filled(pfile_nc.variables['lat'], np.nan)
time = np.ma.filled(pfile_nc.variables['time'], np.nan)
z = np.ma.filled(pfile_nc.variables['z'], np.nan)
probs = np.ma.filled(pfile_nc.variables['decay_value'], np.nan)

In [28]:
lon

array([[-123.29342182, -123.29644287, -123.29800888, ...,           nan,
                  nan,           nan],
       [-123.29308907, -123.29025082, -123.29072633, ...,           nan,
                  nan,           nan],
       [-123.29331392, -123.29054534, -123.28928532, ...,           nan,
                  nan,           nan],
       ...,
       [-123.29433328, -123.29680695, -123.2994088 , ...,           nan,
                  nan,           nan],
       [-123.29352301, -123.28828452, -123.27757902, ...,           nan,
                  nan,           nan],
       [-123.29374576, -123.30557095, -123.29931746, ...,           nan,
                  nan,           nan]])

In [31]:
probs

array([[ 1.,  1.,  1., ..., nan, nan, nan],
       [ 1.,  1.,  1., ..., nan, nan, nan],
       [ 1.,  1.,  1., ..., nan, nan, nan],
       ...,
       [ 1.,  1.,  1., ..., nan, nan, nan],
       [ 1.,  1.,  1., ..., nan, nan, nan],
       [ 1.,  1.,  1., ..., nan, nan, nan]], dtype=float32)

In [32]:
time

array([['2019-01-14T12:00:00.000000000', '2019-01-14T13:00:00.000000000',
        '2019-01-14T14:00:00.000000000', ...,
                                  'NaT',                           'NaT',
                                  'NaT'],
       ['2019-01-14T09:00:00.000000000', '2019-01-14T10:00:00.000000000',
        '2019-01-14T11:00:00.000000000', ...,
                                  'NaT',                           'NaT',
                                  'NaT'],
       ['2019-01-14T09:00:00.000000000', '2019-01-14T10:00:00.000000000',
        '2019-01-14T11:00:00.000000000', ...,
                                  'NaT',                           'NaT',
                                  'NaT'],
       ...,
       ['2019-01-14T11:00:00.000000000', '2019-01-14T12:00:00.000000000',
        '2019-01-14T13:00:00.000000000', ...,
                                  'NaT',                           'NaT',
                                  'NaT'],
       ['2019-01-14T04:00:00.000000000', '20

In [36]:
pfile = xr.open_zarr(str(inputFileName))

lon = pfile["lon"].values
lat = pfile['lat'].values
time = pfile['time'].values
z = pfile['z'].values
probs = pfile['decay_value'].values
obs = pfile['obs'].values

In [37]:
lon

array([[-123.29366191, -123.26443759, -123.23193191, ..., -123.10831625,
        -123.10831625, -123.10831625],
       [-123.29370508, -123.26488902, -123.23227887, ..., -123.10832544,
        -123.10832544, -123.10832544],
       [-123.29408843, -123.26551851, -123.23433243, ..., -123.18835853,
        -123.18835853, -123.18835853],
       ...,
       [          nan,           nan,           nan, ...,           nan,
                  nan,           nan],
       [          nan,           nan,           nan, ...,           nan,
                  nan,           nan],
       [          nan,           nan,           nan, ...,           nan,
                  nan,           nan]])

In [43]:
lon[:][3]

array([-123.29302212, -123.26336562, -123.23113572, -123.20723567,
       -123.20013839, -123.19001845, -123.18061277, -123.18061277,
       -123.18061277, -123.18061277, -123.18061277, -123.18061277,
       -123.18061277, -123.18061277, -123.18061277, -123.18061277,
       -123.18061277, -123.18061277, -123.18061277, -123.18061277,
       -123.18061277, -123.18061277, -123.18061277, -123.18061277,
       -123.18061277, -123.18061277, -123.18061277, -123.18061277,
       -123.18061277, -123.18061277, -123.18061277, -123.18061277,
       -123.18061277, -123.18061277, -123.18061277, -123.18061277,
       -123.18061277, -123.18061277, -123.18061277, -123.18061277,
       -123.18061277, -123.18061277, -123.18061277, -123.18061277,
       -123.18061277, -123.18061277, -123.18061277, -123.18061277,
       -123.18061277, -123.18061277, -123.18061277, -123.18061277,
       -123.18061277, -123.18061277, -123.18061277, -123.18061277,
       -123.18061277, -123.18061277, -123.18061277, -123.18061

In [45]:
probs[:][1]

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1.

In [48]:
time[:][2]

array(['2019-01-20T00:00:00.000000000', '2019-01-20T01:00:00.000000000',
       '2019-01-20T02:00:00.000000000', '2019-01-20T03:00:00.000000000',
       '2019-01-20T04:00:00.000000000', '2019-01-20T05:00:00.000000000',
       '2019-01-20T06:00:00.000000000', '2019-01-20T07:00:00.000000000',
       '2019-01-20T08:00:00.000000000', '2019-01-20T09:00:00.000000000',
       '2019-01-20T10:00:00.000000000', '2019-01-20T11:00:00.000000000',
       '2019-01-20T12:00:00.000000000', '2019-01-20T13:00:00.000000000',
       '2019-01-20T14:00:00.000000000', '2019-01-20T15:00:00.000000000',
       '2019-01-20T16:00:00.000000000', '2019-01-20T17:00:00.000000000',
       '2019-01-20T18:00:00.000000000', '2019-01-20T19:00:00.000000000',
       '2019-01-20T20:00:00.000000000', '2019-01-20T21:00:00.000000000',
       '2019-01-20T22:00:00.000000000', '2019-01-20T23:00:00.000000000',
       '2019-01-21T00:00:00.000000000', '2019-01-21T01:00:00.000000000',
       '2019-01-21T02:00:00.000000000', '2019-01-21

numParticles = lon.shape[0]
trackDates = []

for i in range(0,numParticles):
    #print(time[i][0])
    trackDates.append(time[i][0])

RDiff = max(trackDates) - min(trackDates)
minDate = np.datetime64(release_start+"T00:30:00")
ts = pd.to_datetime(str(minDate))
d = ts.strftime('%Y-%m-%d %H:%M:%S')
print(d)

In [74]:
numParticles = lon.shape[0]
trackDates = time[0,:]

RDiff = max(trackDates) - min(trackDates)
minDate = np.datetime64(release_start+"T00:30:00")
ts = pd.to_datetime(str(minDate))
d = ts.strftime('%Y-%m-%d %H:%M:%S')
print(d)

2019-01-20 00:30:00


In [67]:
time[:,0].shape

(10007,)

In [68]:
RDiff

numpy.timedelta64(86400000000000,'ns')

In [75]:
RDiff

numpy.timedelta64(853200000000000,'ns')

In [71]:
ts

Timestamp('2019-01-20 00:30:00')

In [50]:
trackDates

[numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datetime64('2019-01-20T00:00:00.000000000'),
 numpy.datet

In [76]:
trackDates

array(['2019-01-20T00:00:00.000000000', '2019-01-20T01:00:00.000000000',
       '2019-01-20T02:00:00.000000000', '2019-01-20T03:00:00.000000000',
       '2019-01-20T04:00:00.000000000', '2019-01-20T05:00:00.000000000',
       '2019-01-20T06:00:00.000000000', '2019-01-20T07:00:00.000000000',
       '2019-01-20T08:00:00.000000000', '2019-01-20T09:00:00.000000000',
       '2019-01-20T10:00:00.000000000', '2019-01-20T11:00:00.000000000',
       '2019-01-20T12:00:00.000000000', '2019-01-20T13:00:00.000000000',
       '2019-01-20T14:00:00.000000000', '2019-01-20T15:00:00.000000000',
       '2019-01-20T16:00:00.000000000', '2019-01-20T17:00:00.000000000',
       '2019-01-20T18:00:00.000000000', '2019-01-20T19:00:00.000000000',
       '2019-01-20T20:00:00.000000000', '2019-01-20T21:00:00.000000000',
       '2019-01-20T22:00:00.000000000', '2019-01-20T23:00:00.000000000',
       '2019-01-21T00:00:00.000000000', '2019-01-21T01:00:00.000000000',
       '2019-01-21T02:00:00.000000000', '2019-01-21

In [78]:
trackDates.shape

(238,)

In [21]:
numReleaseDays = 1
numReleaseSteps = numReleaseDays * stepsPerDay

trackLength = len(lon[0])

print('trackLength = ' + str(trackLength))
print('numStepsPerDT = ' + str(numStepsPerDT))
numSteps = int(trackLength / numStepsPerDT)


trackLength = 238
numStepsPerDT = 11


In [22]:
# Create the netcdf output file

netcdfFileName = "SSAM_Scenario_" + scenario[0] + "_" + scenario[3] + "_" + str(num_particles) + "_2024.nc"
try:
    os.remove(netcdfFileName)
except:
    pass
ncfile = Dataset(netcdfFileName, "w", format="NETCDF4", clobber=True)
Dataset.set_fill_on(ncfile)

# Dimensions
time = ncfile.createDimension("t", None)
b = ncfile.createDimension("b", numTargetSites)
z = ncfile.createDimension("z", numLayers)

In [23]:
# Variables
times = ncfile.createVariable("t",np.float64, ("t",))
oil = ncfile.createVariable("oil",np.float64,("t", "b"))
Naphthalene = ncfile.createVariable("Naphthalene",np.float64, ("t", "b", "z"))
Phenanthrene = ncfile.createVariable("Phenanthrene",np.float64,("t", "b", "z"))
Pyrene = ncfile.createVariable("Pyrene",np.float64,("t", "b", "z"))
Benzo = ncfile.createVariable("Benzo",np.float64,("t", "b", "z"))

# Attributes
Naphthalene.units = "mgPAH/m^3"
Naphthalene.long_name = "Naphthalene"
Naphthalene.missing_value = 0.0000
Naphthalene.valid_min = 0.0000
Naphthalene.valid_max = 100000000.0

Phenanthrene.units = "mgPAH/m^3"
Phenanthrene.long_name = "Phenanthrene"
Phenanthrene.missing_value = 0.0000
Phenanthrene.valid_min = 0.0000
Phenanthrene.valid_max = 100000000.0

Pyrene.units = "mgPAH/m^3"
Pyrene.long_name = "Pyrene"
Pyrene.missing_value = 0.0000
Pyrene.valid_min = 0.0000
Pyrene.valid_max = 100000000.0

Benzo.units = "mgPAH/m^3"
Benzo.long_name = "Benzo(a)pyrene"
Benzo.missing_value = 0.0000
Benzo.valid_min = 0.0000
Benzo.valid_max = 100000000.0

oil.units = "kgOil/m^3"
oil.long_name = "Oil"

times.units = "seconds since " + d
times.dt = outputDT
times.long_name = "time"

OPTION 1: TO CREATE FORCING FILE WITH ZEROES
Creates a contaminant forcing file with no contaminants

numSteps = int((70*60*60*24*365)/outputDT)
timeData = np.arange(0,(numSteps)*outputDT,outputDT)
times[:] = timeData

FillerData = np.zeros((numSteps, numTargetSites, numLayers))

Naphthalene[:,:,:] = FillerData
Phenanthrene[:,:,:] = FillerData
Pyrene[:,:,:] = FillerData
Benzo[:,:,:] = FillerData

ncfile.close()

In [24]:
# OPTION 2: CREATE CONTAMINANT FORCING FILE
# Populate variables with contaminant data

timeData = np.arange(0,(numSteps + numReleaseSteps)*outputDT,outputDT)
times[:] = timeData

No_layer_particles = np.zeros((numSteps + numReleaseSteps, numTargetSites))
Surface_particles = np.zeros((numSteps + numReleaseSteps, numTargetSites, numLayers))

In [25]:
for partIndex in range(0, numParticles):

    trackDateDiff = trackDates[partIndex] - minDate
    trackDateDiff = trackDateDiff/ np.timedelta64(1, 's')

    timeOffset = int(abs((trackDateDiff /outputDT)))

    for stepIndex in range(0, numSteps):
        timeValue = stepIndex + timeOffset

        partLon = lon[partIndex][stepIndex * numStepsPerDT]
        partLat = lat[partIndex][stepIndex * numStepsPerDT]
        partProb = probs[partIndex][stepIndex * numStepsPerDT]

        matchFound = 0

        for targetIndex in range (0, numTargetSites):

            box_id = targetIndex
            box_coordinates = data_df.iloc[targetIndex].geometry
            find_particle = box_coordinates.contains(Point(partLon, partLat))
            
            if data_df.iloc[targetIndex].BOTZ < 26:
                layer = 0
            elif data_df.iloc[targetIndex].BOTZ == 50:
                layer = 1
            elif data_df.iloc[targetIndex].BOTZ == 100:
                layer = 2
            elif data_df.iloc[targetIndex].BOTZ == 200:
                layer = 3
            elif data_df.iloc[targetIndex].BOTZ > 200 and box_depth[targetIndex] < 401:
                layer = 4
            elif data_df.iloc[targetIndex].BOTZ > 400:
                layer = 5

            if find_particle:
                Surface_particles[timeValue][box_id][layer] = Surface_particles[timeValue][box_id][layer] + partProb
                No_layer_particles[timeValue][box_id] = No_layer_particles[timeValue][box_id] + partProb
                
                # uncomment line below to ignore particle decay during debugging.
                # Surface_particles[timeValue][box_id] = Surface_particles[timeValue][box_id] + 1.0
            
                #matchFound = 1
                #if debug:
                #    print('At time ' + str(timeValue) + ' Particle (' + str(partIndex) + ') in box ' + str(data_df.iloc[targetIndex].BOX_ID))

                break

        if matchFound == 0:
            if debug:
                print('No match for particle')
                print(partLon, partLat)

        #break

oil[:,:] = No_layer_particles * oil_per_particle * 1e6
Naphthalene[:,:,:] = Surface_particles * naph_per_particle
Phenanthrene[:,:,:] = Surface_particles * phen_per_particle
Pyrene[:,:,:] = Surface_particles * pyrene_per_particle
Benzo[:,:,:] = Surface_particles * benzo_per_particle

ncfile.close()


ValueError: cannot convert float NaN to integer

In [39]:
np.histogram(No_layer_particles)

(array([2167,   17,   18,    3,    1,    2,    1,    0,    0,    1]),
 array([ 0. ,  7.5, 15. , 22.5, 30. , 37.5, 45. , 52.5, 60. , 67.5, 75. ]))