# Finding missions of interest

In this notebook, we use the `allDatasets` dataset from ERDDAP to examine the primary metadata of all glider missions. We will use only nrt datasets to speed up the process of examining metadata

In [1]:
import utils
import datetime
from tqdm.notebook import tqdm
import pandas as pd

In [2]:
e = utils.init_erddap()

# Fetch dataset list
e.response = "csv"
e.dataset_id = "allDatasets"
df_datasets = e.to_pandas(parse_dates=['minTime (UTC)', 'maxTime (UTC)'])

# drop the allDatasets row and make the datasetID the index for easier reading
df_datasets.set_index("datasetID", inplace=True)
df_datasets.drop("allDatasets", inplace=True)
df_datasets = df_datasets[df_datasets.index.str[:3] == "nrt"]
print(f"Analysing {len(df_datasets)} nrt datasets")

Analysing 96 nrt datasets


In [3]:
df_datasets.head()

Unnamed: 0_level_0,accessible,institution,dataStructure,cdm_data_type,class,title,minLongitude (degrees_east),maxLongitude (degrees_east),longitudeSpacing (degrees_east),minLatitude (degrees_north),...,fgdc,iso19115,metadata,sourceUrl,infoUrl,rss,email,testOutOfDate,outOfDate,summary
datasetID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
nrt_SEA068_M27,public,Voice of the Ocean Foundation,table,TimeSeries,EDDTableFromMultidimNcFiles,Aster68-20220727T1709,19.901883,19.9823,,58.19985,...,https://erddap.observations.voiceoftheocean.or...,https://erddap.observations.voiceoftheocean.or...,https://erddap.observations.voiceoftheocean.or...,(local files),https://cfconventions.org/cf-conventions/v1.6....,https://erddap.observations.voiceoftheocean.or...,https://erddap.observations.voiceoftheocean.or...,,,Summer 2022 Eastern Gotland Basin trials
nrt_SEA076_M8,public,Voice of the Ocean Foundation,table,TimeSeries,EDDTableFromMultidimNcFiles,Fibbla76-20221005T1504,15.736967,16.32375,,55.532217,...,https://erddap.observations.voiceoftheocean.or...,https://erddap.observations.voiceoftheocean.or...,https://erddap.observations.voiceoftheocean.or...,(local files),https://cfconventions.org/cf-conventions/v1.6....,https://erddap.observations.voiceoftheocean.or...,https://erddap.observations.voiceoftheocean.or...,now-1day,0.072726,Part of NS_Bornholm continuous monitoring
nrt_SEA069_M9,public,Voice of the Ocean Foundation,table,TimeSeries,EDDTableFromMultidimNcFiles,Kalmus69-20220727T1637,19.909683,19.964233,,58.192767,...,https://erddap.observations.voiceoftheocean.or...,https://erddap.observations.voiceoftheocean.or...,https://erddap.observations.voiceoftheocean.or...,(local files),https://cfconventions.org/cf-conventions/v1.6....,https://erddap.observations.voiceoftheocean.or...,https://erddap.observations.voiceoftheocean.or...,,,Summer 2022 Eastern Gotland Basin trials
nrt_SEA069_M11,public,Voice of the Ocean Foundation,table,TimeSeries,EDDTableFromMultidimNcFiles,Kalmus69-20220923T1506,15.990183,16.3769,,55.255733,...,https://erddap.observations.voiceoftheocean.or...,https://erddap.observations.voiceoftheocean.or...,https://erddap.observations.voiceoftheocean.or...,(local files),https://cfconventions.org/cf-conventions/v1.6....,https://erddap.observations.voiceoftheocean.or...,https://erddap.observations.voiceoftheocean.or...,now-12days,0.498496,Part of SAMBA continuous monitoring
nrt_SEA055_M16,public,Voice of the Ocean Foundation,table,TimeSeries,EDDTableFromMultidimNcFiles,Kaprifol55-20201031T0902,15.700337,16.237867,,55.472545,...,https://erddap.observations.voiceoftheocean.or...,https://erddap.observations.voiceoftheocean.or...,https://erddap.observations.voiceoftheocean.or...,(local files),https://cfconventions.org/cf-conventions/v1.6....,https://erddap.observations.voiceoftheocean.or...,https://erddap.observations.voiceoftheocean.or...,,,Part of SAMBA continuous monitoring


In [4]:
# Find datasets from deployments that lasted longer than a set number of days
df_datasets["endurance"] = df_datasets['maxTime (UTC)'] - df_datasets['minTime (UTC)']
min_days = 30
df_datasets[df_datasets["endurance"] > datetime.timedelta(days=min_days)].endurance

datasetID
nrt_SEA045_M41   33 days 17:55:04
nrt_SEA045_M43   32 days 06:48:25
nrt_SEA045_M44   31 days 14:41:22
nrt_SEA044_M32   33 days 15:40:34
nrt_SEA044_M34   32 days 12:15:46
nrt_SEA044_M35   31 days 10:01:28
Name: endurance, dtype: timedelta64[ns]

### Which glider missions extended to a depth of greater than 150 m in the seas northeast of Gotland?

In [5]:
min_altitude = -150# note the sign!
min_easting = 19
min_northing = 58
mask = (df_datasets['maxAltitude (m)'] < min_altitude).values \
* (df_datasets['maxLongitude (degrees_east)'] > min_easting).values \
* (df_datasets['maxLatitude (degrees_north)'] > min_northing).values
df_datasets[mask].index.values

array(['nrt_SEA068_M27', 'nrt_SEA069_M9', 'nrt_SEA067_M26',
       'nrt_SEA067_M27', 'nrt_SEA067_M29', 'nrt_SEA067_M30',
       'nrt_SEA067_M32', 'nrt_SEA066_M41', 'nrt_SEA066_M42',
       'nrt_SEA066_M43'], dtype=object)

---------------------

# Deeper metadata

To access metadata at the sensor level, we need to download the .das metadata for each dataset. This is currently a rather hacky text parsing job. Look at the function `get_meta` in `utils.py` for details

In [6]:
ds_meta = {}
for dataset_id in tqdm(df_datasets.index):
    ds_meta[dataset_id] = utils.get_meta(dataset_id)

  0%|          | 0/96 [00:00<?, ?it/s]

### Expanding the table

Let's add this more detailed metadata to our metadtata DataFrame so we have more scope for filtering

In [7]:
for dataset_id, meta in ds_meta.items():
    for key, val in meta.items():
        if key not in list(df_datasets):
            df_datasets[key] = None
        # Some of the metadata is stored in dicts, which pandas does not like, so we cast to string
        try:
            df_datasets.loc[dataset_id, key] = val
        except:
            df_datasets.loc[dataset_id, key] = str(val)

Let's have a look at some of this more detailed metadat that we can now run queries against

### Which datasets were collected in Bornholm?

In [8]:
bornholm_missions = []
for dataset_id, meta in ds_meta.items():
    if "bornholm" in meta["basin"].lower():
        bornholm_missions.append(dataset_id)
print(f"Missions in Bornholm: {bornholm_missions}")

Missions in Bornholm: ['nrt_SEA076_M8', 'nrt_SEA069_M11', 'nrt_SEA055_M16', 'nrt_SEA055_M18', 'nrt_SEA055_M19', 'nrt_SEA055_M20', 'nrt_SEA055_M21', 'nrt_SEA055_M24', 'nrt_SEA055_M28', 'nrt_SEA055_M31', 'nrt_SEA055_M37', 'nrt_SEA055_M43', 'nrt_SEA045_M48', 'nrt_SEA045_M54', 'nrt_SEA045_M56', 'nrt_SEA045_M60', 'nrt_SEA045_M62', 'nrt_SEA045_M64', 'nrt_SEA045_M65', 'nrt_SEA045_M67', 'nrt_SEA063_M17', 'nrt_SEA063_M18', 'nrt_SEA063_M19', 'nrt_SEA063_M20', 'nrt_SEA063_M21', 'nrt_SEA063_M22', 'nrt_SEA063_M33', 'nrt_SEA063_M35', 'nrt_SEA063_M37', 'nrt_SEA063_M38', 'nrt_SEA063_M39', 'nrt_SEA063_M40', 'nrt_SEA044_M40', 'nrt_SEA044_M48', 'nrt_SEA066_M10', 'nrt_SEA066_M12', 'nrt_SEA066_M14', 'nrt_SEA077_M11', 'nrt_SEA077_M12', 'nrt_SEA077_M13', 'nrt_SEA056_M40', 'nrt_SEA061_M38', 'nrt_SEA061_M39', 'nrt_SEA061_M40', 'nrt_SEA061_M42', 'nrt_SEA061_M43', 'nrt_SEA061_M48', 'nrt_SEA061_M50', 'nrt_SEA061_M54']


### Which datasets were collected as part of the SAMBA project during 2022?

In [9]:
start = df_datasets["maxTime (UTC)"] > pd.Timestamp("2022-01-01").tz_localize('utc') 
end = df_datasets["minTime (UTC)"] < pd.Timestamp("2023-01-01").tz_localize('utc') 
project = df_datasets["project"] == "SAMBA"
print(f"SAMBA 2022 missions:")
print(df_datasets[mask].index)

SAMBA 2022 missions:
Index(['nrt_SEA068_M27', 'nrt_SEA069_M9', 'nrt_SEA067_M26', 'nrt_SEA067_M27',
       'nrt_SEA067_M29', 'nrt_SEA067_M30', 'nrt_SEA067_M32', 'nrt_SEA066_M41',
       'nrt_SEA066_M42', 'nrt_SEA066_M43'],
      dtype='object', name='datasetID')
