# How to convert typical Lagrangian datasets to a ragged array for use with the CloudDrift toolbox?

- GLAD experiment CODE-style drifter trajectories (low-pass filtered, 15 minute interval records), northern Gulf of Mexico near DeSoto Canyon, July-October 2012
https://data.gulfresearchinitiative.org/data/R1.x134.073:0004

In [1]:
import pandas as pd
import xarray as xr
import os
import urllib.request

In [2]:
# Download the GLAD data file, unless a local copy already exists.
file_glad = 'GLAD_15min_filtered.dat'
if not os.path.isfile(file_glad):
    url = 'https://data.gulfresearchinitiative.org/pelagos-symfony/api/file/download/169841'
    # Fetch into a temporary name and rename only after the transfer succeeds,
    # so an interrupted download never leaves a truncated file that the
    # isfile() guard above would accept as complete on the next run.
    file_partial = file_glad + '.part'
    urllib.request.urlretrieve(url, file_partial)
    os.replace(file_partial, file_glad)

In [3]:
# Parse the whitespace-delimited GLAD text file into a DataFrame.
# The separator is a raw string: '\s' inside a plain string literal is an
# invalid escape sequence and raises a warning on recent Python versions.
# `header=5` designates the header row of the file; its labels are replaced
# by the explicit `names` below.
df = pd.read_csv(file_glad, sep=r'\s+', header=5,
                 names=['id', 'date', 'time', 'lat', 'lon', 'err_pos', 've', 'vn', 'err_vel'])

# Combine the separate date and time text columns into one datetime column
# (placed first), then discard the two source columns.
df.insert(0, 'datetime', pd.to_datetime(df['date'] + " " + df['time']))
df = df.drop(columns=['date', 'time'])

# Reduce the id string to the integer formed by its last three characters
# and use it as the index, so rows can be grouped per drifter.
df['id'] = pd.to_numeric(df['id'].str.slice(start=-3))
df = df.set_index('id')

In [4]:
# Display the parsed table, now indexed by the numeric drifter id.
df

Unnamed: 0_level_0,datetime,lat,lon,err_pos,ve,vn,err_vel
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,2012-07-20 01:30:00.172800,28.561528,-87.209627,10.0,0.022,-0.230,0.033
1,2012-07-20 01:45:00.201530,28.559712,-87.209429,10.0,0.021,-0.213,0.033
1,2012-07-20 02:00:00.230360,28.558055,-87.209234,10.0,0.021,-0.197,0.033
1,2012-07-20 02:15:00.259200,28.556529,-87.209033,10.0,0.022,-0.184,0.033
1,2012-07-20 02:30:00.287930,28.555091,-87.208814,10.0,0.025,-0.175,0.033
...,...,...,...,...,...,...,...
451,2012-08-27 04:45:07.796162,26.338664,-87.101153,226.3,0.532,-0.285,0.033
451,2012-08-27 05:00:07.799045,26.336314,-87.096215,227.0,0.525,-0.281,0.033
451,2012-08-27 05:15:07.801919,26.333967,-87.091274,227.7,0.501,-0.268,0.033
451,2012-08-27 05:30:07.804802,26.331789,-87.086681,228.2,0.465,-0.248,0.033


# Create the ragged array using `xarray.Dataset`

In [5]:
# Count the observations belonging to each drifter id. sort=False keeps the
# counts from being reordered by frequency, so `rowsize` lines up with `ID`.
c = df.index.value_counts(sort=False)
unique_id, rowsize = c.index, c.values

# Assemble the ragged array: per-trajectory variables live on the 'traj'
# dimension, while all observations are concatenated along 'obs'.
ds = xr.Dataset(
    data_vars=dict(
        rowsize=(['traj'], rowsize, {'long_name': 'Number of observations per trajectory', 'units': '-'}),
        err_pos=(['obs'], df.err_pos, {'long_name': 'estimated position error', 'units': 'm'}),
        ve=(['obs'], df.ve, {'long_name': 'Eastward velocity', 'units': 'm/s'}),
        vn=(['obs'], df.vn, {'long_name': 'Northward velocity', 'units': 'm/s'}),
        # Metadata corrected: this variable is the velocity error, not a
        # latitude error, so it shares the velocity units of ve/vn.
        err_vel=(['obs'], df.err_vel, {'long_name': 'estimated velocity error', 'units': 'm/s'}),
    ),

    coords=dict(
        ID=(['traj'], unique_id, {'long_name': 'Buoy ID', 'units': '-'}),
        longitude=(['obs'], df.lon, {'long_name': 'Longitude', 'units': 'degrees_east'}),
        latitude=(['obs'], df.lat, {'long_name': 'Latitude', 'units': 'degrees_north'}),
        time=(['obs'], df.datetime, {'long_name': 'Time'}),
        # Per-observation copy of the buoy id, taken from the DataFrame index.
        ids=(['obs'], df.index, {'long_name': "Buoy ID for all observations", 'units': '-'}),
    ),

    attrs={
        'title': 'Example of ragged array creation for a typical Lagrangian data set',
    }
)

In [6]:
# Display the assembled Dataset to inspect its dimensions, coordinates and variables.
ds