# How to convert a typical Lagrangian dataset to a ragged array for use with the CloudDrift toolbox

- GLAD experiment CODE-style drifter trajectories (low-pass filtered, 15 minute interval records), northern Gulf of Mexico near DeSoto Canyon, July-October 2012
https://data.gulfresearchinitiative.org/data/R1.x134.073:0004

In [None]:
import pandas as pd
import xarray as xr
import os
import urllib.request

In [None]:
# Download the GLAD data file, unless a local copy already exists from a
# previous run of this cell.
file_glad = 'GLAD_15min_filtered.dat'
if not os.path.isfile(file_glad):
    url = 'https://data.gulfresearchinitiative.org/pelagos-symfony/api/file/download/169841'
    # urlretrieve saves the response body to `file_glad`; its return value
    # (local filename, response headers) is not needed here.
    urllib.request.urlretrieve(url, file_glad)

In [None]:
# Read the whitespace-delimited GLAD text file into a DataFrame.
# The separator is a raw string: '\s' inside a plain string literal is an
# invalid escape sequence and triggers a warning on recent Python versions.
# `header=5` designates row 5 (0-indexed) as the file's header row and
# `names` supplies the column labels used throughout the rest of the notebook.
df = pd.read_csv(
    file_glad,
    delimiter=r'\s+',
    header=5,
    names=['id', 'date', 'time', 'lat', 'lon', 'err_pos', 've', 'vn', 'err_vel'],
)

# Combine the separate date and time strings into a single datetime column,
# inserted as the first column, then drop the two source columns.
df.insert(0, 'datetime', pd.to_datetime(df['date'] + " " + df['time']))
df = df.drop(labels=['date', 'time'], axis=1)

# Keep only the last three characters of each id string and convert them to a
# numeric id, then use that id as the row index.
df['id'] = pd.to_numeric(df['id'].str.slice(start=-3))
df = df.set_index('id')

In [None]:
# Bare expression: shown as the cell output to inspect the cleaned DataFrame.
df

# Create the ragged array using `xarray.Dataset`

In [None]:
# Count the number of observations belonging to each drifter id. The index of
# the result holds the unique ids and its values hold the per-id row counts.
# NOTE(review): `rowsize` is only meaningful if the counts come back in the
# same order as the trajectories are laid out along `obs` -- confirm that
# value_counts(sort=False) preserves order of appearance for the pandas
# version in use.
c = df.index.value_counts(sort=False)
unique_id, rowsize = c.index, c.values

# Build the ragged array: variables on the `traj` dimension hold one value per
# drifter, variables on the `obs` dimension hold the observations of all
# drifters stored one after another; `rowsize` gives the length of each
# trajectory's segment.
ds = xr.Dataset(
    data_vars=dict(
        rowsize=(['traj'], rowsize, {'long_name': 'Number of observations per trajectory', 'units': '-'}),
        err_pos=(['obs'], df.err_pos, {'long_name': 'estimated position error', 'units': 'm'}),
        ve=(['obs'], df.ve, {'long_name': 'Eastward velocity', 'units': 'm/s'}),
        vn=(['obs'], df.vn, {'long_name': 'Northward velocity', 'units': 'm/s'}),
        # Velocity error: described with velocity metadata (same units as
        # ve/vn) rather than latitude metadata.
        err_vel=(['obs'], df.err_vel, {'long_name': 'estimated velocity error', 'units': 'm/s'}),
    ),

    coords=dict(
        ID=(['traj'], unique_id, {'long_name': 'Buoy ID', 'units': '-'}),
        longitude=(['obs'], df.lon, {'long_name': 'Longitude', 'units': 'degrees_east'}),
        latitude=(['obs'], df.lat, {'long_name': 'Latitude', 'units': 'degrees_north'}),
        time=(['obs'], df.datetime, {'long_name': 'Time'}),
        # Per-observation copy of the drifter id (the DataFrame index).
        ids=(['obs'], df.index, {'long_name': "Buoy ID for all observations", 'units': '-'}),
    ),

    attrs={
        'title': 'Example of ragged array creation for a typical Lagrangian data set',
    }
)

In [None]:
# Bare expression: shown as the cell output to inspect the resulting Dataset.
ds