### This code removes drogued drifters by temporally selecting drifter data after the date they lost their drogue. Next, it separates ultimately beaching drifters from never-beaching drifters and saves them to csv files

In [6]:
# set working directory
import os
os.chdir('/dat1/openonic/Drifters') # directory

In [7]:
# dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr

In [8]:
# load file
gdp = xr.open_dataset('gdp.nc', decode_times=False)

In [4]:
import time
# set up returned dataframe
variables = ['id', 'time', 'lat', 'lon', 've', 'vn'] 
data_list = []

# extract values
ids = gdp.ids.values
times = gdp.time.values
lats = gdp.latitude.values
lons = gdp.longitude.values
ves = gdp.ve.values
vns = gdp.vn.values

In [5]:
# extract values and set to pandas dataframe
start_time = time.time()
data_list = [
    {'id': i, 'time': time, 'lat': lat, 'lon': lon, 've': ve, 'vn': vn}
    for i, time, lat, lon, ve, vn in zip(ids, times, lats, lons, ves, vns)
]

full_data = pd.DataFrame(data_list, columns=variables)

end_time = time.time()

elapsed_time = end_time - start_time

print(f"Elapsed time: {elapsed_time} seconds")

Elapsed time: 498.8266108036041 seconds


In [6]:
beach_ids = []
unbeach_ids = []

for i in range(len(gdp.type_death)):
    if gdp.type_death[i].item() == 1:
        beach_ids.append(gdp.ID[i].item())
    else:
        unbeach_ids.append(gdp.ID[i].item())

In [7]:
beach_full = full_data[full_data['id'].isin(beach_ids)] # get all ids that beached
unbeach_full = full_data[full_data['id'].isin(unbeach_ids)] # get all ids that didn't beach

In [8]:
min_time = gdp.drogue_lost_date.values # get time of drogue lost
min_ID = gdp.drogue_lost_date.coords['ID'].values # get ids of drogue lost
min_time_df = pd.DataFrame({'min_time': min_time, 'id': min_ID}) # put them in a DF together
beach_min_time = min_time_df[min_time_df['id'].isin(beach_ids)] # separate into beached
unbeach_min_time = min_time_df[min_time_df['id'].isin(unbeach_ids)] # and unbeached

In [9]:
# beached
merged_beach = pd.merge(beach_full, beach_min_time, left_on='id', right_on='id', how='inner')

# remove drogued values using time of drogue loss
undrogued_beach_ = merged_beach[merged_beach['time'] >= merged_beach['min_time']]

# remove drogue loss time from df
undrogued_beach = undrogued_beach_.drop(columns=['min_time'])

In [10]:
# unbeached
merged_unbeach = pd.merge(unbeach_full, unbeach_min_time, left_on='id', right_on='id', how='inner')

# remove drogued values using time of drogue loss
undrogued_unbeach_ = merged_unbeach[merged_unbeach['time'] >= merged_unbeach['min_time']]

# remove drogue loss time from df
undrogued_unbeach = undrogued_unbeach_.drop(columns=['min_time'])

In [15]:
# add column to undrogued_beach 'time to beach'


# get the last time values of beached
last_points = undrogued_beach.drop_duplicates(subset='id', keep='last')

def find_time_to_beach(beach, beach_last,time_between_register_beaching_and_actually_beaching):
    # DF to Numpy Arrays
    beach_array = beach.to_numpy()
    beach_last_array = beach_last.to_numpy()

    # Create a dictionary to store the last known time for each ID
    last_time_dict = dict(zip(beach_last_array[:, 0], beach_last_array[:, 1]))

    beach_time = []

    # Iterate through beach array and calculate time differences
    for row in beach_array:
        current_ID = row[0]
        last_time = last_time_dict.get(current_ID, None)
        
        if last_time is not None:
            current_time = row[1]
            if current_time != last_time: # prevent 0 value --> infinite time as beach_last exists within beach
                time_difference = last_time - current_time
                beach_time.append(time_difference)
            if current_time == last_time: # keep lat, lon, and values for hist shape the same
                beach_time.append(time_between_register_beaching_and_actually_beaching)

    return beach_time

time_between_register_beaching_and_actually_beaching = 0 # immediate beaching (?)

beach_time = find_time_to_beach(undrogued_beach, last_points, time_between_register_beaching_and_actually_beaching)

In [19]:
undrogued_beach['time_to_beach'] = beach_time

In [23]:
undrogued_unbeach.to_csv('undrogued_unbeach.csv', index=False) # save to csv

In [24]:
undrogued_beach.to_csv('undrogued_beach.csv', index=False) # save to csv