In [1]:
import numpy as np
import os, re
import pandas as pd

def read_files_GDP(years, file = r'./Drifter_raw'):
    """Load every drifter CSV found in the per-year sub-folders of ``file``.

    Parameters
    ----------
    years : iterable
        Names of the sub-folders to read (each is converted with ``str``),
        e.g. ``[2003, 2004]`` reads ``<file>/2003`` and ``<file>/2004``.
    file : str
        Root folder that contains one sub-folder per year.

    Returns
    -------
    list of numpy.ndarray
        One array per ``.csv`` file (the ``.values`` of the parsed table),
        ordered by year as given and, within a year, by file name.

    Raises
    ------
    FileNotFoundError
        If a year sub-folder does not exist.
    """
    lst = []

    for y in years:
        path = os.path.join(file, str(y))
        # os.listdir returns names in arbitrary, platform-dependent order;
        # sorting keeps the order of the returned list reproducible.
        for filename in sorted(os.listdir(path)):
            if filename.endswith(".csv"):
                file_drifter = os.path.join(path, filename)
                lst.append(pd.read_csv(file_drifter).values)

    return lst

In [2]:
# Years to load (2003 through 2006); each one is passed to read_files_GDP,
# which reads the matching sub-folder of the raw-data directory.
years = list(range(2003, 2007))
data = read_files_GDP(years)

In [3]:
from scipy import signal
from math import sin
import matplotlib.pyplot as plt

def filter_data(lat, long):
    """Low-pass filter a drifter track with a zero-phase Butterworth filter.

    The cut-off period is 1.5 times the inertial period at the mean latitude
    of the track, capped at 5 days. If the longitude jumps by more than 100
    degrees between consecutive samples (a -180/180 wrap), the longitude is
    filtered in a [0, 360] representation and mapped back afterwards, so the
    jump is not smeared across the globe.

    Parameters
    ----------
    lat, long : array-like
        Latitude and longitude in degrees. Values must be convertible to
        float (object-dtype columns are accepted). The code assumes one
        sample every 6 hours.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Filtered latitude and filtered longitude, as float arrays.

    Raises
    ------
    ValueError
        If an input is empty, or if it is too short for the default edge
        padding used by ``scipy.signal.filtfilt``.
    """
    # Normalise to float arrays: columns taken from a mixed-type
    # ``DataFrame.values`` arrive with dtype=object.
    lat = np.asarray(lat, dtype=float)
    long = np.asarray(long, dtype=float)
    if lat.size == 0 or long.size == 0:
        raise ValueError("filter_data needs a non-empty latitude and longitude series")

    # sampling frequency of drifters: 1/6hours (in cycles per hour)
    fs = 4/24

    # Cut-off period in hours: 1.5 * inertial period at the mean latitude,
    # but never longer than 5 days. The factor 60*60 converts the Earth's
    # rotation rate from rad/s to rad/hour.
    mean_lat_rad = np.nanmean(lat)*np.pi/180
    coriolis = 2*7.27*10**(-5)*60*60*np.abs(sin(mean_lat_rad))
    # At the equator the inertial period is infinite; the division then
    # yields inf and the 5-day cap below applies, so no warning is needed.
    with np.errstate(divide='ignore'):
        inertial_cutoff = 1.5*2*np.pi/coriolis
    T = min(inertial_cutoff, 5*24)
    f = 1/T

    # low-pass frequency, normalised by the Nyquist frequency as butter expects
    lowpass_frequency = f/(fs/2)

    # Check if there is a crossing from -180° to 180° (or vice versa) in the
    # longitudinal position and, if so, transform the longitudinal position
    # from [-180, 180] to [0, 360]
    crossing = bool(np.any(np.abs(np.diff(long)) > 100))
    if crossing:
        long = np.where(long <= 0, long + 360, long)

    # Low-pass trajectories (same filter for both coordinates; filtfilt runs
    # it forwards and backwards so the result has no phase shift)
    b, a = signal.butter(6, lowpass_frequency, 'low', analog = False)
    lat = signal.filtfilt(b, a, lat)
    long = signal.filtfilt(b, a, long)

    # If there is a crossing, then retransform coordinate back into range [-180, 180]
    if crossing:
        long = np.where(long >= 180, long - 360, long)

    return lat, long

In [4]:
from datetime import datetime, timedelta
import pandas as pd

# Start from an empty output folder so files from an earlier run are not mixed
# with the ones written below.
output_dir = "./Drifter_processed/"
os.makedirs(output_dir, exist_ok=True)  # a fresh checkout may not contain the folder yet
for f in os.listdir(output_dir):
    stale_path = os.path.join(output_dir, f)
    # Only plain files are removed: os.remove raises on sub-directories
    # (e.g. Jupyter's .ipynb_checkpoints).
    if os.path.isfile(stale_path):
        os.remove(stale_path)

# Define begin and end time
time_begin = datetime(2006, 9, 4, 0, 0, 0)
time_end = datetime(2006, 10, 4, 0, 0, 0)

time_format = '%Y-%m-%d %H:%M:%S'
# A drifter must have data up to 2 days after time_end; with one sample every
# 6 hours those 2 days are 8 samples, which are cut off again before saving.
buffer_samples = 8
buffer_end = time_end + timedelta(days=2)

# Column layout used below: d[:, 1] time string, d[:, 2] longitude,
# d[:, 3] latitude; d[0, 0] is used as the output file name
# (presumably the drifter ID - confirm against the raw files).
for d in data:
    for i in range(d.shape[0]):
        # Find the row recorded exactly at time_begin
        if datetime.strptime(d[i, 1], time_format) != time_begin:
            continue
        for ii in range(i, d.shape[0]):
            # Find the row recorded exactly 2 days after time_end
            if datetime.strptime(d[ii, 1], time_format) != buffer_end:
                continue
            latitude, longitude = filter_data(d[:, 3], d[:, 2])
            # We need to extract only the 30 days of trajectory and neglect the last 2 days.
            # This is done because when low-pass-filtering the trajectories with
            # 1.5*inertial_Period ~ 2 days the information about the last two days
            # of the trajectory is lost.
            # NOTE(review): the slice is 30 days long only if the rows between
            # i and ii are gap-free 6-hourly samples - confirm for the raw data.
            long = longitude[i:ii-buffer_samples]
            lat = latitude[i:ii-buffer_samples]
            # Only take drifters which are in the Atlantic
            # It makes no sense to include drifter from remote regions of the ocean
            # which never visit the Gulf stream in the time-interval of interest
            if np.max(lat) < 80 and np.min(lat) > -20 and np.max(long) < -20 and np.min(long) > -90:
                df = pd.DataFrame({'time': d[i:ii-buffer_samples,1], 'longitude': long, 'latitude': lat})
                df.to_csv(output_dir + str(int(d[0,0])) +".csv", index = False)
            break
        # Only the first row matching time_begin is considered for each drifter
        break