In [1]:
import numpy as np
import pandas as pd
from scipy.interpolate import LinearNDInterpolator
import os
from tqdm import tqdm
import datetime
from pathlib import Path



def interpolate_era_5(era_5_df, cygnss_df, component):
    """Linearly interpolate one ERA5 wind component onto the CYGNSS samples.

    A ``LinearNDInterpolator`` is built over the ``(lon, lat, time)`` columns
    of ``era_5_df`` and evaluated at the ``(lon, lat, time)`` columns of
    ``cygnss_df``.

    Parameters
    ----------
    era_5_df : pandas.DataFrame
        Must contain the columns ``lon``, ``lat``, ``time`` and ``component``.
    cygnss_df : pandas.DataFrame
        Must contain the columns ``lon``, ``lat`` and ``time``.
        NOTE(review): both frames are assumed to use the same units and
        reference for each coordinate -- confirm against the preprocessing.
    component : str
        Either ``'v10'`` or ``'u10'``.

    Returns
    -------
    numpy.ndarray
        One interpolated value per row of ``cygnss_df``. Query points outside
        the convex hull of the ERA5 points get SciPy's default fill value (NaN).

    Raises
    ------
    ValueError
        If ``component`` is not ``'v10'`` or ``'u10'``. Previously every
        unknown name was silently treated as ``'u10'``.
    """
    if component not in ('v10', 'u10'):
        raise ValueError(
            "component must be 'v10' or 'u10', got {!r}".format(component))

    axes = ['lon', 'lat', 'time']
    # One (n_points, 3) float array instead of a Python list of tuples: same
    # coordinates, but without materialising n_points tuple objects.
    points = np.column_stack(
        [era_5_df[axis].to_numpy(dtype=float) for axis in axes])
    interpolator = LinearNDInterpolator(points, era_5_df[component].to_numpy())

    # The query coordinates are passed in the same axis order as `points`.
    return interpolator(*[cygnss_df[axis].to_numpy() for axis in axes])
    
    

def interpolate_oskar(oskar_df, cygnss_df, component):
    """Linearly interpolate one current component onto the CYGNSS samples.

    A ``LinearNDInterpolator`` is built over the ``(lat, lon, time)`` columns
    of ``oskar_df`` and evaluated at the ``(lat, lon, time)`` columns of
    ``cygnss_df``.

    Parameters
    ----------
    oskar_df : pandas.DataFrame
        Must contain the columns ``lat``, ``lon``, ``time`` and ``component``.
    cygnss_df : pandas.DataFrame
        Must contain the columns ``lat``, ``lon`` and ``time``.
        NOTE(review): both frames are assumed to use the same units and
        reference for each coordinate -- confirm against the preprocessing.
    component : str
        Either ``'v'`` or ``'u'``.

    Returns
    -------
    numpy.ndarray
        One interpolated value per row of ``cygnss_df``. Query points outside
        the convex hull of the input points get SciPy's default fill value (NaN).

    Raises
    ------
    ValueError
        If ``component`` is not ``'v'`` or ``'u'``. Previously every unknown
        name was silently treated as ``'u'``.
    """
    if component not in ('v', 'u'):
        raise ValueError(
            "component must be 'v' or 'u', got {!r}".format(component))

    axes = ['lat', 'lon', 'time']
    # One (n_points, 3) float array instead of a Python list of tuples: same
    # coordinates, but without materialising n_points tuple objects.
    points = np.column_stack(
        [oskar_df[axis].to_numpy(dtype=float) for axis in axes])
    interpolator = LinearNDInterpolator(points, oskar_df[component].to_numpy())

    # The query coordinates are passed in the same axis order as `points`.
    return interpolator(*[cygnss_df[axis].to_numpy() for axis in axes])


    

def get_interp_cygnnss(cygnss_df, era_5_df, oskar_df):
    """Attach interpolated wind speed and wind-minus-current speed to ``cygnss_df``.

    The ERA5 components ``v10``/``u10`` and the oskar components ``v``/``u``
    are interpolated onto the rows of ``cygnss_df``. Two columns are then
    written to ``cygnss_df`` in place:

    * ``delta`` -- magnitude of the vector ``(u10 - u, v10 - v)``
    * ``wind``  -- magnitude of the vector ``(u10, v10)``

    Returns the same ``cygnss_df`` object that was passed in.
    """
    wind_v = interpolate_era_5(era_5_df, cygnss_df, 'v10')
    wind_u = interpolate_era_5(era_5_df, cygnss_df, 'u10')
    current_v = interpolate_oskar(oskar_df, cygnss_df, 'v')
    current_u = interpolate_oskar(oskar_df, cygnss_df, 'u')

    # The source frames are not needed past this point; drop the local
    # references to them before allocating the derived arrays.
    del era_5_df, oskar_df

    relative_u = wind_u - current_u
    relative_v = wind_v - current_v

    wind_speed = np.sqrt(wind_u ** 2 + wind_v ** 2)
    relative_speed = np.sqrt(relative_u ** 2 + relative_v ** 2)

    # Release the per-component arrays now that only the magnitudes are used.
    del wind_v, wind_u, current_u, current_v

    cygnss_df['delta'] = relative_speed
    cygnss_df['wind'] = wind_speed

    return cygnss_df


def get_relevant_filenames(cygnss_filename, data_set):
    """List the file names of the days surrounding a CYGNSS file's date.

    The date is read from fixed positions of ``cygnss_filename``: characters
    0-3 are the year, 5-6 the month and 8-9 the day (as in ``2017_04_15.csv``).

    The returned list always starts with ``cygnss_filename`` itself, followed by
    ``YYYY_MM_DD.csv`` names for:

    * ``data_set == "era_5"``: the next day only;
    * any other ``data_set``: the 7 days before (oldest first), then the
      7 days after (nearest first).
    """
    anchor = datetime.datetime(
        int(cygnss_filename[0:4]),
        int(cygnss_filename[5:7]),
        int(cygnss_filename[8:10]),
    )

    def _name_for(day_offset):
        # File name of the day that lies `day_offset` days from the anchor date.
        shifted = anchor + datetime.timedelta(days=day_offset)
        return "{}_{:02d}_{:02d}.csv".format(shifted.year, shifted.month, shifted.day)

    if data_set == "era_5":
        day_offsets = [1]
    else:
        day_offsets = list(range(-7, 0)) + list(range(1, 8))

    return [cygnss_filename] + [_name_for(offset) for offset in day_offsets]


def relevant_files(input_arguments, directory):
    """Return paths of the entries in ``directory`` that match any given prefix.

    Parameters
    ----------
    input_arguments : iterable of str
        Name prefixes; an entry is kept when its name starts with at least one.
    directory : str
        Directory to list.

    Returns
    -------
    list of str
        ``directory + '/' + name`` for every matching entry, sorted by name.
        Each entry appears at most once, even when several prefixes match it.
        The previous nested loop appended the entry once per matching prefix.
    """
    # `str.startswith` accepts a tuple of prefixes, so each entry is tested once.
    prefixes = tuple(input_arguments)

    return [directory + '/' + file_name
            for file_name in sorted(os.listdir(directory))
            if file_name.startswith(prefixes)]

In [None]:
# Co-locate every CYGNSS level-2 file with the ERA5 and oskar files around its
# date, and write the result under `output_dir` using the same file name.
output_dir = "colocated_data_cr_2"
input_dir = "reduced_size_cr_2"

# Make sure the destination exists; otherwise the first `to_csv` below fails
# on a fresh run.
os.makedirs(output_dir, exist_ok=True)

# Only date-named CSV files can be parsed by `get_relevant_filenames`; skipping
# everything else keeps stray entries (e.g. `.ipynb_checkpoints`) from aborting
# the run. Sorting makes the processing order deterministic.
files = sorted(name for name in os.listdir(input_dir + "/level_2_mss")
               if name.endswith(".csv"))
for file_1 in tqdm(files):
    my_file = Path(output_dir + "/" + file_1)
    # Outputs written by an earlier run are kept, so the cell can be resumed.
    if not my_file.is_file():
        potensial_era_5_files = get_relevant_filenames(file_1, 'era_5')
        potensial_oskar_files = get_relevant_filenames(file_1, 'oskar')

        # Reduce the candidate names to the files that actually exist on disk.
        era_5_files = relevant_files(potensial_era_5_files, input_dir + '/era_5')
        oskar_files = relevant_files(potensial_oskar_files, input_dir + '/oskar_data')

        # At least two files per data set are required before interpolating.
        if len(era_5_files) > 1 and len(oskar_files) > 1:
            df = get_interp_cygnnss(pd.read_csv(input_dir + "/level_2_mss/" + file_1),
                                    pd.concat(map(pd.read_csv, era_5_files)),
                                    pd.concat(map(pd.read_csv, oskar_files)))

            df.to_csv(output_dir + "/" + file_1, index=False)
        else:
            # Report the days that were skipped and which inputs were found.
            print(file_1)
            print(era_5_files)
            print(oskar_files)

  7%|▋         | 116/1775 [00:00<00:01, 1128.11it/s]

2017_04_15.csv
[]
['reduced_size_cr_2/oskar_data/2017_04_12.csv', 'reduced_size_cr_2/oskar_data/2017_04_17.csv', 'reduced_size_cr_2/oskar_data/2017_04_22.csv']
2017_04_16.csv
['reduced_size_cr_2/era_5/2017_04_17.csv']
['reduced_size_cr_2/oskar_data/2017_04_12.csv', 'reduced_size_cr_2/oskar_data/2017_04_17.csv', 'reduced_size_cr_2/oskar_data/2017_04_22.csv']
2017_05_14.csv
['reduced_size_cr_2/era_5/2017_05_14.csv']
['reduced_size_cr_2/oskar_data/2017_05_07.csv', 'reduced_size_cr_2/oskar_data/2017_05_12.csv', 'reduced_size_cr_2/oskar_data/2017_05_17.csv']
2017_05_15.csv
[]
['reduced_size_cr_2/oskar_data/2017_05_12.csv', 'reduced_size_cr_2/oskar_data/2017_05_17.csv', 'reduced_size_cr_2/oskar_data/2017_05_22.csv']
2017_05_16.csv
['reduced_size_cr_2/era_5/2017_05_17.csv']
['reduced_size_cr_2/oskar_data/2017_05_12.csv', 'reduced_size_cr_2/oskar_data/2017_05_17.csv', 'reduced_size_cr_2/oskar_data/2017_05_22.csv']
2017_06_14.csv
['reduced_size_cr_2/era_5/2017_06_14.csv']
['reduced_size_cr_2/os

 18%|█▊        | 320/1775 [00:00<00:01, 856.03it/s] 

2017_09_16.csv
['reduced_size_cr_2/era_5/2017_09_17.csv']
['reduced_size_cr_2/oskar_data/2017_09_11.csv', 'reduced_size_cr_2/oskar_data/2017_09_16.csv', 'reduced_size_cr_2/oskar_data/2017_09_21.csv']
2017_10_14.csv
['reduced_size_cr_2/era_5/2017_10_14.csv']
['reduced_size_cr_2/oskar_data/2017_10_11.csv', 'reduced_size_cr_2/oskar_data/2017_10_16.csv']
2017_10_15.csv
[]
['reduced_size_cr_2/oskar_data/2017_10_11.csv', 'reduced_size_cr_2/oskar_data/2017_10_16.csv', 'reduced_size_cr_2/oskar_data/2017_10_22.csv']
2017_10_16.csv
['reduced_size_cr_2/era_5/2017_10_17.csv']
['reduced_size_cr_2/oskar_data/2017_10_11.csv', 'reduced_size_cr_2/oskar_data/2017_10_16.csv', 'reduced_size_cr_2/oskar_data/2017_10_22.csv']
2017_11_14.csv
['reduced_size_cr_2/era_5/2017_11_14.csv']
['reduced_size_cr_2/oskar_data/2017_11_11.csv', 'reduced_size_cr_2/oskar_data/2017_11_16.csv', 'reduced_size_cr_2/oskar_data/2017_11_21.csv']
2017_11_15.csv
[]
['reduced_size_cr_2/oskar_data/2017_11_11.csv', 'reduced_size_cr_2/os

 23%|██▎       | 408/1775 [00:00<00:01, 731.96it/s]

2018_03_14.csv
['reduced_size_cr_2/era_5/2018_03_14.csv']
['reduced_size_cr_2/oskar_data/2018_03_07.csv', 'reduced_size_cr_2/oskar_data/2018_03_12.csv', 'reduced_size_cr_2/oskar_data/2018_03_18.csv']
2018_03_15.csv
[]
['reduced_size_cr_2/oskar_data/2018_03_12.csv', 'reduced_size_cr_2/oskar_data/2018_03_18.csv']
2018_03_16.csv
['reduced_size_cr_2/era_5/2018_03_17.csv']
['reduced_size_cr_2/oskar_data/2018_03_12.csv', 'reduced_size_cr_2/oskar_data/2018_03_18.csv', 'reduced_size_cr_2/oskar_data/2018_03_23.csv']
2018_04_14.csv
['reduced_size_cr_2/era_5/2018_04_14.csv']
['reduced_size_cr_2/oskar_data/2018_04_07.csv', 'reduced_size_cr_2/oskar_data/2018_04_12.csv', 'reduced_size_cr_2/oskar_data/2018_04_17.csv']
2018_04_15.csv
[]
['reduced_size_cr_2/oskar_data/2018_04_12.csv', 'reduced_size_cr_2/oskar_data/2018_04_17.csv', 'reduced_size_cr_2/oskar_data/2018_04_22.csv']
2018_04_16.csv
['reduced_size_cr_2/era_5/2018_04_17.csv']
['reduced_size_cr_2/oskar_data/2018_04_12.csv', 'reduced_size_cr_2/os

 31%|███▏      | 559/1775 [00:00<00:01, 672.17it/s]

2018_07_14.csv
['reduced_size_cr_2/era_5/2018_07_14.csv']
['reduced_size_cr_2/oskar_data/2018_07_07.csv', 'reduced_size_cr_2/oskar_data/2018_07_12.csv', 'reduced_size_cr_2/oskar_data/2018_07_17.csv']
2018_07_15.csv
[]
['reduced_size_cr_2/oskar_data/2018_07_12.csv', 'reduced_size_cr_2/oskar_data/2018_07_17.csv', 'reduced_size_cr_2/oskar_data/2018_07_22.csv']
2018_07_16.csv
['reduced_size_cr_2/era_5/2018_07_17.csv']
['reduced_size_cr_2/oskar_data/2018_07_12.csv', 'reduced_size_cr_2/oskar_data/2018_07_17.csv', 'reduced_size_cr_2/oskar_data/2018_07_22.csv']
2018_08_14.csv
['reduced_size_cr_2/era_5/2018_08_14.csv']
['reduced_size_cr_2/oskar_data/2018_08_12.csv', 'reduced_size_cr_2/oskar_data/2018_08_17.csv']
2018_08_15.csv
[]
['reduced_size_cr_2/oskar_data/2018_08_12.csv', 'reduced_size_cr_2/oskar_data/2018_08_17.csv', 'reduced_size_cr_2/oskar_data/2018_08_22.csv']
2018_08_16.csv
['reduced_size_cr_2/era_5/2018_08_17.csv']
['reduced_size_cr_2/oskar_data/2018_08_12.csv', 'reduced_size_cr_2/os

 40%|███▉      | 704/1775 [00:01<00:01, 618.27it/s]

2018_11_15.csv
[]
['reduced_size_cr_2/oskar_data/2018_11_11.csv', 'reduced_size_cr_2/oskar_data/2018_11_16.csv', 'reduced_size_cr_2/oskar_data/2018_11_21.csv']
2018_11_16.csv
['reduced_size_cr_2/era_5/2018_11_17.csv']
['reduced_size_cr_2/oskar_data/2018_11_11.csv', 'reduced_size_cr_2/oskar_data/2018_11_16.csv', 'reduced_size_cr_2/oskar_data/2018_11_21.csv']
2018_12_14.csv
['reduced_size_cr_2/era_5/2018_12_14.csv']
['reduced_size_cr_2/oskar_data/2018_12_11.csv', 'reduced_size_cr_2/oskar_data/2018_12_16.csv', 'reduced_size_cr_2/oskar_data/2018_12_21.csv']
2018_12_15.csv
[]
['reduced_size_cr_2/oskar_data/2018_12_11.csv', 'reduced_size_cr_2/oskar_data/2018_12_16.csv', 'reduced_size_cr_2/oskar_data/2018_12_21.csv']
2018_12_16.csv
['reduced_size_cr_2/era_5/2018_12_17.csv']
['reduced_size_cr_2/oskar_data/2018_12_11.csv', 'reduced_size_cr_2/oskar_data/2018_12_16.csv', 'reduced_size_cr_2/oskar_data/2018_12_21.csv']
2019_01_14.csv
['reduced_size_cr_2/era_5/2019_01_14.csv']
['reduced_size_cr_2/os

 46%|████▌     | 816/1775 [00:01<00:01, 743.50it/s]

2019_03_15.csv
[]
['reduced_size_cr_2/oskar_data/2019_03_12.csv', 'reduced_size_cr_2/oskar_data/2019_03_18.csv']
2019_03_16.csv
['reduced_size_cr_2/era_5/2019_03_17.csv']
['reduced_size_cr_2/oskar_data/2019_03_12.csv', 'reduced_size_cr_2/oskar_data/2019_03_18.csv', 'reduced_size_cr_2/oskar_data/2019_03_23.csv']
2019_04_14.csv
['reduced_size_cr_2/era_5/2019_04_14.csv']
['reduced_size_cr_2/oskar_data/2019_04_07.csv', 'reduced_size_cr_2/oskar_data/2019_04_12.csv', 'reduced_size_cr_2/oskar_data/2019_04_17.csv']
2019_04_15.csv
[]
['reduced_size_cr_2/oskar_data/2019_04_12.csv', 'reduced_size_cr_2/oskar_data/2019_04_17.csv', 'reduced_size_cr_2/oskar_data/2019_04_22.csv']
2019_04_16.csv
['reduced_size_cr_2/era_5/2019_04_17.csv']
['reduced_size_cr_2/oskar_data/2019_04_12.csv', 'reduced_size_cr_2/oskar_data/2019_04_17.csv', 'reduced_size_cr_2/oskar_data/2019_04_22.csv']
2019_05_14.csv
['reduced_size_cr_2/era_5/2019_05_14.csv']
['reduced_size_cr_2/oskar_data/2019_05_07.csv', 'reduced_size_cr_2/os

 56%|█████▌    | 997/1775 [00:01<00:00, 780.36it/s]

2019_09_15.csv
[]
['reduced_size_cr_2/oskar_data/2019_09_11.csv', 'reduced_size_cr_2/oskar_data/2019_09_16.csv', 'reduced_size_cr_2/oskar_data/2019_09_21.csv']
2019_09_16.csv
['reduced_size_cr_2/era_5/2019_09_17.csv']
['reduced_size_cr_2/oskar_data/2019_09_11.csv', 'reduced_size_cr_2/oskar_data/2019_09_16.csv', 'reduced_size_cr_2/oskar_data/2019_09_21.csv']
2019_10_14.csv
['reduced_size_cr_2/era_5/2019_10_14.csv']
['reduced_size_cr_2/oskar_data/2019_10_11.csv', 'reduced_size_cr_2/oskar_data/2019_10_16.csv']
2019_10_15.csv
[]
['reduced_size_cr_2/oskar_data/2019_10_11.csv', 'reduced_size_cr_2/oskar_data/2019_10_16.csv', 'reduced_size_cr_2/oskar_data/2019_10_22.csv']
2019_10_16.csv
['reduced_size_cr_2/era_5/2019_10_17.csv']
['reduced_size_cr_2/oskar_data/2019_10_11.csv', 'reduced_size_cr_2/oskar_data/2019_10_16.csv', 'reduced_size_cr_2/oskar_data/2019_10_22.csv']
2019_11_14.csv
['reduced_size_cr_2/era_5/2019_11_14.csv']
['reduced_size_cr_2/oskar_data/2019_11_11.csv', 'reduced_size_cr_2/os

 66%|██████▌   | 1170/1775 [00:01<00:00, 788.60it/s]

2020_02_16.csv
['reduced_size_cr_2/era_5/2020_02_17.csv']
['reduced_size_cr_2/oskar_data/2020_02_10.csv', 'reduced_size_cr_2/oskar_data/2020_02_15.csv', 'reduced_size_cr_2/oskar_data/2020_02_20.csv']
2020_03_14.csv
['reduced_size_cr_2/era_5/2020_03_14.csv']
['reduced_size_cr_2/oskar_data/2020_03_07.csv', 'reduced_size_cr_2/oskar_data/2020_03_12.csv', 'reduced_size_cr_2/oskar_data/2020_03_17.csv']
2020_03_15.csv
[]
['reduced_size_cr_2/oskar_data/2020_03_12.csv', 'reduced_size_cr_2/oskar_data/2020_03_17.csv', 'reduced_size_cr_2/oskar_data/2020_03_22.csv']
2020_03_16.csv
['reduced_size_cr_2/era_5/2020_03_17.csv']
['reduced_size_cr_2/oskar_data/2020_03_12.csv', 'reduced_size_cr_2/oskar_data/2020_03_17.csv', 'reduced_size_cr_2/oskar_data/2020_03_22.csv']
2020_04_14.csv
['reduced_size_cr_2/era_5/2020_04_14.csv']
['reduced_size_cr_2/oskar_data/2020_04_11.csv', 'reduced_size_cr_2/oskar_data/2020_04_16.csv', 'reduced_size_cr_2/oskar_data/2020_04_21.csv']
2020_04_15.csv
[]
['reduced_size_cr_2/os

 70%|███████   | 1250/1775 [00:01<00:01, 443.21it/s]


['reduced_size_cr_2/oskar_data/2020_07_07.csv', 'reduced_size_cr_2/oskar_data/2020_07_12.csv', 'reduced_size_cr_2/oskar_data/2020_07_17.csv']
2020_07_15.csv
[]
['reduced_size_cr_2/oskar_data/2020_07_12.csv', 'reduced_size_cr_2/oskar_data/2020_07_17.csv', 'reduced_size_cr_2/oskar_data/2020_07_22.csv']
2020_07_16.csv
['reduced_size_cr_2/era_5/2020_07_17.csv']
['reduced_size_cr_2/oskar_data/2020_07_12.csv', 'reduced_size_cr_2/oskar_data/2020_07_17.csv', 'reduced_size_cr_2/oskar_data/2020_07_22.csv']
2020_08_14.csv
['reduced_size_cr_2/era_5/2020_08_14.csv']
['reduced_size_cr_2/oskar_data/2020_08_11.csv', 'reduced_size_cr_2/oskar_data/2020_08_16.csv', 'reduced_size_cr_2/oskar_data/2020_08_21.csv']
2020_08_15.csv
[]
['reduced_size_cr_2/oskar_data/2020_08_11.csv', 'reduced_size_cr_2/oskar_data/2020_08_16.csv', 'reduced_size_cr_2/oskar_data/2020_08_21.csv']
2020_08_16.csv
['reduced_size_cr_2/era_5/2020_08_17.csv']
['reduced_size_cr_2/oskar_data/2020_08_11.csv', 'reduced_size_cr_2/oskar_data/2

 77%|███████▋  | 1371/1775 [00:02<00:00, 464.40it/s]

[]
['reduced_size_cr_2/oskar_data/2020_10_11.csv', 'reduced_size_cr_2/oskar_data/2020_10_16.csv', 'reduced_size_cr_2/oskar_data/2020_10_21.csv']
2020_10_16.csv
['reduced_size_cr_2/era_5/2020_10_17.csv']
['reduced_size_cr_2/oskar_data/2020_10_11.csv', 'reduced_size_cr_2/oskar_data/2020_10_16.csv', 'reduced_size_cr_2/oskar_data/2020_10_21.csv']
2020_11_14.csv
['reduced_size_cr_2/era_5/2020_11_14.csv']
['reduced_size_cr_2/oskar_data/2020_11_11.csv', 'reduced_size_cr_2/oskar_data/2020_11_16.csv', 'reduced_size_cr_2/oskar_data/2020_11_21.csv']
2020_11_15.csv
[]
['reduced_size_cr_2/oskar_data/2020_11_11.csv', 'reduced_size_cr_2/oskar_data/2020_11_16.csv', 'reduced_size_cr_2/oskar_data/2020_11_21.csv']
2020_11_16.csv
['reduced_size_cr_2/era_5/2020_11_17.csv']
['reduced_size_cr_2/oskar_data/2020_11_11.csv', 'reduced_size_cr_2/oskar_data/2020_11_16.csv', 'reduced_size_cr_2/oskar_data/2020_11_21.csv']
2020_12_14.csv
['reduced_size_cr_2/era_5/2020_12_14.csv']
['reduced_size_cr_2/oskar_data/2020_1

 77%|███████▋  | 1371/1775 [00:20<00:00, 464.40it/s]