In [1]:
import numpy as np
import pandas as pd
from scipy.interpolate import LinearNDInterpolator
import os
from tqdm import tqdm
import datetime



def interpolate_era_5(era_5_df, cygnss_df, component):
    """Linearly interpolate one ERA5 column onto the CYGNSS sample points.

    A ``LinearNDInterpolator`` is built over the ``(lon, lat, time)`` columns
    of ``era_5_df`` and evaluated at the ``(lon, lat, time)`` columns of
    ``cygnss_df``.

    Args:
        era_5_df: table providing ``lon``, ``lat``, ``time`` and the value
            columns ``u10`` / ``v10``.
        cygnss_df: DataFrame providing the ``lon``, ``lat`` and ``time``
            columns of the query points.
        component: ``'v10'`` selects the ``v10`` column; any other value
            selects ``u10``.

    Returns:
        The interpolated values, one per row of ``cygnss_df``. Query points
        the interpolator cannot cover get its default fill value (NaN).
    """
    column = 'v10' if component == 'v10' else 'u10'

    # Stack the coordinates into one (n, 3) array rather than materialising a
    # Python list of n tuples; the interpolator receives the same points but
    # without the per-row tuple overhead.
    points = np.column_stack((era_5_df['lon'], era_5_df['lat'], era_5_df['time']))
    interpolator = LinearNDInterpolator(points, era_5_df[column])

    return interpolator(cygnss_df["lon"].to_numpy(),
                        cygnss_df["lat"].to_numpy(),
                        cygnss_df["time"].to_numpy())
    
    

def interpolate_oskar(oskar_df, cygnss_df, component):
    """Linearly interpolate one OSCAR column onto the CYGNSS sample points.

    A ``LinearNDInterpolator`` is built over the ``(lat, lon, time)`` columns
    of ``oskar_df`` and evaluated at the ``(lat, lon, time)`` columns of
    ``cygnss_df`` (note the axis order: latitude first).

    Args:
        oskar_df: table providing ``lat``, ``lon``, ``time`` and the value
            columns ``u`` / ``v``.
        cygnss_df: DataFrame providing the ``lat``, ``lon`` and ``time``
            columns of the query points.
        component: ``'v'`` selects the ``v`` column; any other value selects
            ``u``.

    Returns:
        The interpolated values, one per row of ``cygnss_df``. Query points
        the interpolator cannot cover get its default fill value (NaN).
    """
    column = 'v' if component == 'v' else 'u'

    # Stack the coordinates into one (n, 3) array rather than materialising a
    # Python list of n tuples; the interpolator receives the same points but
    # without the per-row tuple overhead.
    points = np.column_stack((oskar_df['lat'], oskar_df['lon'], oskar_df['time']))
    interpolator = LinearNDInterpolator(points, oskar_df[column])

    return interpolator(cygnss_df["lat"].to_numpy(),
                        cygnss_df["lon"].to_numpy(),
                        cygnss_df["time"].to_numpy())


    

def get_interp_cygnnss(cygnss_df, era_5_df, oskar_df):
    """Add interpolated ``delta`` and ``wind`` columns to ``cygnss_df``.

    The ``u10``/``v10`` columns of ``era_5_df`` and the ``u``/``v`` columns of
    ``oskar_df`` are interpolated onto the rows of ``cygnss_df``. Two columns
    are then written to ``cygnss_df`` in place:

    * ``delta`` - magnitude of the (ERA5 minus OSCAR) component differences,
    * ``wind``  - magnitude of the interpolated ERA5 components.

    Args:
        cygnss_df: DataFrame of query points; modified in place.
        era_5_df: table passed to ``interpolate_era_5``.
        oskar_df: table passed to ``interpolate_oskar``.

    Returns:
        The same ``cygnss_df`` object, with the two columns added.
    """
    wind_v = interpolate_era_5(era_5_df, cygnss_df, 'v10')
    wind_u = interpolate_era_5(era_5_df, cygnss_df, 'u10')
    current_v = interpolate_oskar(oskar_df, cygnss_df, 'v')
    current_u = interpolate_oskar(oskar_df, cygnss_df, 'u')

    # MEMORY: the source tables are not needed past this point, so drop this
    # function's references to them before allocating the result arrays.
    del era_5_df, oskar_df

    relative_u = wind_u - current_u
    relative_v = wind_v - current_v

    wind_speed = np.sqrt(wind_u ** 2 + wind_v ** 2)
    relative_speed = np.sqrt(relative_u ** 2 + relative_v ** 2)

    del wind_v, wind_u, current_u, current_v

    cygnss_df['delta'] = relative_speed
    cygnss_df['wind'] = wind_speed

    return cygnss_df


def get_relevant_filenames(cygnss_filename, data_set):
    """List the file names to look for around the date of a CYGNSS file.

    The date is parsed from fixed positions of ``cygnss_filename``
    (characters 0-3 year, 5-6 month, 8-9 day, e.g. ``2017_04_15.csv``).

    Args:
        cygnss_filename: file name starting with ``YYYY_MM_DD``.
        data_set: ``"era_5"`` requests the following day only; any other
            value requests the 7 preceding and the 7 following days.

    Returns:
        A list starting with ``cygnss_filename`` itself, followed by one
        ``YYYY_MM_DD.csv`` name per requested day (preceding days first,
        oldest to newest, then the following days).
    """
    base_day = datetime.datetime(int(cygnss_filename[0:4]),
                                 int(cygnss_filename[5:7]),
                                 int(cygnss_filename[8:10]))

    if data_set == "era_5":
        day_offsets = [1]
    else:
        day_offsets = list(range(-7, 0)) + list(range(1, 8))

    names = [cygnss_filename]
    for offset in day_offsets:
        day = base_day + datetime.timedelta(days=offset)
        names.append("{}_{:02d}_{:02d}.csv".format(day.year, day.month, day.day))
    return names


def relevant_files(input_arguments, directory):
    """Return the paths of files in ``directory`` matching any given prefix.

    Args:
        input_arguments: iterable of file-name prefixes (full file names work
            too, since a name starts with itself).
        directory: directory to list; it is joined to each match with ``/``.

    Returns:
        ``directory + '/' + name`` for every entry of ``directory`` whose name
        starts with at least one prefix, sorted by name. Each file appears at
        most once, even when several prefixes match it, so callers that load
        every returned path never read the same file twice.

    Raises:
        OSError: propagated from ``os.listdir`` (e.g. missing directory).
    """
    # str.startswith accepts a tuple and tests all prefixes in one call; an
    # empty tuple matches nothing.
    prefixes = tuple(input_arguments)

    return [directory + '/' + file_name
            for file_name in sorted(os.listdir(directory))
            if file_name.startswith(prefixes)]

In [2]:
# Co-locate every CYGNSS file with the ERA5 and OSCAR files around its date
# and write the result (original columns plus 'delta' and 'wind') as CSV.
#
# The CYGNSS files are listed AND read from the same directory. Previously
# the names came from "reduced_size_cr_1/level_2_mss" while the data was read
# from "reduced_size/level_2_mss/", so the files being processed were not the
# ones being enumerated.
cygnss_dir = "reduced_size_cr_1/level_2_mss"
output_dir = "colocated_data_cr_1"

# Fail early (or create the folder) instead of after the first long
# interpolation when the output directory does not exist yet.
os.makedirs(output_dir, exist_ok=True)

for file in tqdm(sorted(os.listdir(cygnss_dir))):

    candidate_era_5_files = get_relevant_filenames(file, 'era_5')
    candidate_oskar_files = get_relevant_filenames(file, 'oskar')

    era_5_files = relevant_files(candidate_era_5_files, 'reduced_size/era_5')
    oskar_files = relevant_files(candidate_oskar_files, 'reduced_size/oskar_data')

    # More than one file per data set is required before interpolating;
    # otherwise the day is skipped and reported below.
    if len(era_5_files) > 1 and len(oskar_files) > 1:
        df = get_interp_cygnnss(pd.read_csv(cygnss_dir + "/" + file),
                                pd.concat(map(pd.read_csv, era_5_files)),
                                pd.concat(map(pd.read_csv, oskar_files)))

        df.to_csv(output_dir + "/" + file, index=False)
    else:
        # Report the skipped file together with whatever inputs were found.
        print(file)
        print(era_5_files)
        print(oskar_files)

 17%|█▋        | 25/143 [1:31:59<7:23:14, 225.38s/it]

2017_04_15.csv
[]
['reduced_size/oskar_data/2017_04_12.csv', 'reduced_size/oskar_data/2017_04_17.csv', 'reduced_size/oskar_data/2017_04_22.csv']
2017_04_16.csv
['reduced_size/era_5/2017_04_17.csv']
['reduced_size/oskar_data/2017_04_12.csv', 'reduced_size/oskar_data/2017_04_17.csv', 'reduced_size/oskar_data/2017_04_22.csv']


 38%|███▊      | 54/143 [3:57:46<9:26:45, 382.09s/it] 

2017_05_14.csv
['reduced_size/era_5/2017_05_14.csv']
['reduced_size/oskar_data/2017_05_07.csv', 'reduced_size/oskar_data/2017_05_12.csv', 'reduced_size/oskar_data/2017_05_17.csv']
2017_05_15.csv
[]
['reduced_size/oskar_data/2017_05_12.csv', 'reduced_size/oskar_data/2017_05_17.csv', 'reduced_size/oskar_data/2017_05_22.csv']
2017_05_16.csv
['reduced_size/era_5/2017_05_17.csv']
['reduced_size/oskar_data/2017_05_12.csv', 'reduced_size/oskar_data/2017_05_17.csv', 'reduced_size/oskar_data/2017_05_22.csv']


 59%|█████▉    | 85/143 [6:17:19<3:19:24, 206.29s/it]

2017_06_14.csv
['reduced_size/era_5/2017_06_14.csv']
['reduced_size/oskar_data/2017_06_07.csv', 'reduced_size/oskar_data/2017_06_12.csv', 'reduced_size/oskar_data/2017_06_17.csv']
2017_06_15.csv
[]
['reduced_size/oskar_data/2017_06_12.csv', 'reduced_size/oskar_data/2017_06_17.csv', 'reduced_size/oskar_data/2017_06_22.csv']
2017_06_16.csv
['reduced_size/era_5/2017_06_17.csv']
['reduced_size/oskar_data/2017_06_12.csv', 'reduced_size/oskar_data/2017_06_17.csv', 'reduced_size/oskar_data/2017_06_22.csv']


 80%|████████  | 115/143 [9:19:42<2:38:41, 340.07s/it]

2017_07_14.csv
['reduced_size/era_5/2017_07_14.csv']
['reduced_size/oskar_data/2017_07_07.csv', 'reduced_size/oskar_data/2017_07_12.csv', 'reduced_size/oskar_data/2017_07_17.csv']
2017_07_15.csv
[]
['reduced_size/oskar_data/2017_07_12.csv', 'reduced_size/oskar_data/2017_07_17.csv', 'reduced_size/oskar_data/2017_07_22.csv']
2017_07_16.csv
['reduced_size/era_5/2017_07_17.csv']
['reduced_size/oskar_data/2017_07_12.csv', 'reduced_size/oskar_data/2017_07_17.csv', 'reduced_size/oskar_data/2017_07_22.csv']


100%|██████████| 143/143 [10:36:12<00:00, 266.94s/it] 

2017_08_10.csv
['reduced_size/era_5/2017_08_10.csv']
['reduced_size/oskar_data/2017_08_06.csv', 'reduced_size/oskar_data/2017_08_12.csv']



