In [1]:
import numpy as np
import pandas as pd
from scipy.interpolate import LinearNDInterpolator
import os
from tqdm import tqdm
import datetime



def interpolate_era_5(era_5_df, cygnss_df, component):
    """Linearly interpolate one ERA5 column onto the CYGNSS sample points.

    A ``LinearNDInterpolator`` is built over the points
    ``(sp_lon, sp_lat, hours_since_ref)`` of ``era_5_df`` and evaluated at
    ``(lon, lat, sample_time)`` of ``cygnss_df``.

    Parameters
    ----------
    era_5_df : pandas.DataFrame
        Must contain ``sp_lon``, ``sp_lat``, ``hours_since_ref`` and the
        column named by ``component``.
    cygnss_df : pandas.DataFrame
        Must contain ``lon``, ``lat`` and ``sample_time``.
        NOTE(review): assumes ``sample_time`` uses the same unit and
        reference as ``hours_since_ref`` -- confirm against the data prep.
    component : str
        ``'v10'`` or ``'u10'``.

    Returns
    -------
    numpy.ndarray
        One interpolated value per row of ``cygnss_df``. Points outside the
        convex hull of the ERA5 points are NaN (the interpolator's default
        ``fill_value``).

    Raises
    ------
    ValueError
        If ``component`` is not ``'v10'`` or ``'u10'``. Previously any other
        value was silently treated as ``'u10'``.
    """
    if component not in ('v10', 'u10'):
        raise ValueError(
            "component must be 'v10' or 'u10', got {!r}".format(component)
        )

    # Build the (n, 3) point array directly instead of a Python list of
    # tuples; same values, far less overhead for large inputs.
    era_5_points = np.column_stack([
        era_5_df['sp_lon'].to_numpy(),
        era_5_df['sp_lat'].to_numpy(),
        era_5_df['hours_since_ref'].to_numpy(),
    ])
    interpolator = LinearNDInterpolator(era_5_points, era_5_df[component].to_numpy())

    return interpolator(
        cygnss_df["lon"].to_numpy(),
        cygnss_df["lat"].to_numpy(),
        cygnss_df["sample_time"].to_numpy(),
    )
    
    

def interpolate_oskar(oskar_df, cygnss_df, component):
    """Linearly interpolate one column of ``oskar_df`` onto the CYGNSS points.

    A ``LinearNDInterpolator`` is built over the points ``(lat, lon, time)``
    of ``oskar_df`` and evaluated at ``(lat, lon, sample_time)`` of
    ``cygnss_df``.

    Parameters
    ----------
    oskar_df : pandas.DataFrame
        Must contain ``lat``, ``lon``, ``time`` and the column named by
        ``component``.
    cygnss_df : pandas.DataFrame
        Must contain ``lat``, ``lon`` and ``sample_time``.
        NOTE(review): assumes ``sample_time`` uses the same unit and
        reference as ``oskar_df['time']`` -- confirm against the data prep.
    component : str
        ``'v'`` or ``'u'``.

    Returns
    -------
    numpy.ndarray
        One interpolated value per row of ``cygnss_df``. Points outside the
        convex hull of the source points are NaN (the interpolator's default
        ``fill_value``).

    Raises
    ------
    ValueError
        If ``component`` is not ``'v'`` or ``'u'``. Previously any other
        value was silently treated as ``'u'``.
    """
    if component not in ('v', 'u'):
        raise ValueError(
            "component must be 'v' or 'u', got {!r}".format(component)
        )

    # Build the (n, 3) point array directly instead of a Python list of
    # tuples; same values, far less overhead for large inputs.
    oskar_points = np.column_stack([
        oskar_df['lat'].to_numpy(),
        oskar_df['lon'].to_numpy(),
        oskar_df['time'].to_numpy(),
    ])
    interpolator = LinearNDInterpolator(oskar_points, oskar_df[component].to_numpy())

    return interpolator(
        cygnss_df["lat"].to_numpy(),
        cygnss_df["lon"].to_numpy(),
        cygnss_df["sample_time"].to_numpy(),
    )


    

def get_interp_cygnnss(cygnss_df, era_5_df, oskar_df):
    """Attach interpolated wind magnitudes to a CYGNSS frame.

    The ``v10``/``u10`` columns of ``era_5_df`` and the ``v``/``u`` columns of
    ``oskar_df`` are interpolated onto the rows of ``cygnss_df`` through
    ``interpolate_era_5`` and ``interpolate_oskar``. Two columns are then
    written into ``cygnss_df`` (the frame is modified in place):

    * ``delta``      -- magnitude of the vector ``(u10 - u, v10 - v)``
    * ``total_wind`` -- magnitude of the vector ``(u10, v10)``

    Progress messages are printed along the way.

    Returns
    -------
    pandas.DataFrame
        The same ``cygnss_df`` object that was passed in.
    """
    print("Interp v10")
    era5_v = interpolate_era_5(era_5_df, cygnss_df, 'v10')
    print("Interp u10")
    era5_u = interpolate_era_5(era_5_df, cygnss_df, 'u10')
    print("Interp v")
    oskar_v = interpolate_oskar(oskar_df, cygnss_df, 'v')
    print("Interp u")
    oskar_u = interpolate_oskar(oskar_df, cygnss_df, 'u')

    # The source frames are not needed past this point; drop the local names.
    del era_5_df, oskar_df

    wind_magnitude = np.sqrt(era5_u ** 2 + era5_v ** 2)
    difference_magnitude = np.sqrt((era5_u - oskar_u) ** 2 + (era5_v - oskar_v) ** 2)

    # Release the per-component arrays before touching the data frame.
    del era5_v, era5_u, oskar_u, oskar_v

    print("Done calcu")

    cygnss_df['delta'] = difference_magnitude
    cygnss_df['total_wind'] = wind_magnitude

    print("done fixing df")

    return cygnss_df


def get_relevant_filenames(cygnss_filename, data_set):
    """List the file names to look for around the date of a CYGNSS file.

    The date is parsed from the ``YYYY_MM_DD`` prefix of ``cygnss_filename``
    (characters 0-3, 5-6 and 8-9). The result always starts with
    ``cygnss_filename`` itself, followed by ``YYYY_MM_DD.csv`` names for:

    * ``data_set == "era_5"``: the following day only;
    * any other ``data_set``: the seven preceding days (oldest first) and
      then the seven following days.

    Raises
    ------
    ValueError
        If the prefix of ``cygnss_filename`` is not a valid date.
    """
    base_day = datetime.datetime(
        int(cygnss_filename[0:4]),
        int(cygnss_filename[5:7]),
        int(cygnss_filename[8:10]),
    )

    if data_set == "era_5":
        day_offsets = [1]
    else:
        day_offsets = list(range(-7, 0)) + list(range(1, 8))

    def csv_name(offset):
        # Name of the CSV file for the day `offset` days away from base_day.
        day = base_day + datetime.timedelta(days=offset)
        return "{}_{:02d}_{:02d}.csv".format(day.year, day.month, day.day)

    return [cygnss_filename] + [csv_name(offset) for offset in day_offsets]


def relevant_files(input_arguments, directory):
    """Return paths of the files in ``directory`` that start with a given prefix.

    Parameters
    ----------
    input_arguments : iterable of str
        File-name prefixes to look for.
    directory : str
        Directory to scan (non-recursive).

    Returns
    -------
    list of str
        ``directory + '/' + name`` for every matching entry, sorted by file
        name. Each file appears at most once, even when it matches several
        prefixes; previously it was appended once per matching prefix, which
        would duplicate its rows when the files are later concatenated.
    """
    # str.startswith accepts a tuple and returns True if any prefix matches,
    # so each file is tested (and listed) exactly once.
    prefixes = tuple(input_arguments)
    return [
        directory + '/' + file_name
        for file_name in sorted(os.listdir(directory))
        if file_name.startswith(prefixes)
    ]

In [None]:
# Colocate every CYGNSS file in "level_2_mss" with the matching files from
# "era_5" and "oskar_data", writing one CSV per input file to "colocated_data".

# os.listdir returns entries in arbitrary order and may include non-data
# entries (e.g. checkpoint folders) whose names would crash the date parsing
# in get_relevant_filenames; keep only regular .csv files, in sorted order.
files = sorted(
    name for name in os.listdir("level_2_mss")
    if name.endswith(".csv") and os.path.isfile(os.path.join("level_2_mss", name))
)

# DataFrame.to_csv does not create missing directories.
os.makedirs("colocated_data", exist_ok=True)

for file in tqdm(files):

    potensial_era_5_files = get_relevant_filenames(file, 'era_5')
    potensial_oskar_files = get_relevant_filenames(file, 'oskar')

    era_5_files = relevant_files(potensial_era_5_files, 'era_5')
    oskar_files = relevant_files(potensial_oskar_files, 'oskar_data')

    print("Processing these files : ")
    print(file)
    print(era_5_files)
    print(oskar_files)

    # pd.concat raises on an empty sequence; report the gap and move on
    # instead of aborting the whole batch.
    if not era_5_files or not oskar_files:
        print("Skipping " + file + ": no matching era_5 or oskar_data files found")
        continue

    # The input frames are passed as temporaries so no module-level name
    # keeps them alive after the call.
    df = get_interp_cygnnss(pd.read_csv("level_2_mss" + "/" + file),
                            pd.concat(map(pd.read_csv, era_5_files)),
                            pd.concat(map(pd.read_csv, oskar_files)))

    print("Creating Montly Colacated Data CSV")
    # NOTE(review): `file` already ends in ".csv", so outputs are named
    # "<date>.csv.csv". Kept unchanged so existing outputs and any downstream
    # readers keep working -- confirm before renaming.
    df.to_csv("colocated_data/" + file + ".csv", index=False)

  0%|          | 0/143 [00:00<?, ?it/s]

Processing these files : 
2017_03_18.csv
['era_5/2017_03_18.csv', 'era_5/2017_03_19.csv']
['oskar_data/2017_03_18.csv', 'oskar_data/2017_03_23.csv']
Interp v10
