In [3]:
import numpy as np
import pandas as pd
from scipy.interpolate import LinearNDInterpolator
import os
from tqdm import tqdm
import datetime
from pathlib import Path



def interpolate_era_5(era_5_df, cygnss_df, component):
    """Linearly interpolate one ERA5 wind column onto the CYGNSS samples.

    A ``LinearNDInterpolator`` is built over the ``(lon, lat, time)`` points
    of ``era_5_df`` and then evaluated at the ``(lon, lat, time)`` of every
    row in ``cygnss_df``.

    Parameters
    ----------
    era_5_df : pandas.DataFrame
        Needs the columns ``lon``, ``lat``, ``time`` and the selected wind
        column (``u10`` or ``v10``).
    cygnss_df : pandas.DataFrame
        Needs the columns ``lon``, ``lat`` and ``time``.
    component : str
        ``'v10'`` selects the ``v10`` column; every other value selects
        ``u10``.

    Returns
    -------
    numpy.ndarray
        The interpolator's result for the CYGNSS coordinate arrays.
    """
    # Anything that is not exactly 'v10' falls back to the u10 column.
    wind_column = 'v10' if component == 'v10' else 'u10'

    era_5_points = list(zip(era_5_df['lon'], era_5_df['lat'], era_5_df['time']))
    interpolator = LinearNDInterpolator(era_5_points, era_5_df[wind_column])

    # Query coordinates are passed in the same (lon, lat, time) order used
    # to build the interpolator.
    query = [cygnss_df[axis].to_numpy() for axis in ('lon', 'lat', 'time')]
    return interpolator(*query)
    
    

def interpolate_oskar(oskar_df, cygnss_df, component):
    """Linearly interpolate one current column of ``oskar_df`` onto the CYGNSS samples.

    A ``LinearNDInterpolator`` is built over the ``(lat, lon, time)`` points
    of ``oskar_df`` and evaluated at the ``(lat, lon, time)`` of every row in
    ``cygnss_df``.

    Parameters
    ----------
    oskar_df : pandas.DataFrame
        Needs the columns ``lat``, ``lon``, ``time`` and the selected column
        (``u`` or ``v``).
    cygnss_df : pandas.DataFrame
        Needs the columns ``lat``, ``lon`` and ``time``.
    component : str
        ``'v'`` selects the ``v`` column; every other value selects ``u``.

    Returns
    -------
    numpy.ndarray
        The interpolator's result for the CYGNSS coordinate arrays.
    """
    # Anything that is not exactly 'v' falls back to the u column.
    current_column = 'v' if component == 'v' else 'u'

    # Note the (lat, lon, time) axis order here; it must match the query below.
    axes = ('lat', 'lon', 'time')
    oskar_points = list(zip(*(oskar_df[axis] for axis in axes)))
    interpolator = LinearNDInterpolator(oskar_points, oskar_df[current_column])

    return interpolator(*(cygnss_df[axis].to_numpy() for axis in axes))
    
def interpolate_nearest(cygnss_df, era_5_df):
    """Look up the ERA5 wind at the grid node nearest to every CYGNSS row.

    Each CYGNSS ``lat``/``lon`` is snapped to the nearest multiple of 0.25
    and each ``time`` to the nearest integer; the snapped triple is then
    matched exactly against the ``lat``/``lon``/``time`` columns of
    ``era_5_df``.

    The previous implementation overwrote ``lat``/``lon``/``time`` on every
    loop iteration and ran the lookup once after the loop, so only the last
    CYGNSS row was ever matched (and an empty frame raised
    ``UnboundLocalError``). This version returns one value per CYGNSS row.

    Parameters
    ----------
    cygnss_df : pandas.DataFrame
        Needs numeric columns ``lat``, ``lon`` and ``time``.
    era_5_df : pandas.DataFrame
        Needs the columns ``lat``, ``lon``, ``time``, ``u10`` and ``v10``.
        Presumably sampled on a 0.25-unit grid with integer time steps --
        TODO confirm against the ERA5 export.

    Returns
    -------
    tuple of pandas.Series
        ``(u10, v10)``, both indexed like ``cygnss_df``. Rows without a
        matching ERA5 node hold ``NaN``.
    """
    keys = ['lat', 'lon', 'time']

    # np.round uses round-half-to-even, the same rule as the built-in round()
    # that the row-wise version applied.
    snapped = pd.DataFrame({
        'lat': np.round(cygnss_df['lat'].to_numpy(dtype=float) * 4) / 4,
        'lon': np.round(cygnss_df['lon'].to_numpy(dtype=float) * 4) / 4,
        'time': np.round(cygnss_df['time'].to_numpy(dtype=float)),
    })

    # Keep one ERA5 row per node so the left merge cannot multiply CYGNSS
    # rows, and cast the keys to float so both sides share a dtype.
    grid = (
        era_5_df[keys + ['u10', 'v10']]
        .drop_duplicates(subset=keys)
        .astype({key: float for key in keys})
    )

    # A left merge preserves the order of the CYGNSS rows.
    matched = snapped.merge(grid, on=keys, how='left')
    matched.index = cygnss_df.index
    return matched['u10'], matched['v10']

def interpolate_nearest_vec(lat, lon, time, era_5_df):
    """Return the ERA5 wind rows at the grid node nearest to a single sample.

    ``lat`` and ``lon`` are snapped to the nearest multiple of 0.25 and
    ``time`` to the nearest integer; rows of ``era_5_df`` whose ``lat``,
    ``lon`` and ``time`` equal the snapped values exactly are selected.

    Parameters
    ----------
    lat, lon, time : float
        Coordinates of one sample.
    era_5_df : pandas.DataFrame
        Needs the columns ``lat``, ``lon``, ``time``, ``u10`` and ``v10``.

    Returns
    -------
    tuple of pandas.Series
        ``(u10, v10)`` restricted to the matching rows; both are empty when
        nothing matches.
    """
    node_lat = round(lat * 4) / 4
    node_lon = round(lon * 4) / 4
    node_time = round(time)

    # Exact equality is intentional: the snapped values are compared directly
    # against the stored grid coordinates.
    on_node = (
        (era_5_df.lat == node_lat)
        & (era_5_df.lon == node_lon)
        & (era_5_df.time == node_time)
    )
    return era_5_df.loc[on_node, 'u10'], era_5_df.loc[on_node, 'v10']

    

def get_interp_cygnnss(cygnss_df, era_5_df, oskar_df):
    """Attach the nearest-node ERA5 lookup to every CYGNSS row.

    For each row, ``interpolate_nearest_vec`` is called with the row's
    ``lat``, ``lon`` and ``time``; the pair it returns is stored in a new
    column ``u10_v10_tup``. ``cygnss_df`` is modified in place and also
    returned.

    Parameters
    ----------
    cygnss_df : pandas.DataFrame
        Needs the columns ``lat``, ``lon`` and ``time``.
    era_5_df : pandas.DataFrame
        Passed through to ``interpolate_nearest_vec``.
    oskar_df : pandas.DataFrame
        Accepted but not used by the active code path.

    Returns
    -------
    pandas.DataFrame
        The same ``cygnss_df`` object with ``u10_v10_tup`` added.
    """
    def _lookup(row):
        # Row-wise lookup; every call scans era_5_df.
        return interpolate_nearest_vec(row.lat, row.lon, row.time, era_5_df)

    print("hei")
    cygnss_df['u10_v10_tup'] = cygnss_df.apply(_lookup, axis=1)
    print("hei")

    # Disabled draft (was never executed): interpolate both data sets and
    # derive the wind speed and the wind-minus-current magnitude.
    #
    # u10, v10 = interpolate_nearest(cygnss_df, era_5_df)
    # v10 = interpolate_era_5(era_5_df, cygnss_df, 'v10')
    # u10 = interpolate_era_5(era_5_df, cygnss_df, 'u10')
    # print("hei")
    #
    # v_current = interpolate_oskar(oskar_df, cygnss_df, 'v')
    # u_current = interpolate_oskar(oskar_df, cygnss_df, 'u')
    # print("hei")
    #
    # # MEMORY
    # del era_5_df
    # del oskar_df
    #
    # diff_u = u10 - u_current
    # diff_v = v10 - v_current
    #
    # total_wind = np.sqrt(u10 ** 2 + v10 ** 2)
    # delta = np.sqrt(diff_u ** 2 + diff_v ** 2)
    #
    # del v10
    # del u10
    # del u_current
    # del v_current
    #
    # cygnss_df['delta'] = delta
    # cygnss_df['wind'] = total_wind

    return cygnss_df


def get_relevant_filenames(cygnss_filename, data_set):
    """List the file names to look for alongside one CYGNSS file.

    The date is read from fixed positions of ``cygnss_filename``
    (characters 0-3 year, 5-6 month, 8-9 day, i.e. ``YYYY_MM_DD...``).

    Parameters
    ----------
    cygnss_filename : str
        File name starting with the date as described above.
    data_set : str
        ``"era_5"`` returns only the CYGNSS file name itself. Any other
        value additionally returns ``YYYY_MM_DD.csv`` names for the 7 days
        before and the 7 days after the date.

    Returns
    -------
    list of str
        ``cygnss_filename`` first, then (for non-``"era_5"``) day -7 .. -1
        followed by day +1 .. +7.

    Raises
    ------
    ValueError
        If the date fields cannot be parsed; this happens for every
        ``data_set`` because the date is parsed before branching.
    """
    year, month, day = (
        int(cygnss_filename[start:stop]) for start, stop in ((0, 4), (5, 7), (8, 10))
    )
    anchor = datetime.datetime(year, month, day)

    if data_set == "era_5":
        # A disabled draft also appended the following day's file here.
        return [cygnss_filename]

    # Offset 0 is skipped: the anchor day is already covered by
    # cygnss_filename itself at the head of the list.
    day_offsets = [*range(-7, 0), *range(1, 8)]
    neighbours = (anchor + datetime.timedelta(days=offset) for offset in day_offsets)
    return [cygnss_filename] + [
        f"{d.year}_{d.month:02d}_{d.day:02d}.csv" for d in neighbours
    ]


def relevant_files(input_arguments, directory):
    """Return the paths of files in ``directory`` whose name starts with any given prefix.

    Each matching file is returned exactly once, in sorted file-name order.
    The earlier nested loop appended a file once per matching prefix, so
    overlapping or repeated prefixes produced duplicate paths (and thus
    duplicated rows once the caller concatenated the files).

    Parameters
    ----------
    input_arguments : iterable of str
        File-name prefixes to accept.
    directory : str
        Directory to list.

    Returns
    -------
    list of str
        ``directory + '/' + file_name`` for every matching file; empty when
        nothing matches or no prefixes are given.

    Raises
    ------
    FileNotFoundError
        Propagated from ``os.listdir`` when ``directory`` does not exist.
    """
    # str.startswith accepts a tuple and returns True if any prefix matches;
    # an empty tuple never matches.
    prefixes = tuple(input_arguments)
    matching_names = [
        file_name
        for file_name in sorted(os.listdir(directory))
        if file_name.startswith(prefixes)
    ]
    return [directory + '/' + file_name for file_name in matching_names]

In [4]:
# Co-locate every CYGNSS level-2 file with the ERA5 and OSCAR files around
# its date and write one CSV per input file into output_dir. Files that
# already exist in output_dir are skipped, so the cell can be re-run to
# resume an interrupted job.
output_dir = "colocated_data_full_size"
input_dir = "D:/"

# input_dir already ends with a separator; os.path.join avoids the doubled
# "D://..." paths that plain string concatenation produced.
cygnss_dir = os.path.join(input_dir, "level_2_mss")
era_5_dir = os.path.join(input_dir, "era_5")
oskar_dir = os.path.join(input_dir, "oskar_data")

# Raises FileNotFoundError when the input drive/directory is not available.
files = os.listdir(cygnss_dir)

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output directory exists before the first write.
os.makedirs(output_dir, exist_ok=True)

for file_1 in tqdm(files):
    my_file = Path(output_dir) / file_1
    if my_file.is_file():
        # Already produced by an earlier run.
        continue

    potensial_era_5_files = get_relevant_filenames(file_1, 'era_5')
    potensial_oskar_files = get_relevant_filenames(file_1, 'oskar')

    era_5_files = relevant_files(potensial_era_5_files, era_5_dir)
    oskar_files = relevant_files(potensial_oskar_files, oskar_dir)

    # Printed for processed and skipped files alike, so missing inputs are
    # visible in the log.
    print(file_1)
    print(era_5_files)
    print(oskar_files)

    # At least one ERA5 file and at least two OSCAR files are required.
    if len(era_5_files) > 0 and len(oskar_files) > 1:
        df = get_interp_cygnnss(pd.read_csv(os.path.join(cygnss_dir, file_1)),
                                pd.concat(map(pd.read_csv, era_5_files)),
                                pd.concat(map(pd.read_csv, oskar_files)))

        df.to_csv(my_file, index=False)

FileNotFoundError: [WinError 3] Systemet finner ikke angitt bane: 'D://level_2_mss'