In [1]:
import numpy as np
import pandas as pd
from scipy.interpolate import LinearNDInterpolator
import os
from tqdm import tqdm

def get_interp_cygnnss(cygnss_df, era_5_df, oskar_df):
    """Interpolate ERA5 wind and OSKAR current onto the CYGNSS sample points.

    Two columns are added to ``cygnss_df``:

    * ``total_wind`` -- magnitude of the interpolated (u10, v10) vector.
    * ``delta``      -- magnitude of the difference between the interpolated
      (u10, v10) vector and the interpolated (u, v) current vector.

    Parameters
    ----------
    cygnss_df : pandas.DataFrame
        Must provide ``lon``, ``lat`` and ``sample_time`` columns. It is
        modified in place and also returned.
    era_5_df : pandas.DataFrame
        Must provide ``sp_lon``, ``sp_lat``, ``hours_since_ref``, ``u10`` and
        ``v10`` columns.
    oskar_df : pandas.DataFrame
        Must provide ``lat``, ``lon``, ``time``, ``u`` and ``v`` columns.

    Returns
    -------
    pandas.DataFrame
        The same ``cygnss_df`` object with the two new columns.

    Notes
    -----
    No unit or reference-epoch conversion is done here: ``sample_time`` is
    used directly as the query coordinate against both ``hours_since_ref`` and
    ``time``, so all three must already share one time axis.
    Samples outside the convex hull of either dataset come back as NaN
    (the ``LinearNDInterpolator`` default fill value).
    """
    # Query coordinates, extracted once and reused for every interpolation.
    lons_to_interpolate = cygnss_df["lon"].to_numpy()
    lats_to_interpolate = cygnss_df["lat"].to_numpy()
    times_to_interpolate = cygnss_df["sample_time"].to_numpy()

    # Building the triangulation is the expensive step, so each dataset gets a
    # single interpolator carrying both vector components as value columns.
    era_5_points = era_5_df[['sp_lon', 'sp_lat', 'hours_since_ref']].to_numpy(dtype=float)
    interp_wind = LinearNDInterpolator(era_5_points,
                                       era_5_df[['u10', 'v10']].to_numpy(dtype=float))
    wind = interp_wind(lons_to_interpolate, lats_to_interpolate, times_to_interpolate)
    u10, v10 = wind[..., 0], wind[..., 1]

    # OSKAR points are ordered (lat, lon, time); the query below uses the same order.
    oskar_points = oskar_df[['lat', 'lon', 'time']].to_numpy(dtype=float)
    interp_current = LinearNDInterpolator(oskar_points,
                                          oskar_df[['u', 'v']].to_numpy(dtype=float))
    current = interp_current(lats_to_interpolate, lons_to_interpolate, times_to_interpolate)
    u_current, v_current = current[..., 0], current[..., 1]

    total_wind = np.sqrt(u10 ** 2 + v10 ** 2)

    diff_u = u10 - u_current
    diff_v = v10 - v_current
    delta = np.sqrt(diff_u ** 2 + diff_v ** 2)

    cygnss_df['delta'] = delta
    cygnss_df['total_wind'] = total_wind

    return cygnss_df
    
    
def relevant_files(input_arguments, directory, include_extra = False):
    """Return paths of the files in ``directory`` whose names start with a given prefix.

    Parameters
    ----------
    input_arguments : str or iterable of str
        File-name prefix(es) to match. A single string is treated as one
        prefix rather than being iterated character by character.
    directory : str
        Directory to list.
    include_extra : bool, optional
        When true, also include the file immediately before the first match
        and the file immediately after the last match in name-sorted order.
        A neighbour is only added when it exists.

    Returns
    -------
    list of str
        ``directory + '/' + name`` for every matched file in sorted order,
        followed by the preceding and following neighbours when requested.
        Empty when nothing matches.

    Raises
    ------
    OSError
        If ``directory`` cannot be listed (propagated from ``os.listdir``).
    """
    if isinstance(input_arguments, str):
        input_arguments = [input_arguments]
    prefixes = tuple(input_arguments)

    files = sorted(os.listdir(directory))

    # str.startswith accepts a tuple of prefixes, so each file is tested once
    # and cannot be appended twice when several prefixes match it.
    matched = [file_name for file_name in files if file_name.startswith(prefixes)]

    if include_extra and matched:
        first_index = files.index(matched[0])
        last_index = files.index(matched[-1])
        # Guard both edges: index -1 would silently wrap around to the last
        # file, and last_index + 1 can run past the end of the listing.
        if first_index > 0:
            matched.append(files[first_index - 1])
        if last_index < len(files) - 1:
            matched.append(files[last_index + 1])

    return [directory + '/' + file_name for file_name in matched]

In [None]:
# Build one colocated CSV per month: for each batch of days, load the matching
# CYGNSS, ERA5 and OSKAR files, interpolate onto the CYGNSS samples, and
# concatenate the batches of a month into "colocated_data/<year><month>.csv".
years = ['2017', '2018', '2019','2020', '2021', '2022']
days = [str(x).zfill(2) for x in range(1, 32)]
all_months = [str(m).zfill(2) for m in range(1, 13)]

# Days are processed in batches of three; the last batch (28-31) holds four days.
day_batches = [days[start:start + 3] for start in range(0, 27, 3)] + [days[27:31]]

# Create the output folder up front so a finished month is not lost at write time.
os.makedirs("colocated_data", exist_ok=True)

for year in tqdm(years):
    # 2017 is processed from March onwards and 2022 for January only
    # (presumably the data coverage -- confirm against the input folders).
    if year == '2017':
        months = all_months[2:]
    elif year == '2022':
        months = all_months[:1]
    else:
        months = all_months

    for month in months:
        cygnss_df_month = []
        for batch in day_batches:
            # File names are matched by a 'YYYY_MM_DD' prefix.
            input_arguments = [year + '_' + month + "_" + day for day in batch]
            era_5_files = relevant_files(input_arguments, 'era_5', False)
            cygnss_files = relevant_files(input_arguments,'level_2_mss')
            oskar_files = relevant_files(input_arguments, 'oskar_data', False)

            if cygnss_files:
                print("hEI")
                try:
                    cygnss_df_month.append(get_interp_cygnnss(
                        pd.concat(map(pd.read_csv, cygnss_files)),
                        pd.concat(map(pd.read_csv, era_5_files)),
                        pd.concat(map(pd.read_csv, oskar_files))))
                except Exception as exc:
                    # Best effort: skip the failing batch but say why.
                    # Catching Exception rather than using a bare except keeps
                    # the long loop interruptible with Ctrl-C / kernel interrupt.
                    print(repr(exc))
                    print(cygnss_files)
                    print(era_5_files)
                    print(oskar_files)

        if cygnss_df_month:
            print("TO")
            df = pd.concat(cygnss_df_month)
            df.to_csv("colocated_data/" + year + month + ".csv" ,index=False)

  0%|          | 0/6 [00:00<?, ?it/s]

hEI


In [None]:
# Inspect the OSKAR files for October 2021.
# relevant_files expects (prefixes, directory, include_extra); the earlier call
# passed year, month and directory positionally and so listed directory '10'.
# The 'YYYY_MM' prefix follows the 'YYYY_MM_DD' prefixes used by the main loop.
oskar_files = relevant_files(['2021_10'], 'oskar_data')
pd.concat(map(pd.read_csv, oskar_files)), oskar_files

In [None]:
# Inspect the ERA5 files for October 2021.
# relevant_files expects (prefixes, directory, include_extra); the earlier call
# passed year, month and directory positionally and so listed directory '10'.
# The 'YYYY_MM' prefix follows the 'YYYY_MM_DD' prefixes used by the main loop.
era_5 = relevant_files(['2021_10'], 'era_5')
pd.concat(map(pd.read_csv, era_5)), era_5

In [None]:
# Inspect the CYGNSS level-2 MSS files for October 2021.
# relevant_files expects (prefixes, directory, include_extra); the earlier call
# passed year, month and directory positionally and so listed directory '10'.
# The 'YYYY_MM' prefix follows the 'YYYY_MM_DD' prefixes used by the main loop.
level_2_mss = relevant_files(['2021_10'], 'level_2_mss')
pd.concat(map(pd.read_csv, level_2_mss)), level_2_mss