In [38]:
import os 
from pydap.client import open_url
from datetime import datetime
import numpy as np
import pandas as pd
from datetime import timedelta
import xarray as xr
from scipy.interpolate import LinearNDInterpolator
from datetime import date
import os
from tqdm import tqdm

def get_interp_cygnnss(cygnss_df, era_5_df, oskar_df):
    interp_u10 = LinearNDInterpolator(list(zip(era_5_df['sp_lon'], era_5_df['sp_lat'], era_5_df['hours_since_ref'])),
                                      era_5_df['u10'])
    interp_v10 = LinearNDInterpolator(list(zip(era_5_df['sp_lon'], era_5_df['sp_lat'], era_5_df['hours_since_ref'])),
                                      era_5_df['v10'])

    lons_to_interpolate = cygnss_df["sp_lon"].to_numpy()
    lats_to_interpolate = cygnss_df["sp_lat"].to_numpy()
    times_to_interpolate = cygnss_df["hours_since_ref"].to_numpy()
    
    u10 = interp_u10(lons_to_interpolate, lats_to_interpolate, times_to_interpolate)
    v10 = interp_v10(lons_to_interpolate, lats_to_interpolate, times_to_interpolate)
    
    interp_u = LinearNDInterpolator(list(zip(oskar_df['sp_lat'], oskar_df['sp_lon'], oskar_df['hours_since_ref'])),
                                    oskar_df['u'])
    interp_v = LinearNDInterpolator(list(zip(oskar_df['sp_lat'], oskar_df['sp_lon'], oskar_df['hours_since_ref'])),
                                    oskar_df['v'])

    u_current = interp_u(lats_to_interpolate, lons_to_interpolate, times_to_interpolate)
    v_current = interp_v(lats_to_interpolate, lons_to_interpolate, times_to_interpolate)
    
    total_wind = np.sqrt(u10 ** 2 + v10 ** 2)
    
    diff_u = u10 - u_current
    diff_v = v10 - v_current
    delta = np.sqrt(diff_u ** 2 + diff_v ** 2)
    cygnss_df['delta'] = delta
    cygnss_df['total_wind'] = total_wind
    return cygnss_df
    
    
def relevant_files(input_arguments, directory, include_extra = False):
    files = os.listdir(directory)    
    # Sort file names by name
    files = sorted(files) 

    relevant_files = []
    for index, file_name in enumerate(files):
        for argument in input_arguments:
            if file_name.startswith(argument):
                relevant_files.append(file_name)
    if include_extra:

        index_extra_start = files.index(relevant_files[0]) - 1
        index_extra_end = files.index(relevant_files[-1]) + 1
    
        relevant_files.append(files[index_extra_start])
        relevant_files.append(files[index_extra_end])
    
    relevant_files = [directory + '/' + s for s in relevant_files]

    return relevant_files

In [39]:
years = ['2017', '2018', '2019','2020', '2021', '2022']
days = [str(x).zfill(2) for x in range(1, 32)]
for i in tqdm(range(len(years))):
    if years[i] == '2017':
        months = ['03',
            '04', '05', '06',
            '07', '08', '09',
            '10', '11', '12',
        ]
    elif years[i] == '2022':
        months = ['01']
    else:
        months = ['01','02','03',
            '04', '05', '06',
            '07', '08', '09',
            '10', '11', '12',
        ]
        
    for month in months:
        cygnss_df_month = []
        for i in range(1, len(days)-2):
            if i == 28:
                input_arguments = [str(years[i]) + '_' + str(month) + "_" + str(i),
                                  str(years[i]) + '_' + str(month) + "_" + str(i+1),
                                  str(years[i]) + '_' + str(month) + "_" + str(i+2),
                                  str(years[i]) + '_' + str(month) + "_" + str(i+3)]
            else:  
                input_arguments = [str(years[i]) + '_' + str(month) + "_" + str(i),
                                  str(years[i]) + '_' + str(month) + "_" + str(i+1),
                                  str(years[i]) + '_' + str(month) + "_" + str(i+2)]
            era_5_files = relevant_files(input_arguments, 'era_5', False)
            cygnss_files = relevant_files(input_arguments,'level_2_mss')
            oskar_files = relevant_files(input_arguments, 'oskar_data', False)        
            cygnss_df_month.append(get_interp_cygnnss(pd.concat(map(pd.read_csv, cygnss_files )), 
                                           pd.concat(map(pd.read_csv, era_5_files )), 
                                           pd.concat(map(pd.read_csv, oskar_files ))))
        if cygnss_df_month:
            df = pd.concat(cygnss_df_month)
            df.to_csv("colocated_data/" + years[i] + month + ".csv" ,index=False)

  0%|          | 0/6 [00:00<?, ?it/s]


ValueError: No objects to concatenate

In [5]:
oskar_files = relevant_files('2021', '10', 'oskar_data')        
pd.concat(map(pd.read_csv, oskar_files )), oskar_files

(            u         v   lat        lon    time
 0   -0.011145  0.079336  38.0  20.000000  254112
 1   -0.035046  0.012932  38.0  20.333333  254112
 2   -0.032004  0.026066  38.0  21.000000  254112
 3   -0.020622 -0.002753  38.0  24.333333  254112
 4   -0.010858 -0.099207  38.0  24.666667  254112
 ..        ...       ...   ...        ...     ...
 585  0.017822 -0.018969  38.0  18.333333  254352
 586 -0.001285 -0.064473  38.0  18.666667  254352
 587 -0.008747 -0.056620  38.0  19.000000  254352
 588 -0.005189 -0.037838  38.0  19.333333  254352
 589  0.007859  0.030019  38.0  19.666667  254352
 
 [1770 rows x 5 columns],
 ['oskar_data/2021_10_01.csv',
  'oskar_data/2021_10_06.csv',
  'oskar_data/2021_10_11.csv'])

In [6]:
era_5 = relevant_files('2021', '10', 'era_5')        
pd.concat(map(pd.read_csv, era_5 )), era_5

(         sp_lon  sp_lat  hours_since_ref       u10       v10
 0         220.0  -40.00         254112.0  6.962839 -7.173535
 1         220.0  -39.75         254112.0  6.809503 -7.555362
 2         220.0  -39.50         254112.0  6.606271 -7.591383
 3         220.0  -39.25         254112.0  6.427783 -7.515102
 4         220.0  -39.00         254112.0  6.228608 -7.562142
 ...         ...     ...              ...       ...       ...
 1240339   260.0   39.00         254255.0 -3.388193  3.753215
 1240340   260.0   39.25         254255.0 -3.601972  4.017230
 1240341   260.0   39.50         254255.0 -3.780459  4.247342
 1240342   260.0   39.75         254255.0 -3.966248  4.400751
 1240343   260.0   40.00         254255.0 -4.017766  4.408803
 
 [7442064 rows x 5 columns],
 ['era_5/2021_10_01.csv',
  'era_5/2021_10_02.csv',
  'era_5/2021_10_03.csv',
  'era_5/2021_10_04.csv',
  'era_5/2021_10_05.csv',
  'era_5/2021_10_06.csv'])

In [7]:
level_2_mss = relevant_files('2021', '10', 'level_2_mss')        
pd.concat(map(pd.read_csv, level_2_mss )), level_2_mss

(             mss        lat         lon    sample_time
 0       0.059714  -9.405056  183.754288  254136.000000
 1       0.073373 -14.346755  198.040985  254136.000000
 2       0.025884  12.835819  150.561890  254136.000000
 3       0.035982  11.183601  151.437119  254136.000000
 4       0.063969  -9.412897  183.765381  254136.000069
 ...          ...        ...         ...            ...
 708135  0.037600 -21.455124  279.856140  254231.999757
 708136  0.058287 -29.680683  263.662659  254231.999757
 708137  0.033945 -17.025261  152.870743  254231.999861
 708138  0.032921 -19.155619  276.194824  254231.999861
 708139  0.040810 -27.301825  260.110382  254231.999861
 
 [2923721 rows x 4 columns],
 ['level_2_mss/2021_10_02.csv',
  'level_2_mss/2021_10_03.csv',
  'level_2_mss/2021_10_04.csv',
  'level_2_mss/2021_10_05.csv'])

In [31]:
days = [str(x).zfill(2) for x in range(1, 32)]
for i in range(1, len(days)-2):
    if i == 28:
        input_arguments = [str(2001) + '_' + str(2001) + "_" + str(i+1)]
input_arguments

['2001_2001_29']