In [7]:
import os 
from pydap.client import open_url
from datetime import datetime
import numpy as np
import pandas as pd
from datetime import timedelta
import xarray as xr
from scipy.interpolate import LinearNDInterpolator
from datetime import date
import os
from tqdm import tqdm

def get_interp_cygnnss(cygnss_df, era_5_df, oskar_df):
    """Interpolate wind and current components onto the rows of ``cygnss_df``.

    Both source tables are interpolated linearly in three dimensions
    (longitude, latitude, time) to the coordinates of every row of
    ``cygnss_df``.  Two derived columns are then written to ``cygnss_df``.

    Parameters
    ----------
    cygnss_df : pandas.DataFrame
        Target points.  Must contain ``sp_lon``, ``sp_lat`` and
        ``hours_since_ref``.  **Modified in place** (two columns are added).
    era_5_df : pandas.DataFrame
        Wind samples with columns ``sp_lon``, ``sp_lat``, ``hours_since_ref``,
        ``u10`` and ``v10``.
    oskar_df : pandas.DataFrame
        Current samples with columns ``sp_lon``, ``sp_lat``,
        ``hours_since_ref``, ``u`` and ``v``.

    Returns
    -------
    pandas.DataFrame
        The same ``cygnss_df`` object, with
        ``total_wind = sqrt(u10**2 + v10**2)`` and
        ``delta = sqrt((u10 - u)**2 + (v10 - v)**2)`` added.

    Notes
    -----
    Target points that fall outside the convex hull of a source table get
    NaN from ``LinearNDInterpolator`` (its default fill value), so the derived
    columns are NaN there as well.
    """
    lons_to_interpolate = cygnss_df["sp_lon"].to_numpy()
    lats_to_interpolate = cygnss_df["sp_lat"].to_numpy()
    times_to_interpolate = cygnss_df["hours_since_ref"].to_numpy()

    # One interpolator per source table: both components share the same
    # sample points, so stacking them as a two-column value array avoids
    # building the (expensive) Delaunay triangulation twice.
    wind_points = era_5_df[['sp_lon', 'sp_lat', 'hours_since_ref']].to_numpy()
    interp_wind = LinearNDInterpolator(wind_points, era_5_df[['u10', 'v10']].to_numpy())
    wind = interp_wind(lons_to_interpolate, lats_to_interpolate, times_to_interpolate)
    u10, v10 = wind[..., 0], wind[..., 1]

    # The current table is triangulated in (lat, lon, time) order; the query
    # below uses the same order.
    current_points = oskar_df[['sp_lat', 'sp_lon', 'hours_since_ref']].to_numpy()
    interp_current = LinearNDInterpolator(current_points, oskar_df[['u', 'v']].to_numpy())
    current = interp_current(lats_to_interpolate, lons_to_interpolate, times_to_interpolate)
    u_current, v_current = current[..., 0], current[..., 1]

    total_wind = np.sqrt(u10 ** 2 + v10 ** 2)

    # Magnitude of the wind vector minus the current vector.
    diff_u = u10 - u_current
    diff_v = v10 - v_current
    delta = np.sqrt(diff_u ** 2 + diff_v ** 2)

    cygnss_df['delta'] = delta
    cygnss_df['total_wind'] = total_wind
    return cygnss_df
    
    
def relevant_files(year, month, directory, include_extra = False):
    """Return the paths of the files in ``directory`` for one year and month.

    A file belongs to the month when its name starts with
    ``str(year) + '_' + str(month)``.  ``month`` is used verbatim, so it must
    be spelled the way it appears in the file names (e.g. ``'03'``).

    Parameters
    ----------
    year, month : str or int
        Combined into the file-name prefix ``<year>_<month>``.
    directory : str
        Directory to list.
    include_extra : bool, optional
        When true, also return the file immediately before the first match
        and the file immediately after the last match in the name-sorted
        listing.  A neighbour that does not exist is skipped.

    Returns
    -------
    list of str
        ``directory + '/' + name`` for each matching file in sorted order,
        followed by the preceding and then the following neighbour when
        ``include_extra`` is true.  Empty when nothing matches.
    """
    prefix = str(year) + '_' + str(month)

    # Sort file names by name
    files = sorted(os.listdir(directory))

    matches = [file_name for file_name in files if file_name.startswith(prefix)]

    if include_extra and matches:
        # Names sharing a prefix are contiguous in a sorted list, so the
        # matches occupy files[first : last + 1].
        first = files.index(matches[0])
        last = first + len(matches) - 1

        # Guard both ends: an index of -1 would silently wrap around to the
        # last file of the directory, and last + 1 may run off the end.
        if first > 0:
            matches.append(files[first - 1])
        if last + 1 < len(files):
            matches.append(files[last + 1])

    return [directory + '/' + s for s in matches]

In [None]:
years = ['2017', '2018', '2019','2020', '2021', '2022']

# Month labels as they appear in the file names.
all_months = ['01', '02', '03', '04', '05', '06',
              '07', '08', '09', '10', '11', '12']

# Years that are only partly processed: 2017 from March on, 2022 January only.
partial_year_months = {
    '2017': all_months[2:],
    '2022': all_months[:1],
}

for year in tqdm(years):
    months = partial_year_months.get(year, all_months)

    for month in months:
        # Wind (era_5) and CYGNSS files are taken for the month itself.
        era_5_files = relevant_files(year, month, 'era_5')
        cygnss_files = relevant_files(year, month, 'level_2_mss')
        # For the current data the neighbouring files just outside the month
        # are requested as well (include_extra=True).
        # TODO (from the original note): extra wind files may be needed too,
        # so that the interpolation covers the edges of the month.
        oskar_files = relevant_files(year, month, 'oskar_data', True)

        # Each input is the row-wise concatenation of its CSV files.
        cygnss_input = pd.concat(map(pd.read_csv, cygnss_files))
        era_5_input = pd.concat(map(pd.read_csv, era_5_files))
        oskar_input = pd.concat(map(pd.read_csv, oskar_files))

        cygnss_df = get_interp_cygnnss(cygnss_input, era_5_input, oskar_input)

        # One output file per month, named <year><month>.csv.
        cygnss_df.to_csv("colocated_data/" + year + month + ".csv", index=False)

In [8]:
# Sanity check: list the current files selected for March 2017 and preview
# their concatenated contents next to the file list.
oskar_files = relevant_files('2017', '03', 'oskar_data')
oskar_preview = pd.concat(map(pd.read_csv, oskar_files))
oskar_preview, oskar_files

(               u         v   lat        lon    time
 0      -0.031251  0.046217  38.0  20.000000  213936
 1      -0.048741  0.002651  38.0  20.333333  213936
 2      -0.013912 -0.050614  38.0  20.666667  213936
 3       0.001020 -0.078810  38.0  21.000000  213936
 4       0.030352 -0.067792  38.0  24.333333  213936
 ...          ...       ...   ...        ...     ...
 179794 -0.835388 -0.603978 -38.0  19.666667  240096
 179795 -1.278264 -0.998600 -38.0  20.000000  240096
 179796 -1.368990 -1.070490 -38.0  20.333333  240096
 179797 -1.094822 -0.742922 -38.0  20.666667  240096
 179798 -0.678719 -0.518529 -38.0  21.000000  240096
 
 [1441423 rows x 5 columns],
 ['oskar_data/2017_03_02.csv',
  'oskar_data/2017_03_07.csv',
  'oskar_data/2017_03_12.csv',
  'oskar_data/2017_03_18.csv',
  'oskar_data/2017_03_23.csv',
  'oskar_data/2017_03_28.csv',
  'oskar_data/2020_03_12.csv',
  'oskar_data/2020_02_25.csv'])