In [1]:
import numpy as np
import pandas as pd
from scipy.interpolate import LinearNDInterpolator
import os
from tqdm import tqdm
import datetime



def interpolate_era_5(era_5_df, cygnss_row, component):
    """Linearly interpolate one ERA5 wind component at a CYGNSS observation.

    Parameters
    ----------
    era_5_df : DataFrame with columns 'sp_lon', 'sp_lat', 'hours_since_ref'
        and the wind columns 'u10' / 'v10'. These rows are the sample points.
    cygnss_row : indexable observation; position 1 is used as the latitude,
        position 2 as the longitude and position 3 as the time of the query.
    component : 'v10' selects the 'v10' column; any other value selects 'u10'.

    Returns
    -------
    The value returned by ``LinearNDInterpolator`` for the query point
    (NaN when the point lies outside the convex hull of the samples).
    """
    # Anything other than 'v10' falls back to the 'u10' column.
    column = 'v10' if component == 'v10' else 'u10'

    # Sample points are ordered (lon, lat, time); the query below must match.
    sample_points = list(zip(era_5_df['sp_lon'],
                             era_5_df['sp_lat'],
                             era_5_df['hours_since_ref']))
    interpolator = LinearNDInterpolator(sample_points, era_5_df[column])

    query_lon = cygnss_row[2]
    query_lat = cygnss_row[1]
    query_time = cygnss_row[3]
    return interpolator(query_lon, query_lat, query_time)
    
    

def interpolate_oskar(oskar_df, cygnss_row, component):
    """Linearly interpolate one ocean-current component at a CYGNSS observation.

    Parameters
    ----------
    oskar_df : DataFrame with columns 'lon', 'lat', 'time' and the current
        columns 'u' / 'v'. These rows are the sample points.
    cygnss_row : indexable observation; position 1 is used as the latitude,
        position 2 as the longitude and position 3 as the time of the query.
    component : 'v' selects the 'v' column; any other value selects 'u'.

    Returns
    -------
    The value returned by ``LinearNDInterpolator`` for the query point
    (NaN when the point lies outside the convex hull of the samples).

    Notes
    -----
    The sample points are built in (lon, lat, time) order so that they match
    the (cygnss_row[2], cygnss_row[1], cygnss_row[3]) query order. The
    previous (lat, lon, time) ordering swapped latitude and longitude
    relative to the query and therefore interpolated at the wrong location.
    """
    # Anything other than 'v' falls back to the 'u' column.
    column = 'v' if component == 'v' else 'u'

    # NOTE(review): assumes oskar_df['time'] uses the same time axis/units as
    # cygnss_row[3] -- confirm against the data preparation step.
    sample_points = list(zip(oskar_df['lon'], oskar_df['lat'], oskar_df['time']))
    interpolator = LinearNDInterpolator(sample_points, oskar_df[column])

    return interpolator(cygnss_row[2], cygnss_row[1], cygnss_row[3])


    

def get_interp_cygnnss(cygnss_row, era_5_df, oskar_df):
    """Interpolate wind and current at one CYGNSS observation.

    Parameters
    ----------
    cygnss_row : observation passed unchanged to ``interpolate_era_5`` and
        ``interpolate_oskar``.
    era_5_df : DataFrame of ERA5 samples used for the 'u10' / 'v10' wind.
    oskar_df : DataFrame of current samples used for the 'u' / 'v' current.

    Returns
    -------
    (total_wind, delta) where ``total_wind`` is the magnitude of the
    interpolated (u10, v10) vector and ``delta`` is the magnitude of the
    difference between that wind vector and the interpolated (u, v) current.
    """
    wind_v = interpolate_era_5(era_5_df, cygnss_row, 'v10')
    wind_u = interpolate_era_5(era_5_df, cygnss_row, 'u10')
    current_v = interpolate_oskar(oskar_df, cygnss_row, 'v')
    current_u = interpolate_oskar(oskar_df, cygnss_row, 'u')

    # Wind relative to the moving water surface, component by component.
    relative_u = wind_u - current_u
    relative_v = wind_v - current_v

    wind_speed = np.sqrt(wind_u ** 2 + wind_v ** 2)
    relative_speed = np.sqrt(relative_u ** 2 + relative_v ** 2)
    return wind_speed, relative_speed


def get_relevant_filenames(cygnss_filename, data_set):
    """List the file names that may hold data relevant to one CYGNSS file.

    Parameters
    ----------
    cygnss_filename : name whose characters [0:4], [5:7] and [8:10] are the
        year, month and day (e.g. '2017_03_18.csv').
    data_set : "era_5" returns the given name plus the following day; any
        other value returns the given name plus the 7 preceding and the
        7 following days.

    Returns
    -------
    list of str: ``cygnss_filename`` first, then 'YYYY_MM_DD.csv' names in
    increasing day-offset order.
    """
    names = [cygnss_filename]
    base_day = datetime.datetime(int(cygnss_filename[0:4]),
                                 int(cygnss_filename[5:7]),
                                 int(cygnss_filename[8:10]))

    if data_set == "era_5":
        day_offsets = [1]
    else:
        # Seven days back, then seven days forward; offset 0 is the input itself.
        day_offsets = [offset for offset in range(-7, 8) if offset != 0]

    for offset in day_offsets:
        shifted = base_day + datetime.timedelta(days=offset)
        names.append("{}_{:02d}_{:02d}.csv".format(shifted.year, shifted.month, shifted.day))
    return names


def relevant_files(input_arguments, directory):
    """Return paths of files in ``directory`` whose names start with a given prefix.

    Parameters
    ----------
    input_arguments : iterable of str prefixes to match against file names.
    directory : directory to list; also used as the path prefix of the result.

    Returns
    -------
    list of str of the form ``directory + '/' + file_name``, ordered by file
    name. A file appears once for every prefix it matches.
    """
    return [
        directory + '/' + file_name
        for file_name in sorted(os.listdir(directory))
        for prefix in input_arguments
        if file_name.startswith(prefix)
    ]

In [2]:
# Half-widths of the search windows used to pre-select samples around each
# CYGNSS observation before interpolating.
# NOTE(review): lat/lon windows are presumably in degrees -- confirm.
OSKAR_LATLON_HALF_WINDOW = 0.35
ERA5_LATLON_HALF_WINDOW = 0.25
ERA5_TIME_HALF_WINDOW = 2  # same units as the 'hours_since_ref' column

files = os.listdir("level_2_mss")
for file in tqdm(files):
    # Candidate ERA5 / current file names for this CYGNSS file, reduced to
    # the ones that actually exist on disk.
    potential_era_5_files = get_relevant_filenames(file, 'era_5')
    potential_oskar_files = get_relevant_filenames(file, 'oskar')

    era_5_files = relevant_files(potential_era_5_files, 'era_5')
    oskar_files = relevant_files(potential_oskar_files, 'oskar_data')

    print("Processing these files : ")
    print(file)
    print(era_5_files)
    print(oskar_files)

    # More than one file of each kind is required. Check this BEFORE reading:
    # pd.concat raises on an empty list of frames.
    if len(era_5_files) <= 1 or len(oskar_files) <= 1:
        continue

    cygnss_df = pd.read_csv("level_2_mss/" + file)
    era_5_df = pd.concat(map(pd.read_csv, era_5_files))
    oskar_df = pd.concat(map(pd.read_csv, oskar_files))

    total_winds = []
    deltas = []

    # Plain tuples keep the positional access (1 = lat, 2 = lon, 3 = time)
    # without relying on pandas' deprecated positional Series indexing.
    rows = cygnss_df.itertuples(index=False, name=None)
    for row in tqdm(rows, total=len(cygnss_df)):
        lat, lon, hours = row[1], row[2], row[3]

        # ERA5 samples inside the open lat/lon/time box around the observation.
        era_5_mask = (
            (era_5_df['hours_since_ref'] > hours - ERA5_TIME_HALF_WINDOW)
            & (era_5_df['hours_since_ref'] < hours + ERA5_TIME_HALF_WINDOW)
            & (era_5_df['sp_lon'] > lon - ERA5_LATLON_HALF_WINDOW)
            & (era_5_df['sp_lon'] < lon + ERA5_LATLON_HALF_WINDOW)
            & (era_5_df['sp_lat'] > lat - ERA5_LATLON_HALF_WINDOW)
            & (era_5_df['sp_lat'] < lat + ERA5_LATLON_HALF_WINDOW)
        )
        sub_era_5_df = era_5_df.loc[era_5_mask]

        # Current samples inside the open lat/lon box (no time restriction).
        oskar_mask = (
            (oskar_df['lon'] > lon - OSKAR_LATLON_HALF_WINDOW)
            & (oskar_df['lon'] < lon + OSKAR_LATLON_HALF_WINDOW)
            & (oskar_df['lat'] > lat - OSKAR_LATLON_HALF_WINDOW)
            & (oskar_df['lat'] < lat + OSKAR_LATLON_HALF_WINDOW)
        )
        sub_oskar_df = oskar_df.loc[oskar_mask]

        # NOTE(review): an empty or degenerate window makes the interpolator
        # construction fail and aborts the run -- decide whether such rows
        # should be recorded as NaN instead.
        total_wind, delta = get_interp_cygnnss(row, sub_era_5_df, sub_oskar_df)

        # Collect one scalar per observation; previously the results were
        # computed and then dropped, so the column assignment below failed.
        total_winds.append(np.asarray(total_wind).item())
        deltas.append(np.asarray(delta).item())

    cygnss_df['total_wind'] = total_winds
    cygnss_df['delta'] = deltas
    cygnss_df.to_csv("colocated_data/" + file[0:-4] + ".csv", index=False)

  0%|          | 0/143 [00:00<?, ?it/s]

Processing these files : 
2017_03_18.csv
['era_5/2017_03_18.csv', 'era_5/2017_03_19.csv']
['oskar_data/2017_03_18.csv', 'oskar_data/2017_03_23.csv']



0it [00:00, ?it/s][A
1it [00:00,  3.97it/s][A

Interp v10
901896    210.75
901897    210.75
901920    210.75
901921    210.75
909216    211.00
909217    211.00
909240    211.00
909241    211.00
Name: sp_lon, dtype: float64 901896    22.00
901897    22.00
901920    21.75
901921    21.75
909216    22.00
909217    22.00
909240    21.75
909241    21.75
Name: sp_lat, dtype: float64 901896    214320.0
901897    214321.0
901920    214320.0
901921    214321.0
909216    214320.0
909217    214321.0
909240    214320.0
909241    214321.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



2it [00:00,  3.99it/s][A

Interp v10
799608    207.25
799609    207.25
799632    207.25
799633    207.25
806928    207.50
806929    207.50
806952    207.50
806953    207.50
Name: sp_lon, dtype: float64 799608    20.00
799609    20.00
799632    19.75
799633    19.75
806928    20.00
806929    20.00
806952    19.75
806953    19.75
Name: sp_lat, dtype: float64 799608    214320.0
799609    214321.0
799632    214320.0
799633    214321.0
806928    214320.0
806929    214321.0
806952    214320.0
806953    214321.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



3it [00:00,  3.95it/s][A

Interp v10
719640    204.50
719641    204.50
719664    204.50
719665    204.50
726960    204.75
726961    204.75
726984    204.75
726985    204.75
Name: sp_lon, dtype: float64 719640    14.25
719641    14.25
719664    14.00
719665    14.00
726960    14.25
726961    14.25
726984    14.00
726985    14.00
Name: sp_lat, dtype: float64 719640    214320.0
719641    214321.0
719664    214320.0
719665    214321.0
726960    214320.0
726961    214321.0
726984    214320.0
726985    214321.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



4it [00:01,  3.72it/s][A

Interp v10
7162704    64.50
7162705    64.50
7162728    64.50
7162729    64.50
7170024    64.75
7170025    64.75
7170048    64.75
7170049    64.75
Name: sp_lon, dtype: float64 7162704   -1.00
7162705   -1.00
7162728   -1.25
7162729   -1.25
7170024   -1.00
7170025   -1.00
7170048   -1.25
7170049   -1.25
Name: sp_lat, dtype: float64 7162704    214320.0
7162705    214321.0
7162728    214320.0
7162729    214321.0
7170024    214320.0
7170025    214321.0
7170048    214320.0
7170049    214321.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



5it [00:01,  3.82it/s][A

Interp v10
6511944    42.25
6511945    42.25
6511968    42.25
6511969    42.25
6519264    42.50
6519265    42.50
6519288    42.50
6519289    42.50
Name: sp_lon, dtype: float64 6511944   -8.50
6511945   -8.50
6511968   -8.75
6511969   -8.75
6519264   -8.50
6519265   -8.50
6519288   -8.75
6519289   -8.75
Name: sp_lat, dtype: float64 6511944    214320.0
6511945    214321.0
6511968    214320.0
6511969    214321.0
6519264    214320.0
6519265    214321.0
6519288    214320.0
6519289    214321.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



6it [00:01,  3.85it/s][A

Interp v10
6644184    46.75
6644185    46.75
6644208    46.75
6644209    46.75
6651504    47.00
6651505    47.00
6651528    47.00
6651529    47.00
Name: sp_lon, dtype: float64 6644184   -13.50
6644185   -13.50
6644208   -13.75
6644209   -13.75
6651504   -13.50
6651505   -13.50
6651528   -13.75
6651529   -13.75
Name: sp_lat, dtype: float64 6644184    214320.0
6644185    214321.0
6644208    214320.0
6644209    214321.0
6651504    214320.0
6651505    214321.0
6651528    214320.0
6651529    214321.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



7it [00:01,  3.99it/s][A

Interp v10
6527016    42.75
6527017    42.75
6527040    42.75
6527041    42.75
6534336    43.00
6534337    43.00
6534360    43.00
6534361    43.00
Name: sp_lon, dtype: float64 6527016   -13.00
6527017   -13.00
6527040   -13.25
6527041   -13.25
6534336   -13.00
6534337   -13.00
6534360   -13.25
6534361   -13.25
Name: sp_lat, dtype: float64 6527016    214320.0
6527017    214321.0
6527040    214320.0
6527041    214321.0
6534336    214320.0
6534337    214321.0
6534360    214320.0
6534361    214321.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



8it [00:02,  4.04it/s][A

Interp v10
6511944    42.25
6511945    42.25
6511946    42.25
6511968    42.25
6511969    42.25
6511970    42.25
6519264    42.50
6519265    42.50
6519266    42.50
6519288    42.50
6519289    42.50
6519290    42.50
Name: sp_lon, dtype: float64 6511944   -8.50
6511945   -8.50
6511946   -8.50
6511968   -8.75
6511969   -8.75
6511970   -8.75
6519264   -8.50
6519265   -8.50
6519266   -8.50
6519288   -8.75
6519289   -8.75
6519290   -8.75
Name: sp_lat, dtype: float64 6511944    214320.0
6511945    214321.0
6511946    214322.0
6511968    214320.0
6511969    214321.0
6511970    214322.0
6519264    214320.0
6519265    214321.0
6519266    214322.0
6519288    214320.0
6519289    214321.0
6519290    214322.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



9it [00:02,  4.11it/s][A

Interp v10
7170024    64.75
7170025    64.75
7170026    64.75
7170048    64.75
7170049    64.75
7170050    64.75
7177344    65.00
7177345    65.00
7177346    65.00
7177368    65.00
7177369    65.00
7177370    65.00
Name: sp_lon, dtype: float64 7170024   -1.00
7170025   -1.00
7170026   -1.00
7170048   -1.25
7170049   -1.25
7170050   -1.25
7177344   -1.00
7177345   -1.00
7177346   -1.00
7177368   -1.25
7177369   -1.25
7177370   -1.25
Name: sp_lat, dtype: float64 7170024    214320.0
7170025    214321.0
7170026    214322.0
7170048    214320.0
7170049    214321.0
7170050    214322.0
7177344    214320.0
7177345    214321.0
7177346    214322.0
7177368    214320.0
7177369    214321.0
7177370    214322.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



10it [00:02,  4.19it/s][A

Interp v10
6644184    46.75
6644185    46.75
6644186    46.75
6644208    46.75
6644209    46.75
6644210    46.75
6651504    47.00
6651505    47.00
6651506    47.00
6651528    47.00
6651529    47.00
6651530    47.00
Name: sp_lon, dtype: float64 6644184   -13.50
6644185   -13.50
6644186   -13.50
6644208   -13.75
6644209   -13.75
6644210   -13.75
6651504   -13.50
6651505   -13.50
6651506   -13.50
6651528   -13.75
6651529   -13.75
6651530   -13.75
Name: sp_lat, dtype: float64 6644184    214320.0
6644185    214321.0
6644186    214322.0
6644208    214320.0
6644209    214321.0
6644210    214322.0
6651504    214320.0
6651505    214321.0
6651506    214322.0
6651528    214320.0
6651529    214321.0
6651530    214322.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



11it [00:02,  4.31it/s][A

Interp v10
6534336    43.00
6534337    43.00
6534338    43.00
6534360    43.00
6534361    43.00
6534362    43.00
6541656    43.25
6541657    43.25
6541658    43.25
6541680    43.25
6541681    43.25
6541682    43.25
Name: sp_lon, dtype: float64 6534336   -13.00
6534337   -13.00
6534338   -13.00
6534360   -13.25
6534361   -13.25
6534362   -13.25
6541656   -13.00
6541657   -13.00
6541658   -13.00
6541680   -13.25
6541681   -13.25
6541682   -13.25
Name: sp_lat, dtype: float64 6534336    214320.0
6534337    214321.0
6534338    214322.0
6534360    214320.0
6534361    214321.0
6534362    214322.0
6541656    214320.0
6541657    214321.0
6541658    214322.0
6541680    214320.0
6541681    214321.0
6541682    214322.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



12it [00:02,  4.33it/s][A

Interp v10
901896    210.75
901897    210.75
901898    210.75
901920    210.75
901921    210.75
901922    210.75
909216    211.00
909217    211.00
909218    211.00
909240    211.00
909241    211.00
909242    211.00
Name: sp_lon, dtype: float64 901896    22.00
901897    22.00
901898    22.00
901920    21.75
901921    21.75
901922    21.75
909216    22.00
909217    22.00
909218    22.00
909240    21.75
909241    21.75
909242    21.75
Name: sp_lat, dtype: float64 901896    214320.0
901897    214321.0
901898    214322.0
901920    214320.0
901921    214321.0
901922    214322.0
909216    214320.0
909217    214321.0
909218    214322.0
909240    214320.0
909241    214321.0
909242    214322.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



13it [00:03,  4.14it/s][A

Interp v10
726960    204.75
726961    204.75
726962    204.75
726984    204.75
726985    204.75
726986    204.75
734280    205.00
734281    205.00
734282    205.00
734304    205.00
734305    205.00
734306    205.00
Name: sp_lon, dtype: float64 726960    14.25
726961    14.25
726962    14.25
726984    14.00
726985    14.00
726986    14.00
734280    14.25
734281    14.25
734282    14.25
734304    14.00
734305    14.00
734306    14.00
Name: sp_lat, dtype: float64 726960    214320.0
726961    214321.0
726962    214322.0
726984    214320.0
726985    214321.0
726986    214322.0
734280    214320.0
734281    214321.0
734282    214322.0
734304    214320.0
734305    214321.0
734306    214322.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



14it [00:03,  4.04it/s][A

Interp v10
799608    207.25
799609    207.25
799610    207.25
799632    207.25
799633    207.25
799634    207.25
806928    207.50
806929    207.50
806930    207.50
806952    207.50
806953    207.50
806954    207.50
Name: sp_lon, dtype: float64 799608    20.00
799609    20.00
799610    20.00
799632    19.75
799633    19.75
799634    19.75
806928    20.00
806929    20.00
806930    20.00
806952    19.75
806953    19.75
806954    19.75
Name: sp_lat, dtype: float64 799608    214320.0
799609    214321.0
799610    214322.0
799632    214320.0
799633    214321.0
799634    214322.0
806928    214320.0
806929    214321.0
806930    214322.0
806952    214320.0
806953    214321.0
806954    214322.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



15it [00:03,  4.20it/s][A

Interp v10
6511944    42.25
6511945    42.25
6511946    42.25
6511968    42.25
6511969    42.25
6511970    42.25
6519264    42.50
6519265    42.50
6519266    42.50
6519288    42.50
6519289    42.50
6519290    42.50
Name: sp_lon, dtype: float64 6511944   -8.50
6511945   -8.50
6511946   -8.50
6511968   -8.75
6511969   -8.75
6511970   -8.75
6519264   -8.50
6519265   -8.50
6519266   -8.50
6519288   -8.75
6519289   -8.75
6519290   -8.75
Name: sp_lat, dtype: float64 6511944    214320.0
6511945    214321.0
6511946    214322.0
6511968    214320.0
6511969    214321.0
6511970    214322.0
6519264    214320.0
6519265    214321.0
6519266    214322.0
6519288    214320.0
6519289    214321.0
6519290    214322.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u
Interp v10
6534336    43.00
6534337    43.00
6534338    43.00
6534360    43.00
6534361    43.00
6534362    43.00
6541656    43.25
6541657    43.25
6541658    43.25
6541680    43.25
6541681    43.25
6541682    43.25
Name: sp_lon,


16it [00:03,  4.26it/s][A

 6534336   -13.00
6534337   -13.00
6534338   -13.00
6534360   -13.25
6534361   -13.25
6534362   -13.25
6541656   -13.00
6541657   -13.00
6541658   -13.00
6541680   -13.25
6541681   -13.25
6541682   -13.25
Name: sp_lat, dtype: float64 6534336    214320.0
6534337    214321.0
6534338    214322.0
6534360    214320.0
6534361    214321.0
6534362    214322.0
6541656    214320.0
6541657    214321.0
6541658    214322.0
6541680    214320.0
6541681    214321.0
6541682    214322.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



17it [00:04,  4.32it/s][A

Interp v10
6644184    46.75
6644185    46.75
6644186    46.75
6644208    46.75
6644209    46.75
6644210    46.75
6651504    47.00
6651505    47.00
6651506    47.00
6651528    47.00
6651529    47.00
6651530    47.00
Name: sp_lon, dtype: float64 6644184   -13.50
6644185   -13.50
6644186   -13.50
6644208   -13.75
6644209   -13.75
6644210   -13.75
6651504   -13.50
6651505   -13.50
6651506   -13.50
6651528   -13.75
6651529   -13.75
6651530   -13.75
Name: sp_lat, dtype: float64 6644184    214320.0
6644185    214321.0
6644186    214322.0
6644208    214320.0
6644209    214321.0
6644210    214322.0
6651504    214320.0
6651505    214321.0
6651506    214322.0
6651528    214320.0
6651529    214321.0
6651530    214322.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u



18it [00:04,  4.35it/s][A

Interp v10
901896    210.75
901897    210.75
901898    210.75
901920    210.75
901921    210.75
901922    210.75
909216    211.00
909217    211.00
909218    211.00
909240    211.00
909241    211.00
909242    211.00
Name: sp_lon, dtype: float64 901896    22.00
901897    22.00
901898    22.00
901920    21.75
901921    21.75
901922    21.75
909216    22.00
909217    22.00
909218    22.00
909240    21.75
909241    21.75
909242    21.75
Name: sp_lat, dtype: float64 901896    214320.0
901897    214321.0
901898    214322.0
901920    214320.0
901921    214321.0
901922    214322.0
909216    214320.0
909217    214321.0
909218    214322.0
909240    214320.0
909241    214321.0
909242    214322.0
Name: hours_since_ref, dtype: float64
Interp u10
Interp v
Interp u


18it [00:04,  3.94it/s]
  0%|          | 0/143 [00:15<?, ?it/s]


KeyboardInterrupt: 