This notebook collocate bouy measurements with both the era5 model (wave height and wind speed) and with the copernicus scatterometer model (windspeed only)

The bouy data is found in the INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030 dataset

The era5 model data is found at [cds.climate.copernicus.eu](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels?tab=overview). The wind speed parameters are u10 and v10 while for wave height we have used the swh parameter.
The data can be downloaded using the API_data_download.ipynb notebook
Since the there is a limit on file sizes the wind data for 1 year has to be downloaded in multiple files (2 months each) and then concatenated manually using ncrcat after first adding a record value to the original files using the ncks --mk_rec_dmn command.
Keep the wind speed and wave height models separeate since the resolution is different.

The copernicus wind speed model is found at [data.marine.copernicus](https://data.marine.copernicus.eu/product/WIND_GLO_PHY_L4_MY_012_006/description)
and can be downloaded and concatenated automatically using the download_data_copernicus.ipynb notebook.

The result of this comparison is a draframe saved in a pickle file on disk that can be used to calculate and plot stats from the comparison


### Imports and function definitions

In [85]:
import numpy as np
import pandas as pd
import xarray as xr
import os
import math
import pickle
import matplotlib as mpl
import matplotlib.pyplot as plt
from tqdm import tqdm
from functools import reduce
import operator
from shapely import Point, LineString, Polygon, MultiPolygon
import cartopy
import time
import itertools
#Enables the line profiler magic command #lprun
%load_ext line_profiler

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


In [86]:
#Filters the dataframe for datapoint within the land_multipolygon
def shore_line_filter(df, land_multipolygon, long_col_name='LONG', lat_col_name='LAT', force_brutefoce=False):
    #Return immediately if the dataframe is empty
    if df.shape[0] == 0:
        return df
    
    #distance considered close to shore (in degrees)
    close_dist = 0.00001

    if not force_brutefoce:
        unique_long = np.unique(df[long_col_name])
        n_unique_long = unique_long.shape[0]
        min_unique_long = unique_long.min()
        max_unique_long = unique_long.max()

        unique_lat = np.unique(df[lat_col_name])
        n_unique_lat = unique_lat.shape[0]
        min_unique_lat = unique_lat.min()
        max_unique_lat = unique_lat.max()
        
        #Try for the simple shape (point, line or rectangele) of the limit(s) of the bouy movement
        coord_points = list(itertools.product(set([min_unique_long, max_unique_long]), set([min_unique_lat, max_unique_lat])))
        if len(coord_points) == 1:
            limit_geo_obj = Point(coord_points)
        elif len(coord_points) == 2:
            limit_geo_obj = LineString(coord_points)
        elif len(coord_points) == 4:
            #Untangle the polygon itersection
            coord_points[2:4]=reversed(coord_points[2:4])
            #Form closed polygon by adding adding fist point as last
            coord_points.append(coord_points[0])
            limit_geo_obj = Polygon(coord_points)
        
        #If the limit_geo_obj does not overlap we we do not have to filter any datapoints
        if not limit_geo_obj.distance(land_multipolygon) <= close_dist:
            #print('No overlap with simple shape')
            return df
        #if land_multipolygon contains the limit_geo_limit then all points should be removed
        elif land_multipolygon.contains(limit_geo_obj):
            #print('Complete overlap with simple shape')
            return df[0:0] #returning empty dataframe

        #Partial overlap
        else:
            unique_pos = df[[long_col_name, lat_col_name]].rename(columns={'long_col_name':'LONG', 'lat_col_name':'LAT'}).drop_duplicates()

            #if there are many repeated coordinates (more than 50%) we only inspect the distasnce for the unique coordinates
            #and use a lookup table to filter the whole dataframe
            if unique_pos.shape[0] < df.shape[0]*0.5:
                #print("Dist to unique combinatons only")
                unique_pos['keep'] = unique_pos.apply(lambda row: Point([row['LONG'], row['LAT']]).distance(land_multipolygon) > close_dist, axis=1)
                unique_pos = unique_pos.set_index(['LONG', 'LAT'])

                #Create dataframe filter by looking up the value of unique_pos for each row
                dist_filter = df[[long_col_name, lat_col_name]].apply(
                    lambda row: unique_pos.loc[tuple(row)]['keep'],
                    axis=1)
                
                return df[dist_filter]

    #Sove by bruteforce
    #print("Solving with bruteforce")
    brute_force_dist_filter = df.apply(
        lambda row: Point([row[long_col_name], row[lat_col_name]]).distance(land_multipolygon) > close_dist,
        axis=1)

    return df[brute_force_dist_filter]

In [87]:
#Extracts data from the dataset ds within the time_filter (tuple or timespan) interval for the 
#variable var_name found in the deph range deph_range in meters, positive is under water, negative above water
#It can be either a tuple (min,max) or a value it needs to equal
#Quality controll is made for position, deph, time, and the variable
#Note depth is the coordinate index while deph (without t) is the actual depth in meters 
#long_limits, lat_limits are limit tuples form the model. Thjey are used to filter geographically
#land_multipolygon is to filter land and close to shore data
def valid_data_extraction(ds, var_name, deph_range, time_filter, long_limits, lat_limits, land_multipolygon):
    if var_name not in ds.data_vars:
        raise ValueError(var_name, ' Not found')

    #Add longitude, latidude and position_qc as variables indexed by time,depth as all other variables
    TIME = ds['TIME'].values
    DEPTH = ds['DEPTH'].values
    n_DEPTHS = len(DEPTH)

    dataset_columns = {
        'LONG':ds['LONGITUDE'],
        'LAT':ds['LATITUDE'],
        'POS_QC':ds['POSITION_QC'],
    }

    ds_pos = xr.Dataset(
        data_vars=
        {k:(
            ["TIME", 'DEPTH'],
            np.repeat(np.reshape(v.values, (-1,1)), n_DEPTHS, axis=1),
            v.attrs,
        )for (k,v) in dataset_columns.items()},
        coords=dict(
            TIME=TIME,
            DEPTH=DEPTH,
        )
    ).drop_vars('DEPTH')
    ds = xr.merge([ds.drop_dims(['LATITUDE', 'LONGITUDE', 'POSITION']), ds_pos])
    
    #Filter for time of interest
    if type(time_filter) is tuple:
        ds = ds.sel(TIME=slice(time_filter[0], time_filter[1]))
    else:
        ds = ds.sel(TIME=time_filter)
    
    #Filter only avalible columns
    colum_names = [var_name]
    colum_names_qc = [var_name + '_QC']
    
    #Add fixed columns
    colum_names.extend(['LONG', 'LAT', 'DEPH'])
    colum_names_qc.extend(['DEPH_QC'])
    time_pos_qc = ['TIME_QC', 'POS_QC']
    
    #Filter for columns of interest
    ds = ds[colum_names + colum_names_qc + time_pos_qc]

    df = ds.to_dataframe()

    #Remove all rows with 0 bouy value
    df = df[df[var_name] != 0]

    #Filter for data only within model limits
    geo_filter = (long_limits[0] <= df['LONG']) & (df['LONG'] <= long_limits[1]) & (lat_limits[0] <= df['LAT']) & (df['LAT'] <= lat_limits[1])
    df = df[geo_filter]
    
    QC_good = [1.0, 7.0]
    #QC control for time and pos uses all of these values according to https://doi.org/10.13155/59938
    QC_time_pos_good = [1, 2, 5, 7, 8]
    
    #Filter the variable and depth for good quality data 
    filter_qc = [df[c_qc].isin(QC_good) for c_qc in colum_names_qc]
    #Filter for good time and pos 
    filter_qc.extend([df[c_qc].isin(QC_time_pos_good) for c_qc in time_pos_qc])
    #Add filter for deph value
    if type(deph_range) == tuple:
        filter_qc.append((deph_range[0] <= df['DEPH']) & (df['DEPH'] <= deph_range[1]))
    else:
        filter_qc.append(df['DEPH'] == deph_range)
    #Element-wise AND the filter
    filter_qc = reduce(operator.and_, filter_qc)
    df = df[filter_qc][colum_names]

    #Filter data of df to only include data that does not lie close to shore
    df = shore_line_filter(df, land_multipolygon)

    return df

In [88]:
def search_file_sel(file, data_dir, var_list, var_depth, model_result_columns, model_coords_columns, model_result_functions, time_filter, long_limits, lat_limits, model_ds, land_multipolygon, result_df):
    #Conditionally create the result dataframe
    if result_df is None:
        result_df = pd.DataFrame({c: pd.Series(dtype=t) for c, t in {
            'bouy_file_name':str,
            'bouy_longitude':float,
            'bouy_latitude':float,
            'bouy_time':np.dtype('<M8[ns]'), #np.datetime64
            'bouy_depth':float,
            'bouy_variable_name':str,
            'bouy_variable_value':float,
            'model_value':object,
            'model_longitude':float,
            'model_latitude':float,
            'model_time':np.dtype('<M8[ns]'),  #np.datetime64
        }.items()})

    #Load the data from the file
    file_path = os.path.join(data_dir, file)
    ds = xr.open_dataset(file_path)# , engine='scipy')
    ds_vars = list(ds.data_vars)

    #Filter for variables that exist in the data
    common_variables = set(var_list).intersection(ds_vars)

    for var_name in common_variables:
        try:
            df = valid_data_extraction(ds, var_name, var_depth[var_name], time_filter[var_name], long_limits[var_name], lat_limits[var_name], land_multipolygon)
        except Exception as e:
            print(file, ' Could not extract data. Error: ' , e)
            continue
        
        if df.empty:
            continue
        else:
            #print(file, " : data found, shape: ", df.shape)
            pass

        #Drop unused columns and rename the other
        df = df.reset_index().drop(labels=['DEPTH'], axis=1).rename(columns={
            'DEPH':'bouy_depth',
            'TIME': 'bouy_time',
            'LONG': 'bouy_longitude',
            'LAT':'bouy_latitude',
            var_name:'bouy_variable_value'})
        #Add bouy_file_name column
        df['bouy_file_name'] = file
        #Add bouy_variable_name column
        df['bouy_variable_name'] = var_name

        #Shorter variable names for the model dimensions
        model_long = model_coords_columns[var_name]['longitude']
        model_lat = model_coords_columns[var_name]['latitude']
        model_time = model_coords_columns[var_name]['time']

        #Select the geographic region of interest, let time be the coordinate
        #Convert to dataframe reset indexing and rename the columns to signal model columns
        model_result_df = model_ds[var_name].sel({
            model_long:xr.DataArray(df['bouy_longitude'], dims=model_time),
            model_lat:xr.DataArray(df['bouy_latitude'], dims=model_time),
            model_time:xr.DataArray(df['bouy_time'], dims=model_time)},
            method='nearest')[model_result_columns[var_name]].to_dataframe().reset_index().rename(columns={
                model_time: 'model_time',
                model_long: 'model_longitude',
                model_lat: 'model_latitude'})
        
        #Creating the model_value column
        model_result_df['model_value'] = model_result_df.apply(model_result_functions[var_name], axis=1)
            
        #Concat model result with the bouy results
        df_concat = pd.concat([df, model_result_df], axis=1)

        #Filter nan values in the model columns
        df_concat = df_concat[df_concat[model_result_columns[var_name]].isna().apply(lambda x: not any(x), axis=1)]

        #Dropping the result columns
        df_concat = df_concat.drop(labels=model_result_columns[var_name], axis=1)

        result_df = pd.concat([result_df, df_concat])

    return result_df

In [89]:
#Searches the Appends
def search_file_interp(file, data_dir, var_list, var_depth, model_result_columns, model_coords_columns, model_result_functions, time_filter, long_limits, lat_limits, model_ds, land_multipolygon, result_df):
    #Conditionally create the result dataframe
    if result_df is None:
        result_df = pd.DataFrame({c: pd.Series(dtype=t) for c, t in {
            'bouy_file_name':str,
            'bouy_longitude':float,
            'bouy_latitude':float,
            'bouy_time':np.dtype('<M8[ns]'), #np.datetime64
            'bouy_depth':float,
            'bouy_variable_name':str,
            'bouy_variable_value':float,
            'model_value':object,
            'model_longitude':float,
            'model_latitude':float,
            'model_time':np.dtype('<M8[ns]'),  #np.datetime64
        }.items()})

    #Load the data from the file
    file_path = os.path.join(data_dir, file)
    ds = xr.open_dataset(file_path)# , engine='scipy')
    ds_vars = list(ds.data_vars)

    #Filter for variables that exist in the data
    common_variables = set(var_list).intersection(ds_vars)

    for var_name in common_variables:
        try:
            df = valid_data_extraction(ds, var_name, var_depth[var_name], time_filter[var_name], long_limits[var_name], lat_limits[var_name], land_multipolygon)
        except Exception as e:
            print(file, ' Could not extract data. Error: ' , e)
            continue
        
        if df.empty:
            continue
        else:
            #print(file, " : data found, shape: ", df.shape)
            pass

        #Drop unused columns and rename the other
        df = df.reset_index().drop(labels=['DEPTH'], axis=1).rename(columns={
            'DEPH':'bouy_depth',
            'TIME': 'bouy_time',
            'LONG': 'bouy_longitude',
            'LAT':'bouy_latitude',
            var_name:'bouy_variable_value'})
        #Add bouy_file_name column
        df['bouy_file_name'] = file
        #Add bouy_variable_name column
        df['bouy_variable_name'] = var_name

        #Shorter variable names for the model dimensions
        model_long = model_coords_columns[var_name]['longitude']
        model_lat = model_coords_columns[var_name]['latitude']
        model_time = model_coords_columns[var_name]['time']

        #Select the geographic region of interest, let time be the coordinate
        #Convert to dataframe reset indexing and rename the columns to signal model columns
        model_result_df = model_ds[var_name].interp({
            model_long:xr.DataArray(df['bouy_longitude'], dims='unused_dim'),
            model_lat:xr.DataArray(df['bouy_latitude'], dims='unused_dim'),
            model_time:xr.DataArray(df['bouy_time'], dims='unused_dim')},
            method='linear')[model_result_columns[var_name]].to_dataframe().rename(columns={
                model_time: 'model_time',
                model_long: 'model_longitude',
                model_lat:'model_latitude'})
        
        #Creating the model_value column
        model_result_df['model_value'] = model_result_df.apply(model_result_functions[var_name], axis=1)
            
        #Concat model result with the bouy results
        df_concat = pd.concat([df, model_result_df], axis=1)

        #Filter nan values in the model columns
        df_concat = df_concat[df_concat[model_result_columns[var_name]].isna().apply(lambda x: not any(x), axis=1)]

        #Dropping the result columns
        df_concat = df_concat.drop(labels=model_result_columns[var_name], axis=1)

        result_df = pd.concat([result_df, df_concat])

    return result_df

### Loading the models needed

In [90]:
#Loading significant wave height model for era 5 model
mod_swh_era5_data_file = '/data/exjobb/sarssw/model/2021_swh_era5_world_wide.nc'

mod_swh_era5_ds = xr.open_dataset(mod_swh_era5_data_file)
mod_swh_era5_long_coord = mod_swh_era5_ds.coords['longitude'].values
mod_swh_era5_long_limits = (mod_swh_era5_long_coord.min(), mod_swh_era5_long_coord.max())

mod_swh_lat_coord = mod_swh_era5_ds.coords['latitude'].values
mod_swh_era5_lat_limits = (mod_swh_lat_coord.min(), mod_swh_lat_coord.max())

mod_swh_time_coord = mod_swh_era5_ds.coords['time'].values
mod_swh_era5_time_limits = (mod_swh_time_coord.min(), mod_swh_time_coord.max())

display(mod_swh_era5_ds)

In [91]:
#Loading era5 wind speed model
mod_wspd_era5_filepath = '/data/exjobb/sarssw/model/2021_wspd_era5_world_wide/all.nc'
mod_wspd_era5_ds = xr.open_dataset(mod_wspd_era5_filepath)

mod_wspd_era5_long_coord = mod_wspd_era5_ds.coords['longitude'].values
mod_wspd_era5_long_limits = (mod_wspd_era5_long_coord.min(), mod_wspd_era5_long_coord.max())

mod_wspd_era5_lat_coord = mod_wspd_era5_ds.coords['latitude'].values
mod_wspd_era5_lat_limits = (mod_wspd_era5_lat_coord.min(), mod_wspd_era5_lat_coord.max())

mod_wspd_era5_time_coord = mod_wspd_era5_ds.coords['time'].values
mod_wspd_era5_time_limits = (mod_wspd_era5_time_coord.min(), mod_wspd_era5_time_coord.max())

display(mod_wspd_era5_ds)

In [92]:
#Loading copernicus Scatterometer wind speed model
mod_wspd_coper_filepath = '/data/exjobb/sarssw/model/WIND_GLO_PHY/all.nc'
mod_wspd_coper_ds = xr.open_dataset(mod_wspd_coper_filepath)

mod_wspd_coper_long_coord = mod_wspd_coper_ds.coords['lon'].values
mod_wspd_coper_long_limits = (mod_wspd_coper_long_coord.min(), mod_wspd_coper_long_coord.max())

mod_wspd_coper_lat_coord = mod_wspd_coper_ds.coords['lat'].values
mod_wspd_coper_lat_limits = (mod_wspd_coper_lat_coord.min(), mod_wspd_coper_lat_coord.max())

mod_wspd_coper_time_coord = mod_wspd_coper_ds.coords['time'].values
mod_wspd_coper_time_limits = (mod_wspd_coper_time_coord.min(), mod_wspd_coper_time_coord.max())

display(mod_wspd_coper_ds)

In [93]:
#Load and create land multipolygon, buffered (expanded) to limit distance to shore
land_list = list(cartopy.feature.NaturalEarthFeature('physical', 'land', '50m').geometries())
polygon_list = []
for p  in land_list:
    if type(p) == MultiPolygon:
        polygon_list.extend(p.geoms)
    else:
        polygon_list.append(p)
land_multipolygon = MultiPolygon([p for p in polygon_list]).buffer(0.01)

if not land_multipolygon.is_valid:
    raise ValueError('Invalid MultiPolygon')

### The program

In [94]:
#Progam the era5 swh & wspd handling
write_folder_era5 = './model_bouy_comparison_era5'

var_list_era5 = [
    'VHM0',
    'VAVH',
    'WSPD',
]
    
var_depth_era5 = {
    'VHM0':0,
    'VAVH':0,
    'WSPD':(-30,0)
    }

model_result_columns_era5 = {
    'VHM0': ['swh'],
    'VAVH': ['swh'],
    'WSPD': ['u10', 'v10'],
    }

model_coords_columns_era5 = {
    'VHM0': {'time':'time', 'longitude':'longitude', 'latitude':'latitude'},
    'VAVH': {'time':'time', 'longitude':'longitude', 'latitude':'latitude'},
    'WSPD': {'time':'time', 'longitude':'longitude', 'latitude':'latitude'},
    }

model_result_functions_era5 = {
    'VHM0': (lambda row: float(row['swh'])),
    'VAVH': (lambda row: float(row['swh'])),
    'WSPD': (lambda row: math.sqrt(row['u10']**2 + row['v10']**2)),
    }

model_time_limits_era5 = {
    'VHM0':mod_swh_era5_time_limits,
    'VAVH':mod_swh_era5_time_limits,
    'WSPD':mod_wspd_era5_time_limits,
}

model_long_limits_era5 = {
    'VHM0':mod_swh_era5_long_limits,
    'VAVH':mod_swh_era5_long_limits,
    'WSPD':mod_wspd_era5_long_limits,
}

model_lat_limits_era5 = {
    'VHM0':mod_swh_era5_lat_limits,
    'VAVH':mod_swh_era5_lat_limits,
    'WSPD':mod_wspd_era5_lat_limits,
}

models_era5 = {
    'VHM0':mod_swh_era5_ds,
    'VAVH':mod_swh_era5_ds,
    'WSPD':mod_wspd_era5_ds,
}

In [95]:
#Progam the copernicus wspd model handling
write_folder_coper = './model_bouy_comparison_wspd_copernicus'

var_list_coper = [
    'WSPD',
]

var_depth_coper = {
    'WSPD':(-30,0)
    }

model_result_columns_coper = {
    'WSPD': ['northward_wind', 'eastward_wind'],
    }

model_coords_columns_coper = {
    'WSPD': {'time':'time', 'longitude':'lon', 'latitude':'lat'},
    }

model_result_functions_coper = {
    'WSPD': (lambda row: math.sqrt(row['northward_wind']**2 + row['eastward_wind']**2)),
    }

model_time_limits_coper = {
    'WSPD':mod_wspd_coper_time_limits,
}

model_long_limits_coper = {
    'WSPD':mod_wspd_coper_long_limits,
}

model_lat_limits_coper = {
    'WSPD':mod_wspd_coper_lat_limits,
}

models_coper = {
    'WSPD':mod_wspd_coper_ds,
}

In [96]:
#Main program
bouy_data_dir = '/data/exjobb/sarssw/bouy/INSITU_GLO_PHYBGCWAV_DISCRETE_MYNRT_013_030/MO'

bouy_file_filter = [
    'GL_TS_MO_41121.nc', #Flips longitude sign in the middle of the data, from 66 to -66???! resutlts in asf search with over 7000 matches.
]

bouy_files = list(set(os.listdir(bouy_data_dir)).difference(bouy_file_filter))
#bouy_files = ['NO_TS_MO_6300117.nc'] #USed to test both models since this file exist for both

#Outer loop for the different models
for model_name, write_folder, var_list, var_depth, model_result_columns, model_coords_columns, model_result_functions, model_time_limits, model_long_limits, model_lat_limits, models in [
    #For era5
    ("era5 swh & wspd", write_folder_era5, var_list_era5, var_depth_era5, model_result_columns_era5, model_coords_columns_era5, model_result_functions_era5, model_time_limits_era5, model_long_limits_era5, model_lat_limits_era5, models_era5),
    #For copernicus
    ("Copernicus wspd", write_folder_coper, var_list_coper, var_depth_coper, model_result_columns_coper, model_coords_columns_coper, model_result_functions_coper, model_time_limits_coper, model_long_limits_coper, model_lat_limits_coper, models_coper),
]:
    result_df_sel = None
    result_df_interp = None
    run_dict = {}
    print(f"Running the {model_name} model")

    for bouy_file in tqdm(bouy_files):
        start = time.time()
        #Co-locate using select
        result_df_sel = search_file_sel(bouy_file, bouy_data_dir, var_list, var_depth, model_result_columns, model_coords_columns, model_result_functions, model_time_limits, model_long_limits, model_lat_limits, models, land_multipolygon, result_df_sel)
        end = time.time()
        run_dict[bouy_file+' (sel)'] = end-start

        start = time.time()
        #Co-locate using interpret
        result_df_interp = search_file_interp(bouy_file, bouy_data_dir, var_list, var_depth, model_result_columns, model_coords_columns, model_result_functions, model_time_limits, model_long_limits, model_lat_limits, models, land_multipolygon, result_df_interp)
        end = time.time()
        run_dict[bouy_file+' (interp)'] = end-start

    display("result_df_sel:", result_df_sel)
    display("result_df_interp:", result_df_interp)
    display("Files with longes runtime:", sorted(run_dict.items(), key=lambda x: x[1], reverse=True)[:30])

    #Save result_df_sel and result_df_interp with pickle
    result_df_fn = 'result_dfs'

    #Conditionally creates the folder for the result
    os.makedirs(write_folder, exist_ok=True)

    with open(os.path.join(write_folder, result_df_fn),'wb') as f_w:
        pickle.dump((result_df_sel, result_df_interp),f_w)


Running the era5 swh & wspd model


  1%|          | 23/2487 [00:02<03:28, 11.82it/s]

BS_TS_MO_BurgasBuoySURF.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
BS_TS_MO_BurgasBuoySURF.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
BS_TS_MO_BurgasBuoySURF.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
BS_TS_MO_BurgasBuoySURF.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')


 11%|█         | 270/2487 [01:31<34:03,  1.09it/s]  

MO_TS_MO_VIDA.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
MO_TS_MO_VIDA.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')


 44%|████▍     | 1101/2487 [06:26<02:45,  8.37it/s] 

MO_TS_MO_Molo-Bandiera.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
MO_TS_MO_Molo-Bandiera.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')


 57%|█████▋    | 1406/2487 [07:32<03:02,  5.91it/s]

MO_TS_MO_NADR-S1.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
MO_TS_MO_NADR-S1.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')


 64%|██████▎   | 1584/2487 [08:22<02:39,  5.65it/s]

MO_TS_MO_ESTELLENCS.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
MO_TS_MO_ESTELLENCS.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')


 88%|████████▊ | 2197/2487 [11:44<00:29,  9.69it/s]

BS_TS_MO_VarnaBuoySURF.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
BS_TS_MO_VarnaBuoySURF.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
BS_TS_MO_VarnaBuoySURF.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
BS_TS_MO_VarnaBuoySURF.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')


100%|██████████| 2487/2487 [13:22<00:00,  3.10it/s]


'result_df_sel:'

Unnamed: 0,bouy_file_name,bouy_longitude,bouy_latitude,bouy_time,bouy_depth,bouy_variable_name,bouy_variable_value,model_value,model_longitude,model_latitude,model_time
0,GL_TS_MO_2200190.nc,129.869995,36.91000,2021-01-01 00:00:00.000000000,0.0,WSPD,5.999,5.471754,129.75,37.0,2021-01-01 00:00:00
1,GL_TS_MO_2200190.nc,129.869995,36.91000,2021-01-01 01:00:00.000000000,0.0,WSPD,5.999,5.874218,129.75,37.0,2021-01-01 01:00:00
2,GL_TS_MO_2200190.nc,129.869995,36.91000,2021-01-01 02:00:00.000000000,0.0,WSPD,3.999,6.626309,129.75,37.0,2021-01-01 02:00:00
3,GL_TS_MO_2200190.nc,129.869995,36.91000,2021-01-01 03:00:00.000000000,0.0,WSPD,3.999,5.891838,129.75,37.0,2021-01-01 03:00:00
4,GL_TS_MO_2200190.nc,129.869995,36.91000,2021-01-01 04:00:00.000000000,0.0,WSPD,2.999,5.554173,129.75,37.0,2021-01-01 04:00:00
...,...,...,...,...,...,...,...,...,...,...,...
2340,NO_TS_MO_NOO.nc,7.635000,54.46833,2021-08-10 15:32:00.000000000,0.0,VHM0,1.020,0.966954,7.50,54.5,2021-08-10 16:00:00
2341,NO_TS_MO_NOO.nc,7.635000,54.46833,2021-08-10 16:01:59.999999744,0.0,VHM0,0.920,0.966954,7.50,54.5,2021-08-10 16:00:00
2342,NO_TS_MO_NOO.nc,7.635000,54.46833,2021-08-10 16:32:00.000000000,0.0,VHM0,0.910,0.943681,7.50,54.5,2021-08-10 17:00:00
2343,NO_TS_MO_NOO.nc,7.635000,54.46833,2021-08-10 17:02:00.000000000,0.0,VHM0,0.900,0.943681,7.50,54.5,2021-08-10 17:00:00


'result_df_interp:'

Unnamed: 0,bouy_file_name,bouy_longitude,bouy_latitude,bouy_time,bouy_depth,bouy_variable_name,bouy_variable_value,model_value,model_longitude,model_latitude,model_time
0,GL_TS_MO_2200190.nc,129.869995,36.91000,2021-01-01 00:00:00.000000000,0.0,WSPD,5.999,6.636300,129.869995,36.91000,2021-01-01 00:00:00.000000000
1,GL_TS_MO_2200190.nc,129.869995,36.91000,2021-01-01 01:00:00.000000000,0.0,WSPD,5.999,6.867942,129.869995,36.91000,2021-01-01 01:00:00.000000000
2,GL_TS_MO_2200190.nc,129.869995,36.91000,2021-01-01 02:00:00.000000000,0.0,WSPD,3.999,7.545671,129.869995,36.91000,2021-01-01 02:00:00.000000000
3,GL_TS_MO_2200190.nc,129.869995,36.91000,2021-01-01 03:00:00.000000000,0.0,WSPD,3.999,6.908834,129.869995,36.91000,2021-01-01 03:00:00.000000000
4,GL_TS_MO_2200190.nc,129.869995,36.91000,2021-01-01 04:00:00.000000000,0.0,WSPD,2.999,6.476328,129.869995,36.91000,2021-01-01 04:00:00.000000000
...,...,...,...,...,...,...,...,...,...,...,...
2340,NO_TS_MO_NOO.nc,7.635000,54.46833,2021-08-10 15:32:00.000000000,0.0,VHM0,1.020,0.976189,7.635000,54.46833,2021-08-10 15:32:00.000000000
2341,NO_TS_MO_NOO.nc,7.635000,54.46833,2021-08-10 16:01:59.999999744,0.0,VHM0,0.920,0.966935,7.635000,54.46833,2021-08-10 16:01:59.999999744
2342,NO_TS_MO_NOO.nc,7.635000,54.46833,2021-08-10 16:32:00.000000000,0.0,VHM0,0.910,0.957281,7.635000,54.46833,2021-08-10 16:32:00.000000000
2343,NO_TS_MO_NOO.nc,7.635000,54.46833,2021-08-10 17:02:00.000000000,0.0,VHM0,0.900,0.947337,7.635000,54.46833,2021-08-10 17:02:00.000000000


'Files with longes runtime:'

[('NO_TS_MO_NO1.nc (sel)', 13.335117816925049),
 ('NO_TS_MO_Butendiek.nc (sel)', 13.021711587905884),
 ('NO_TS_MO_NOR.nc (sel)', 12.630362033843994),
 ('NO_TS_MO_NO1.nc (interp)', 12.545516729354858),
 ('NO_TS_MO_Butendiek.nc (interp)', 12.199638366699219),
 ('NO_TS_MO_NOR.nc (interp)', 12.040164947509766),
 ('NO_TS_MO_PileSpiekeroog.nc (sel)', 8.755599737167358),
 ('NO_TS_MO_PileSpiekeroog.nc (interp)', 8.650689840316772),
 ('NO_TS_MO_Oseberg-A.nc (sel)', 2.882972478866577),
 ('NO_TS_MO_Q11.nc (sel)', 2.8608226776123047),
 ('NO_TS_MO_Roompotsluis.nc (sel)', 2.821261167526245),
 ('NO_TS_MO_Troll-A.nc (sel)', 2.7981905937194824),
 ('NO_TS_MO_Gullfaks-C.nc (sel)', 2.723310708999634),
 ('GL_TS_MO_Mesurho.nc (sel)', 2.690958023071289),
 ('GL_TS_MO_6100284.nc (sel)', 2.6882591247558594),
 ('GL_TS_MO_6100284.nc (interp)', 2.6783034801483154),
 ('GL_TS_MO_Mesurho.nc (interp)', 2.6583402156829834),
 ('GL_TS_MO_T8S180W.nc (sel)', 2.6352784633636475),
 ('GL_TS_MO_T8S180W.nc (interp)', 2.59417128

Running the Copernicus wspd model


  1%|          | 23/2487 [00:02<02:49, 14.51it/s]

BS_TS_MO_BurgasBuoySURF.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
BS_TS_MO_BurgasBuoySURF.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')


 11%|█         | 271/2487 [00:30<05:47,  6.38it/s]

MO_TS_MO_VIDA.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
MO_TS_MO_VIDA.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')


 44%|████▍     | 1103/2487 [02:12<01:31, 15.14it/s] 

MO_TS_MO_Molo-Bandiera.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
MO_TS_MO_Molo-Bandiera.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')


 57%|█████▋    | 1406/2487 [02:42<00:51, 21.08it/s]

MO_TS_MO_NADR-S1.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
MO_TS_MO_NADR-S1.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')


 64%|██████▎   | 1585/2487 [03:07<02:46,  5.43it/s]

MO_TS_MO_ESTELLENCS.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
MO_TS_MO_ESTELLENCS.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')


 88%|████████▊ | 2198/2487 [04:29<00:16, 17.14it/s]

BS_TS_MO_VarnaBuoySURF.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')
BS_TS_MO_VarnaBuoySURF.nc  Could not extract data. Error:  Timestamp('2021-01-01 00:00:00')


100%|██████████| 2487/2487 [05:04<00:00,  8.16it/s]


'result_df_sel:'

Unnamed: 0,bouy_file_name,bouy_longitude,bouy_latitude,bouy_time,bouy_depth,bouy_variable_name,bouy_variable_value,model_value,model_longitude,model_latitude,model_time
0,NO_TS_MO_NsbII.nc,6.333333,55.000000,2021-08-24 15:24:59.999999744,-10.0,WSPD,2.1,2.921387,6.3125,55.0625,2021-08-24 15:00:00
1,NO_TS_MO_NsbII.nc,6.333333,55.000000,2021-08-24 16:25:00.000000000,-10.0,WSPD,2.3,2.801303,6.3125,55.0625,2021-08-24 16:00:00
2,NO_TS_MO_NsbII.nc,6.333333,55.000000,2021-08-24 17:25:00.000000000,-10.0,WSPD,2.4,2.910756,6.3125,55.0625,2021-08-24 17:00:00
3,NO_TS_MO_NsbII.nc,6.333333,55.000000,2021-08-24 18:24:59.999999744,-10.0,WSPD,2.6,3.038322,6.3125,55.0625,2021-08-24 18:00:00
4,NO_TS_MO_NsbII.nc,6.333333,55.000000,2021-08-24 19:25:00.000000000,-10.0,WSPD,2.9,2.815546,6.3125,55.0625,2021-08-24 19:00:00
...,...,...,...,...,...,...,...,...,...,...,...
3690,NO_TS_MO_6200150.nc,0.700000,53.599998,2021-07-26 03:00:00.000000000,0.0,WSPD,3.6,3.739759,0.6875,53.5625,2021-07-26 03:00:00
3691,NO_TS_MO_6200150.nc,0.700000,53.599998,2021-07-26 04:00:00.000000000,0.0,WSPD,4.1,4.345492,0.6875,53.5625,2021-07-26 04:00:00
3692,NO_TS_MO_6200150.nc,0.700000,53.599998,2021-07-26 05:00:00.000000000,0.0,WSPD,2.6,4.524787,0.6875,53.5625,2021-07-26 05:00:00
3693,NO_TS_MO_6200150.nc,0.700000,53.599998,2021-07-26 07:00:00.000000000,0.0,WSPD,3.1,4.234347,0.6875,53.5625,2021-07-26 07:00:00


'result_df_interp:'

Unnamed: 0,bouy_file_name,bouy_longitude,bouy_latitude,bouy_time,bouy_depth,bouy_variable_name,bouy_variable_value,model_value,model_longitude,model_latitude,model_time
0,NO_TS_MO_NsbII.nc,6.333333,55.000000,2021-08-24 15:24:59.999999744,-10.0,WSPD,2.1,2.861967,6.333333,55.000000,2021-08-24 15:24:59.999999744
1,NO_TS_MO_NsbII.nc,6.333333,55.000000,2021-08-24 16:25:00.000000000,-10.0,WSPD,2.3,2.835958,6.333333,55.000000,2021-08-24 16:25:00.000000000
2,NO_TS_MO_NsbII.nc,6.333333,55.000000,2021-08-24 17:25:00.000000000,-10.0,WSPD,2.4,2.947598,6.333333,55.000000,2021-08-24 17:25:00.000000000
3,NO_TS_MO_NsbII.nc,6.333333,55.000000,2021-08-24 18:24:59.999999744,-10.0,WSPD,2.6,2.912773,6.333333,55.000000,2021-08-24 18:24:59.999999744
4,NO_TS_MO_NsbII.nc,6.333333,55.000000,2021-08-24 19:25:00.000000000,-10.0,WSPD,2.9,2.794572,6.333333,55.000000,2021-08-24 19:25:00.000000000
...,...,...,...,...,...,...,...,...,...,...,...
3690,NO_TS_MO_6200150.nc,0.700000,53.599998,2021-07-26 03:00:00.000000000,0.0,WSPD,3.6,3.729330,0.700000,53.599998,2021-07-26 03:00:00.000000000
3691,NO_TS_MO_6200150.nc,0.700000,53.599998,2021-07-26 04:00:00.000000000,0.0,WSPD,4.1,4.379087,0.700000,53.599998,2021-07-26 04:00:00.000000000
3692,NO_TS_MO_6200150.nc,0.700000,53.599998,2021-07-26 05:00:00.000000000,0.0,WSPD,2.6,4.594951,0.700000,53.599998,2021-07-26 05:00:00.000000000
3693,NO_TS_MO_6200150.nc,0.700000,53.599998,2021-07-26 07:00:00.000000000,0.0,WSPD,3.1,4.281080,0.700000,53.599998,2021-07-26 07:00:00.000000000


'Files with longes runtime:'

[('NO_TS_MO_PileSpiekeroog.nc (sel)', 8.638317346572876),
 ('NO_TS_MO_PileSpiekeroog.nc (interp)', 8.30845594406128),
 ('NO_TS_MO_Oseberg-A.nc (sel)', 1.6627395153045654),
 ('NO_TS_MO_Heimdal.nc (sel)', 1.4289124011993408),
 ('NO_TS_MO_Gullfaks-C.nc (sel)', 1.4216618537902832),
 ('NO_TS_MO_Troll-A.nc (sel)', 1.38997220993042),
 ('NO_TS_MO_ZeebruggeDam.nc (sel)', 1.3000659942626953),
 ('NO_TS_MO_Heimdal.nc (interp)', 0.9999022483825684),
 ('NO_TS_MO_Troll-A.nc (interp)', 0.9986343383789062),
 ('NO_TS_MO_Gullfaks-C.nc (interp)', 0.9947028160095215),
 ('GL_TS_MO_Mesurho.nc (interp)', 0.9801650047302246),
 ('GL_TS_MO_Mesurho.nc (sel)', 0.978543758392334),
 ('GL_TS_MO_6100284.nc (sel)', 0.9602437019348145),
 ('GL_TS_MO_6100284.nc (interp)', 0.9566218852996826),
 ('NO_TS_MO_Oseberg-A.nc (interp)', 0.9391462802886963),
 ('NO_TS_MO_ZeebruggeDam.nc (interp)', 0.8633885383605957),
 ('GL_TS_MO_Smile.nc (sel)', 0.827049732208252),
 ('GL_TS_MO_6200310.nc (sel)', 0.8071751594543457),
 ('GL_TS_MO_620