This notebook extracts wave height and wind speed labels for the SAR images.

This notebook is under development; the TODOs are:
1. Read from real files or metadata dataframe
2. Adjust the wind speed for the measurement depth (height)
3. Make sure the logic works in situations with both wave height parameters
4. Make sure it works with files with both wave height and wind speed in the survey data

In [None]:
import numpy as np
import pandas as pd
import xarray as xr
import os
import pickle
from tqdm import tqdm
import math

In [None]:
#Read the pickled buoy survey dataframe from disk
write_folder = '../bouy_survey/1h_survey'
result_df_fn = 'result_df'

#NOTE: pickle.load can execute arbitrary code, only load files produced by this project
survey_pickle_path = os.path.join(write_folder, result_df_fn)
with open(survey_pickle_path, 'rb') as f_r:
    bouy_survey_df = pickle.load(f_r)

In [50]:
#Build the lookup keys: the SAR name is the last path component of the url
#up to its first '.', the buoy name is the file name up to its first '.'
bouy_survey_df['sar_name'] = bouy_survey_df['sar_url'].apply(
    lambda url: url.rsplit('/', 1)[-1].split('.', 1)[0]
)
bouy_survey_df['bouy_name'] = bouy_survey_df['bouy_file_name'].apply(
    lambda file_name: file_name.split('.', 1)[0]
)

#A sorted (sar_name, bouy_name) index enables faster lookup
bouy_survey_df = bouy_survey_df.set_index(['sar_name', 'bouy_name'])
bouy_survey_df = bouy_survey_df.sort_index()

In [None]:
#Paths of the gridded model files
swh_model_path = '/data/exjobb/sarssw/model/2021_swh_era5_world_wide.nc'
wspd_model_path = '/data/exjobb/sarssw/model/WIND_GLO_PHY_global/all.nc'

#Wave height model
SWH_model = xr.open_dataset(swh_model_path)

#Wind speed model
WSPD_model = xr.open_dataset(wspd_model_path)

In [54]:
#TODO remove, debugging
#Fake image file names built from the first ten (sar_name, bouy_name) index entries
sar_img_list = [
    f"{sar_key}-{bouy_key}-1.tiff"
    for sar_key, bouy_key in bouy_survey_df.index[:10]
]
sar_img_list

['S1A_EW_GRDH_1SDH_20210103T082208_20210103T082308_035971_0436C8_D32E-AR_TS_MO_Blakksnes-1.tiff',
 'S1A_EW_GRDH_1SDH_20210103T082208_20210103T082308_035971_0436C8_D32E-AR_TS_MO_Flateyjardufl-1.tiff',
 'S1A_EW_GRDH_1SDH_20210104T072454_20210104T072554_035985_043735_BACA-AR_TS_MO_Hornafjardardufl-1.tiff',
 'S1A_EW_GRDH_1SDH_20210104T072454_20210104T072554_035985_043735_BACA-AR_TS_MO_Kogurdufl-1.tiff',
 'S1A_EW_GRDH_1SDH_20210105T080545_20210105T080645_036000_0437AB_48AD-AR_TS_MO_Blakksnes-1.tiff',
 'S1A_EW_GRDH_1SDH_20210105T080545_20210105T080645_036000_0437AB_48AD-AR_TS_MO_Drangsnes-1.tiff',
 'S1A_EW_GRDH_1SDH_20210105T080545_20210105T080645_036000_0437AB_48AD-AR_TS_MO_Flateyjardufl-1.tiff',
 'S1A_EW_GRDH_1SDH_20210105T080545_20210105T080645_036000_0437AB_48AD-AR_TS_MO_Grimseyjarsund-1.tiff',
 'S1A_EW_GRDH_1SDH_20210105T080645_20210105T080715_036000_0437AB_520D-AR_TS_MO_Gardskagadufl-1.tiff',
 'S1A_EW_GRDH_1SDH_20210105T080645_20210105T080715_036000_0437AB_520D-AR_TS_MO_Grindavikurdufl

In [None]:
#Count survey rows per (sar_name, bouy_name) pair and show the pairs that occur more than once
pair_counts = bouy_survey_df.groupby(['sar_name', 'bouy_name'])['bouy_file_name'].count()
gb = pair_counts[pair_counts > 1]
display(gb)

In [None]:
#Show the survey dataframe (rendered by the notebook as the cell's last expression)
bouy_survey_df

In [None]:
#Debug lookup of one (sar_name, bouy_name) pair in the survey
test_key = ('S1A_EW_GRDM_1SDH_20210106T200345_20210106T200445_036022_043883_950A', 'GL_TS_MO_44078')
test = bouy_survey_df.loc[test_key]
first_test_row = test.iloc[0]
display(test)
display(first_test_row)

#Position and acquisition window are read from the first matching row
position_time_columns = ['bouy_longitude', 'bouy_latitude', 'sar_start_time', 'sar_stop_time']
long, lat, start_time, end_time = first_test_row[position_time_columns]

#Midpoint of the SAR acquisition window
mid_time = start_time + (end_time - start_time) / 2
display(start_time, end_time, mid_time)


In [None]:
#Debug lookup of a second pair, also showing the type returned by .loc
sar_name = 'S1A_IW_GRDH_1SDV_20211227T230544_20211227T230609_041201_04E566_660B'
bouy_name = 'GL_TS_MO_41159'
print(sar_name, bouy_name)

lookup_key = (sar_name, bouy_name)
results = bouy_survey_df.loc[lookup_key]
display(results, type(results))


In [None]:
#Print every row label and row, with their types, first for `results` and then for `test`
for frame in (results, test):
    for row_label, row in frame.iterrows():
        print(row_label, type(row_label))
        print(row, type(row))

In [None]:
#Configure and program how the models work
#Survey variable names accepted for each label variable
var_names = {
    'SWH': ['VHM0', 'VAVH'],
    'WSPD': ['WSPD'],
}

#Label variables, in the key order of var_names
var_list = list(var_names)

#Model dataset used for each label variable
models = dict(
    SWH=SWH_model,
    WSPD=WSPD_model,
)

#Names of the time/longitude/latitude coordinates in each model dataset
model_coords_columns = {
    'SWH': dict(time='time', longitude='longitude', latitude='latitude'),
    'WSPD': dict(time='time', longitude='lon', latitude='lat'),
}


def _swh_from_model_row(row):
    """Return the 'swh' entry of a model result row as a float."""
    return float(row['swh'])


def _wspd_from_model_row(row):
    """Return the magnitude of the (northward_wind, eastward_wind) vector of a model result row."""
    north = row['northward_wind']
    east = row['eastward_wind']
    return math.sqrt(north**2 + east**2)


#Function that turns a model result row into the label value, per variable
model_value_functions = {
    'SWH': _swh_from_model_row,
    'WSPD': _wspd_from_model_row,
}

class Var_results:
    """Holds the label found for one variable, with its origin, position and time.

    A new instance has found=False and placeholder values; the caller fills
    in the attributes once a value is available.
    """

    def __init__(self):
        # True once a value has been stored for this variable
        self.found = False
        # Origin of the value, empty until one is stored
        self.source = ''
        # The label value itself
        self.value = 0
        # Position and time belonging to the value
        self.long = 0
        self.lat = 0
        self.time = np.datetime64('1970-01-01T00:00')

    def __repr__(self):
        """Return a multi-line listing of all attributes."""
        # The long line previously printed self.source instead of self.long
        return f"""
            found {self.found}
            value {self.value}
            source {self.source}
            long {self.long}
            lat {self.lat}
            time {self.time}
            """

    def to_list(self):
        """Return [value, source, long, lat, time]; `found` is not included."""
        return [
            self.value,
            self.source,
            self.long,
            self.lat,
            self.time,
        ]

In [52]:
#Load sar image names
sar_img_dir = '/data/exjob/TODO'
#sar_img_list = os.listdir(sar_img_dir)

#Column schema of the label table: the file name followed by
#value/source/long/lat/time for SWH and then for WSPD
label_dtypes = {
    'sar_file_name':str,
    'SWH_value': float,
    'SWH_source':str,
    'SWH_long':float,
    'SWH_lat':float,
    'SWH_time':np.dtype('<M8[ns]'), #np.datetime64
    'WSPD_value': float,
    'WSPD_source':str,
    'WSPD_long':float,
    'WSPD_lat':float,
    'WSPD_time':np.dtype('<M8[ns]'), #np.datetime64
}

#Rows are collected in a list and the dataframe is built once after the loop,
#instead of enlarging the dataframe one row at a time
label_rows = []

for sar_img in sar_img_list:
    #File names have the form <sar_name>-<bouy_name>-<suffix>
    name_split = sar_img.split('-')
    assert len(name_split) == 3, f"wrong file name: {sar_img}"
    sar_name, bouy_name, _ = name_split

    result_vars = {
        'SWH':Var_results(),
        'WSPD':Var_results(),
    }

    #Extract value(s) from survey. The key is wrapped in a list so that .loc
    #always returns a DataFrame; with a bare tuple key a unique index would
    #give a Series, which breaks the .iloc[0][[...]] and .iterrows() calls below
    survey_results = bouy_survey_df.loc[[(sar_name, bouy_name)]]

    #Save longitude, latitude and time for eventual model search
    long, lat, start_time, end_time = survey_results.iloc[0][['bouy_longitude', 'bouy_latitude', 'sar_start_time', 'sar_stop_time']]
    #Midpoint of the SAR acquisition window
    time = start_time+(end_time-start_time)/2

    #Save value(s) from survey, only the first matching row per variable is used
    for _, result in survey_results.iterrows():
        for var in var_list:
            if (result['bouy_variable_name'] in var_names[var]) and (not result_vars[var].found):
                result_vars[var].found = True
                result_vars[var].source = 'bouy'
                result_vars[var].value = result['bouy_variable_value']
                result_vars[var].long = result['bouy_longitude']
                result_vars[var].lat = result['bouy_latitude']
                result_vars[var].time = result['bouy_time']

    #Complete missing value from model
    for var in var_list:
        if not result_vars[var].found:
            model_long = model_coords_columns[var]['longitude']
            model_lat = model_coords_columns[var]['latitude']
            model_time = model_coords_columns[var]['time']

            #Linear interpolation at the buoy position and the SAR mid time. The
            #three coordinates share one dimension so that a single point is returned
            #NOTE(review): presumably NaN when the point lies outside the model grid - confirm
            model_result = models[var].interp({
                model_long:xr.DataArray([long], dims='unused_dim'),
                model_lat:xr.DataArray([lat], dims='unused_dim'),
                model_time:xr.DataArray([time], dims='unused_dim')},
                method='linear').to_dataframe().iloc[0]

            result_vars[var].found = True
            result_vars[var].source = 'model'
            result_vars[var].value = model_value_functions[var](model_result)
            result_vars[var].long = model_result[model_long]
            result_vars[var].lat = model_result[model_lat]
            result_vars[var].time = model_result[model_time]

    #Collect the row, same column order as label_dtypes
    label_rows.append([sar_img] + result_vars['SWH'].to_list() + result_vars['WSPD'].to_list())

#Create dataframe for labels, typed according to label_dtypes (also when no rows were collected)
labels_df = pd.DataFrame(label_rows, columns=list(label_dtypes)).astype(label_dtypes)

In [53]:
#Show the label table (rendered by the notebook as the cell's last expression)
labels_df

Unnamed: 0,sar_file_name,SWH_value,SWH_source,SWH_long,SWH_lat,SWH_time,WSPD_value,WSPD_source,WSPD_long,WSPD_lat,WSPD_time
0,S1A_IW_GRDH_1SDV_20211227T230544_20211227T2306...,1.109,bouy,-76.948997,34.213001,2021-12-27 22:56:00,9.740965,model,-76.948997,34.213001,2021-12-27 23:05:56.500
1,S1A_IW_GRDH_1SDV_20211215T230545_20211215T2306...,0.849,bouy,-76.948997,34.213001,2021-12-15 22:56:00,3.740545,model,-76.948997,34.213001,2021-12-15 23:05:57.500
2,S1A_IW_GRDH_1SDV_20211203T230546_20211203T2306...,0.809,bouy,-76.948997,34.213001,2021-12-03 22:56:00,4.310575,model,-76.948997,34.213001,2021-12-03 23:05:58.500
3,S1A_IW_GRDH_1SDV_20211121T230546_20211121T2306...,1.249,bouy,-76.948997,34.213001,2021-11-21 22:56:00,3.397809,model,-76.948997,34.213001,2021-11-21 23:05:58.500
4,S1A_IW_GRDH_1SDV_20211109T230546_20211109T2306...,1.199,bouy,-76.948997,34.213001,2021-11-09 22:56:00,0.072924,model,-76.948997,34.213001,2021-11-09 23:05:58.500
5,S1A_IW_GRDH_1SDV_20210922T230546_20210922T2306...,2.139,bouy,-76.948997,34.213001,2021-09-22 22:56:00,9.723323,model,-76.948997,34.213001,2021-09-22 23:05:58.500
6,S1A_IW_GRDH_1SDV_20210910T230546_20210910T2306...,1.429,bouy,-76.948997,34.213001,2021-09-10 22:56:00,4.877619,model,-76.948997,34.213001,2021-09-10 23:05:58.500
7,S1A_IW_GRDH_1SDV_20210829T230545_20210829T2306...,0.749,bouy,-76.948997,34.213001,2021-08-29 22:56:00,4.437728,model,-76.948997,34.213001,2021-08-29 23:05:57.500
8,S1A_IW_GRDH_1SDV_20210817T230545_20210817T2306...,1.159,bouy,-76.948997,34.213001,2021-08-17 22:56:00,7.528013,model,-76.948997,34.213001,2021-08-17 23:05:57.500
9,S1A_IW_GRDH_1SDV_20210805T230544_20210805T2306...,1.329,bouy,-76.948997,34.213001,2021-08-05 23:26:00,3.094428,model,-76.948997,34.213001,2021-08-05 23:05:56.500
