In [62]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import pickle

import tifffile as tif

import xarray as xr
import os

from collections import defaultdict
from itertools import islice
import random

from skimage.feature import graycomatrix, graycoprops
from skimage import io, color, img_as_ubyte
from tqdm import tqdm

import math

import sys
# caution: path[0] is reserved for script path (or '' in REPL)
sys.path.insert(1, '/home/sarssw/axel/sarssw')

import pipeline.sar_dataset_loader as sar_dataset_loader

from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [3]:
#Load bouy survey dataframe
bouy_survey_fn = '../bouy_survey/1h_survey/result_df'
swh_model_fn = '/data/exjobb/sarssw/model/2021_swh_era5_world_wide.nc'
wspd_model_fn = '/data/exjobb/sarssw/model/WIND_GLO_PHY_global/all.nc'

In [8]:
sar_dir = '/data/exjobb/sarssw/sar_multiprocess/'   
svc_file = '../sar_survey/out/homogenity_svc.pkl'
sar_paths = [os.path.join(sar_dir, f) for f in os.listdir(sar_dir)[:10_000]]

In [9]:
dataset_df = sar_dataset_loader.load_features_labels_df(sar_paths, svc_file, bouy_survey_fn, swh_model_fn, wspd_model_fn)
display(dataset_df)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Calculating features


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [18:12<00:00,  9.15it/s]


Collecting labels


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3529/3529 [05:04<00:00, 11.60it/s]


Unnamed: 0_level_0,Unnamed: 1_level_0,file_name,pol,offset_index,time,lon,lat,shape,hom_test,contrast,dissimilarity,...,SWH_value,SWH_source,SWH_lon,SWH_lat,SWH_time,WSPD_value,WSPD_source,WSPD_lon,WSPD_lat,WSPD_time
sar_name,bouy_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
S1A_IW_GRDH_1SDV_20210105T174127_20210105T174152_036006_0437EB_28A0,NO_TS_MO_6200304,S1A_IW_GRDH_1SDV_20210105T174127_20210105T1741...,VV,3,2021-01-05T17:41:33.428514560,1.806937,51.103794,"[2, 200, 200]",True,1317.699763,27.591974,...,1.30,bouy,1.8000,51.103001,2021-01-05 18:00:00,10.800001,bouy,1.8000,51.103001,2021-01-05 18:00:00.000
S1A_IW_GRDH_1SDV_20210105T174127_20210105T174152_036006_0437EB_28A0,NO_TS_MO_6200304,S1A_IW_GRDH_1SDV_20210105T174127_20210105T1741...,VH,3,2021-01-05T17:41:33.428514560,1.806937,51.103794,"[2, 200, 200]",True,2277.659526,36.902053,...,1.30,bouy,1.8000,51.103001,2021-01-05 18:00:00,10.800001,bouy,1.8000,51.103001,2021-01-05 18:00:00.000
S1A_IW_GRDH_1SDV_20210105T174127_20210105T174152_036006_0437EB_28A0,NO_TS_MO_6200304,S1A_IW_GRDH_1SDV_20210105T174127_20210105T1741...,VV,7,2021-01-05T17:41:33.428514560,1.792880,51.102225,"[2, 200, 200]",False,297.051842,13.003737,...,1.30,bouy,1.8000,51.103001,2021-01-05 18:00:00,10.800001,bouy,1.8000,51.103001,2021-01-05 18:00:00.000
S1A_IW_GRDH_1SDV_20210105T174127_20210105T174152_036006_0437EB_28A0,NO_TS_MO_6200304,S1A_IW_GRDH_1SDV_20210105T174127_20210105T1741...,VH,7,2021-01-05T17:41:33.428514560,1.792880,51.102225,"[2, 200, 200]",False,336.131132,14.058974,...,1.30,bouy,1.8000,51.103001,2021-01-05 18:00:00,10.800001,bouy,1.8000,51.103001,2021-01-05 18:00:00.000
S1A_IW_GRDH_1SDV_20210105T174127_20210105T174152_036006_0437EB_28A0,NO_TS_MO_6200304,S1A_IW_GRDH_1SDV_20210105T174127_20210105T1741...,VV,0,2021-01-05T17:41:33.428514560,1.799909,51.103010,"[2, 200, 200]",False,301.612526,13.131105,...,1.30,bouy,1.8000,51.103001,2021-01-05 18:00:00,10.800001,bouy,1.8000,51.103001,2021-01-05 18:00:00.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
S1B_IW_GRDH_1SDV_20211220T173236_20211220T173301_030112_039877_1123,NO_TS_MO_WielingenNoord,S1B_IW_GRDH_1SDV_20211220T173236_20211220T1733...,VH,3,2021-12-20T17:32:50.392121856,3.422604,51.434250,"[2, 200, 200]",True,2166.084132,36.099289,...,0.29,bouy,3.4155,51.433399,2021-12-20 17:30:00,2.745161,model,3.4155,51.433399,2021-12-20 17:32:48.500
S1B_IW_GRDH_1SDV_20211220T173236_20211220T173301_030112_039877_1123,NO_TS_MO_ZeebruggePortBuoy,S1B_IW_GRDH_1SDV_20211220T173236_20211220T1733...,VV,7,2021-12-20T17:32:49.552413184,3.186021,51.355200,"[2, 200, 200]",False,30.694053,0.867474,...,0.24,bouy,3.1930,51.355999,2021-12-20 17:30:00,3.065272,model,3.1930,51.355999,2021-12-20 17:32:48.500
S1B_IW_GRDH_1SDV_20211220T173236_20211220T173301_030112_039877_1123,NO_TS_MO_ZeebruggePortBuoy,S1B_IW_GRDH_1SDV_20211220T173236_20211220T1733...,VH,7,2021-12-20T17:32:49.552413184,3.186021,51.355200,"[2, 200, 200]",False,31.827368,0.937947,...,0.24,bouy,3.1930,51.355999,2021-12-20 17:30:00,3.065272,model,3.1930,51.355999,2021-12-20 17:32:48.500
S1B_IW_GRDH_1SDV_20211220T173236_20211220T173301_030112_039877_1123,NO_TS_MO_ZeebruggePortBuoy,S1B_IW_GRDH_1SDV_20211220T173236_20211220T1733...,VV,0,2021-12-20T17:32:49.552413184,3.193080,51.356017,"[2, 200, 200]",False,65.959526,1.222211,...,0.24,bouy,3.1930,51.355999,2021-12-20 17:30:00,3.065272,model,3.1930,51.355999,2021-12-20 17:32:48.500


In [10]:
with open('./saved_datasets/10_000_all.pkl','wb') as f:
    pickle.dump(dataset_df,f)

In [12]:
dataset_df.describe()

Unnamed: 0,offset_index,lon,lat,contrast,dissimilarity,homogeneity,energy,correlation,ASM,sigma_mean,...,sigma_mean_over_var,sigma_min,sigma_max,sigma_range,SWH_value,SWH_lon,SWH_lat,WSPD_value,WSPD_lon,WSPD_lat
count,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,20000.0,...,20000.0,20000.0,20000.0,20000.0,19540.0,20000.0,20000.0,20000.0,20000.0,20000.0
mean,3.9853,3.34866,51.697458,713.268936,16.530972,0.225253,0.151871,0.036222,0.092595,0.017792,...,152.656817,-0.001661,14.109707,14.111368,0.874858,3.348563,51.697337,6.729422,3.348563,51.697337
std,2.597195,0.832471,0.555309,656.951135,11.932541,0.291685,0.263692,0.086831,0.233417,0.061824,...,191.511867,0.001209,208.806843,208.806801,0.654513,0.832468,0.555321,3.321047,0.832468,0.555321
min,0.0,1.559438,50.744174,3.006053,0.021763,0.02901,0.010691,-0.084233,0.000114,-0.001057,...,-1193.484991,-0.004728,0.002671,0.0037,0.01,1.5677,50.740501,0.141065,1.5677,50.740501
25%,2.0,2.891639,51.380243,58.772572,3.98125,0.045275,0.017352,0.002416,0.000301,0.000356,...,23.025709,-0.002392,0.018784,0.021383,0.4,2.883,51.3811,4.158809,2.883,51.3811
50%,4.0,3.268071,51.560559,504.676645,17.068408,0.066872,0.028018,0.010972,0.000785,0.00363,...,119.794349,-0.001699,0.091723,0.093306,0.72,3.276389,51.564999,6.535422,3.276389,51.564999
75%,6.0,3.423983,51.773013,1349.453914,28.306474,0.25878,0.099806,0.028405,0.009961,0.019058,...,234.790883,-0.001046,0.407423,0.4084,1.15,3.4155,51.768566,8.899603,3.4155,51.768566
max,8.0,8.1773,54.224891,2381.686789,37.866184,0.999082,0.998737,0.734424,0.997475,2.292296,...,1379.812373,0.009541,10539.817731,10539.820857,4.49,8.168055,54.219299,21.100001,8.168055,54.219299


In [84]:
merge_df.pol_VH

0       VH
1       VH
2       VH
3       VH
4       VH
        ..
3700    VH
3701    VH
3702    VH
3703    VH
3704    VH
Name: pol_VH, Length: 3705, dtype: object

In [67]:
#filter homogenious images with IW mode and no na
hom_df = dataset_df[dataset_df.hom_test]
hom_df = hom_df[hom_df.file_name.str.contains('IW')]

db_feats = [
    'sigma_mean','sigma_var', 'sigma_mean_over_var', 
]

for feat in db_feats:
    hom_df[feat + '_dB'] = 10 * np.log10(hom_df[feat])

hom_df = hom_df.dropna()

#merge the vv and vh polarization
VV_df, VH_df = hom_df[hom_df.pol == 'VV'], hom_df[hom_df.pol == 'VH']
merge_df = VV_df.merge(VH_df, on='file_name', suffixes=('_VV', '_VH'))

features = [
    'contrast', 'dissimilarity', 'homogeneity', 
    'energy', 'correlation', 'ASM', 'sigma_mean',
    'sigma_var', 'sigma_mean_over_var', 'sigma_min', 
    'sigma_max', 'sigma_range'
] #+ [feat + '_dB' for feat in db_feats]

X, y = merge_df[[f + p for f in features for p in ['_VV', '_VH']]], merge_df.SWH_value_VV
display(X)
display(y) 

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,correlation_VV,correlation_VH,ASM_VV,ASM_VH,sigma_mean_VV,sigma_mean_VH,sigma_var_VV,sigma_var_VH,sigma_mean_over_var_VV,sigma_mean_over_var_VH,sigma_min_VV,sigma_min_VH,sigma_max_VV,sigma_max_VH,sigma_range_VV,sigma_range_VH
0,0.050576,0.012838,0.000183,0.000459,0.050029,0.000460,0.000711,1.598546e-06,70.340214,287.929564,-0.000731,-0.002045,0.257758,0.007432,0.258489,0.009478
1,0.029183,0.012029,0.000157,0.000529,0.051822,0.000508,0.000722,1.645633e-06,71.751888,308.439294,0.001196,-0.002030,0.246017,0.008822,0.244821,0.010852
2,0.062388,0.009146,0.000228,0.000931,0.035190,0.000558,0.000326,5.856930e-07,107.900932,953.479316,0.001280,-0.000977,0.196051,0.006798,0.194772,0.007775
3,0.056301,0.009971,0.000239,0.000880,0.034396,0.000577,0.000308,5.915944e-07,111.689583,975.100439,0.001158,-0.000951,0.196051,0.005629,0.194894,0.006580
4,0.078309,0.021355,0.000247,0.000797,0.033889,0.000544,0.000304,5.701430e-07,111.314117,954.877693,0.001158,-0.000951,0.196051,0.005374,0.194894,0.006326
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3700,0.025356,-0.011947,0.000185,0.000252,0.018839,0.000088,0.000108,1.455448e-06,174.491944,60.531530,-0.001462,-0.002344,0.103635,0.007712,0.105097,0.010056
3701,0.027226,0.003776,0.000182,0.000290,0.019828,0.000103,0.000113,1.421964e-06,175.633248,72.521346,-0.001462,-0.002344,0.106107,0.008484,0.107569,0.010828
3702,0.036400,-0.013106,0.000205,0.000257,0.018514,0.000065,0.000101,1.335488e-06,183.401642,48.971614,-0.001726,-0.002344,0.106107,0.006888,0.107833,0.009232
3703,0.021032,-0.004049,0.000182,0.000269,0.019052,0.000091,0.000109,1.469930e-06,174.662545,61.945713,-0.001462,-0.002344,0.103635,0.008484,0.105097,0.010828


0       1.30
1       1.30
2       1.65
3       1.65
4       1.65
        ... 
3700    0.29
3701    0.29
3702    0.29
3703    0.29
3704    0.29
Name: SWH_value_VV, Length: 3705, dtype: float64

In [68]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [69]:
est = GradientBoostingRegressor(
     n_estimators=100, learning_rate=0.1, max_depth=1, random_state=0,
     loss='squared_error'
).fit(X_train, y_train)

In [70]:
mean_squared_error(y_test, est.predict(X_test))

0.20307774392844805

In [71]:
mean_squared_error(y_test, est.predict(X_test), squared=False)

0.4506414804791588

In [72]:
regr = SVR()
regr.fit(X_train, y_train)

In [73]:
mean_squared_error(y_test, regr.predict(X_test))

0.1493639656275556

In [74]:
mean_squared_error(y_test, regr.predict(X_test), squared=False)

0.3864763454954981

In [89]:
len(est.feature_importances_), len(X.columns)

(16, 16)

In [91]:
sorted(list(zip(est.feature_importances_, X.columns)), key=lambda x : x[0])

[(0.0, 'correlation_VV'),
 (0.0, 'sigma_var_VH'),
 (0.0, 'sigma_max_VH'),
 (0.0, 'sigma_range_VH'),
 (0.015120209354394318, 'correlation_VH'),
 (0.01660676970338113, 'ASM_VH'),
 (0.018023940192203092, 'sigma_var_VV'),
 (0.020531865464077703, 'sigma_min_VH'),
 (0.02416547397812679, 'ASM_VV'),
 (0.03278895163860681, 'sigma_min_VV'),
 (0.03708816798849404, 'sigma_mean_VV'),
 (0.053539631782694226, 'sigma_mean_over_var_VH'),
 (0.06712231643136607, 'sigma_mean_over_var_VV'),
 (0.1187434512286565, 'sigma_range_VV'),
 (0.2192504222231688, 'sigma_max_VV'),
 (0.37701880001483057, 'sigma_mean_VH')]

In [53]:
merge_df.corr().SWH_value_VV.sort_values(ascending=False, key=abs)

  merge_df.corr().SWH_value_VV.sort_values(ascending=False, key=abs)


SWH_value_VH                 1.000000
SWH_value_VV                 1.000000
WSPD_value_VH                0.751548
WSPD_value_VV                0.751548
sigma_max_VV                 0.627218
sigma_range_VV               0.626411
sigma_mean_over_var_dB_VV   -0.626240
sigma_var_dB_VV              0.615380
sigma_mean_VV                0.602393
sigma_mean_dB_VV             0.580755
sigma_min_VV                 0.536934
sigma_mean_over_var_VV      -0.517473
sigma_mean_dB_VH             0.511404
sigma_var_VV                 0.489991
sigma_mean_VH                0.487071
sigma_mean_over_var_VH       0.393180
sigma_mean_over_var_dB_VH    0.381079
SWH_lat_VV                   0.312960
SWH_lat_VH                   0.312960
WSPD_lat_VV                  0.312960
WSPD_lat_VH                  0.312960
lat_VH                       0.312735
lat_VV                       0.312735
sigma_var_dB_VH              0.268786
sigma_max_VH                 0.255381
lon_VV                       0.236568
lon_VH      

In [60]:
merge_df.corr().WSPD_value_VV.sort_values(ascending=False, key=abs)

  merge_df.corr().WSPD_value_VV.sort_values(ascending=False, key=abs)


WSPD_value_VH                1.000000
WSPD_value_VV                1.000000
SWH_value_VH                 0.751548
SWH_value_VV                 0.751548
sigma_var_dB_VV              0.621191
sigma_mean_dB_VV             0.617718
sigma_mean_over_var_dB_VV   -0.591353
sigma_mean_VV                0.569824
sigma_min_VV                 0.559172
sigma_max_VV                 0.558342
sigma_range_VV               0.556858
sigma_mean_dB_VH             0.553572
sigma_mean_over_var_VV      -0.519147
sigma_mean_VH                0.513750
sigma_mean_over_var_dB_VH    0.450983
sigma_mean_over_var_VH       0.448744
sigma_var_VV                 0.416234
ASM_VV                      -0.299396
energy_VV                   -0.284739
SWH_lat_VV                   0.264619
SWH_lat_VH                   0.264619
WSPD_lat_VV                  0.264619
WSPD_lat_VH                  0.264619
lat_VH                       0.264512
lat_VV                       0.264512
sigma_var_dB_VH              0.237186
sigma_max_VH

In [93]:
sns.pairplot(data=merge_df)



<seaborn.axisgrid.PairGrid at 0x7fca79961810>

Error in callback <function flush_figures at 0x7fe9ad375a80> (for post_execute):



KeyboardInterrupt

