# Imports 

In [1]:
import sys
sys.path.append('..')

import utils.data_structures as ds
import utils.helpers as hp

import os
import geopandas as gpd
import pandas as pd
import numpy as np
import fiona.crs as fcrs

from sklearn.mixture import GaussianMixture as gmm
from sklearn.linear_model import LinearRegression
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

import bokeh

import geoviews as gv
gv.extension('bokeh')

import holoviews as hv
hv.notebook_extension('bokeh')

import hvplot.xarray
import hvplot.pandas

from bokeh.models import HoverTool
from holoviews.operation.datashader import regrid, datashade, rasterize
from holoviews.operation import histogram, decimate
from holoviews import opts
import datashader.transfer_functions as tf

  PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel)


In [2]:
# Root of the project tree. Set the SWISS_PROJECT_ROOT environment variable to
# point somewhere else instead of editing this cell; the default reproduces the
# original hard-coded location.
project_root = os.environ.get('SWISS_PROJECT_ROOT', '/Volumes/HADDOCK 460GB/swiss_project')

# The trailing '' keeps the trailing path separator the original literal had.
data = os.path.join(project_root, 'data', '')
query_dir = os.path.join(project_root, 'query_dir')
query_dir_name = 'sa3d_WGS84_10m_v2'

## Load Data from Query

In [3]:
# Directory of the saved query: `query_dir_name` inside `query_dir`.
saved_query_dir = os.path.join(query_dir, query_dir_name)
sm = ds.SWISSMap(
    load_dir=saved_query_dir,
    calc_dir=query_dir,
    mission=2,
    prod_nr=6,
)

In [4]:
sm.load_dir_specs

{'query_paths': {'dem': '/Volumes/HADDOCK 460GB/swiss_project/data/elevation/sa3d_10m/Clip_swissALTI3D_10m_elevation_r2015_core_utm32_eWGS84.tif',
  'land_cover': '/Volumes/HADDOCK 460GB/swiss_project/data/land_cover/corine/CLC_2012_utm32_DeFROST.tif',
  'slope': '/Volumes/HADDOCK 460GB/swiss_project/data/elevation/ALOS/ALOS_PRISM_clip_DeFROST_slope.tif',
  'snow': '/Volumes/HADDOCK 460GB/swiss_project/data/snow_cover/',
  'icesat': '/Volumes/HADDOCK 460GB/swiss_project/data/icesat2/ATL06_2019_02_22',
  'slf': '/Volumes/HADDOCK 460GB/swiss_project/data/SLF'},
 'is_query_saved_in_query_dir': False,
 'query_dir_name': 'sa3d_WGS84_10m_v2',
 'time': (None, None),
 'bbox': None,
 'segments': None,
 'epsg': None,
 'chunks': 'auto',
 'n_jobs': 4}

In [5]:
# Load everything stored for the saved query; load() returns five objects.
# Of these, only `ice_data` is referenced by the cells visible in this notebook.
qmap, snow_data, ice_data, slf_data, bbox = sm.load()

Using  /Volumes/HADDOCK 460GB/swiss_project/query_dir/sa3d_WGS84_10m_v2 as query_dir...
Query ICESat data ...


  1%|          | 2/319 [00:00<00:16, 19.70it/s]

Query snow cover data ...


100%|██████████| 319/319 [00:06<00:00, 47.13it/s] 
100%|██████████| 3/3 [00:00<00:00, 34.61it/s]

Query background rasters ...
Query SLF data ...





In [6]:
# Keep only points that have both a DEM value and a snow-cover value.
# NOTE(review): this removes NaN only. The describe() output of this notebook
# reports dem min = -inf, so non-finite DEM values appear to pass this filter —
# confirm whether they should be dropped here too.
has_dem_and_snow = ice_data[['dem', 'snow_cover']].notna().all(axis=1)
ice_data = gpd.GeoDataFrame(ice_data.loc[has_dem_and_snow],
                            crs=fcrs.from_epsg(4326))

# pd.to_datetime replaces .astype(np.datetime64): pandas >= 2.0 refuses casts to
# a unit-less datetime64 dtype.
ice_data['time'] = pd.to_datetime(ice_data['time'])

In [7]:
# Difference between the measured height and the DEM value at the same point.
ice_data['height_diff'] = ice_data['height'] - ice_data['dem']

In [8]:
ice_data.head()

Unnamed: 0,ascending,cycle_number,ground_track_id,height,q_flg,rgt,s_elv,time,x,y,geometry,dem,land_cover,slope,snow_cover_time_delta,snow_cover_time_ind,snow_cover,height_diff
5818,True,1.0,gt1l,1864.462036,0,290.0,100.696312,2018-10-17 14:42:23.596978,8.758259,46.318051,POINT (8.758259077702691 46.31805123918301),1860.438721,23.0,35.34391,-33456403022000,71.0,0,4.023315
5819,True,1.0,gt1l,1721.466431,0,290.0,51.53986,2018-10-17 14:42:24.665254,8.748774,46.386057,POINT (8.748773957348378 46.38605684308412),1720.698486,26.0,25.770983,-33455334746000,71.0,0,0.767944
5820,True,1.0,gt1l,2073.385986,0,290.0,101.772285,2018-10-17 14:42:24.783609,8.747734,46.393594,POINT (8.747734032276407 46.39359381093187),2072.991943,32.0,25.811035,-33455216391000,71.0,0,0.394043
5821,True,1.0,gt1l,2079.492188,0,290.0,105.075165,2018-10-17 14:42:24.786427,8.74771,46.393773,POINT (8.747709553575515 46.393773280587),2076.958252,32.0,29.46554,-33455213573000,71.0,0,2.533936
5822,True,1.0,gt1l,2282.73584,0,290.0,27.209972,2018-10-17 14:42:24.842658,8.747224,46.397363,POINT (8.747223540712147 46.39736292417498),2281.469727,32.0,28.987577,-33455157342000,71.0,0,1.266113


# Overview

In [9]:
summary = ice_data.describe().transpose()
summary

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
cycle_number,180477.0,1.807527,0.6373978,1.0,1.0,2.0,2.0,3.0
height,180477.0,1596.009,832.3812,244.61,855.3181,1589.771,2305.67,4310.811
q_flg,180477.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
rgt,180477.0,806.6939,412.5657,8.0,412.0,831.0,1235.0,1357.0
s_elv,180477.0,30.13317,33.83157,0.4000067,10.13109,23.41444,37.6198,3819.228
x,180477.0,8.364342,0.8017735,6.802981,7.658719,8.456991,9.036754,9.547459
y,180477.0,46.56644,0.3105812,46.03285,46.31827,46.51831,46.79776,47.37563
dem,180477.0,-inf,,-inf,614.19,1425.175,2213.699,4302.709
land_cover,180477.0,23.90025,8.890505,1.0,18.0,26.0,31.0,41.0
slope,180477.0,12.29753,8.316291,0.0,5.396424,11.91372,17.86786,73.17075


In [10]:
# Plot options shared by the cells below.
size = {'width': 800, 'height': 600}
plot_opts = dict(width=600, height=600)
shade_defaults = {
    'x_sampling': 1,
    'y_sampling': 1,
    'width': 1200,
    'height': 682,
    'cmap': 'white',
}
# Range of height differences used for axis limits and outlier filtering.
height_diff_range = (-30, 30)

# Height Difference over Water Bodies

## Cluster Points over Water

In [11]:
# Neighbourhood radius passed to the clustering helper: 1 / kms_per_radian,
# i.e. one kilometre expressed as an angle for the haversine metric.
kms_per_radian = 6371.0088
epsilon = 1 / kms_per_radian

# Land-cover codes selected as water for this section.
water_idxs = [40, 41, 44]
is_water = ice_data['land_cover'].isin(water_idxs)
water_ice = ice_data.loc[is_water].copy()

clusters, core_idxs = hp.cluster_points(
    water_ice,
    epsilon=epsilon,
    min_samples=1,
    algorithm='ball_tree',
    metric='haversine',
)
water_ice['cluster'] = clusters

# core_idxs are used positionally (.iloc); translate them to index labels and
# flag every row carrying one of those labels.
idxs = water_ice.iloc[core_idxs].index
water_ice['is_core'] = water_ice.index.isin(idxs)

In [12]:
# Hover tooltip showing the cluster id of a point.
hover = HoverTool(tooltips=[('cluster', '@cluster')])

# To inspect a single cluster, filter first, e.g. .query('cluster == 6').
cluster_cols = ['x', 'y', 'time', 'cluster']
pts = gv.Points(water_ice[cluster_cols]).opts(size=3, color='cluster', tools=[hover])
plot = gv.tile_sources.ESRI * pts
plot.opts(**plot_opts)

To reliably exclude highly dynamic water reservoirs, we need to go through the clusters by hand.

In [13]:
# Hand-picked ids of the clusters that are kept (original order preserved).
acc_clusters = np.array([
    11, 12, 34, 35, 39, 43, 38, 37, 18, 20,
    27, 5, 6, 7, 13, 14, 15, 44, 31, 28,
])

In [14]:
# Restrict the water points to the accepted clusters.
idxs = water_ice['cluster'].isin(acc_clusters)
water_ice = water_ice.loc[idxs]

## Height Difference over Clusters

In [15]:
# Box plot of height_diff per cluster, with the height_diff histogram adjoined.
cluster_height_diff = water_ice[['cluster', 'height_diff']]
bw = hv.BoxWhisker(cluster_height_diff, kdims=['cluster'], vdims='height_diff')
bw = bw.opts(ylim=height_diff_range, **size)
yhist = histogram(bw, dimension='height_diff', bin_range=height_diff_range)
bw << yhist

  lpad = span*(padding[0])
  upad = span*(padding[1])
  lpad = span*(padding[0])
  upad = span*(padding[1])


In [16]:
# Spread of height_diff within each accepted cluster.
# NOTE(review): np.std is applied to each group's DataFrame, so the ddof in
# effect (and the NaN entries in the output) depend on how pandas dispatches
# that call — confirm before comparing with Series.std().
std_height_diff_per_cluster = water_ice[['cluster', 'height_diff']].groupby('cluster').apply(np.std)['height_diff']
std_height_diff_per_cluster

cluster
5      9.982196
6     19.318679
7      2.895611
11     0.107480
12          NaN
13     0.000000
14     0.425786
15     0.000000
18     0.093093
20     0.127406
27          NaN
28     0.000000
31     0.688833
34     0.000000
35     0.000000
37          NaN
38     0.088146
39     2.000349
43     2.957883
44     2.577988
Name: height_diff, dtype: float64

Height differences seem to be constant over individual water bodies. On inspection, the outliers appear to come mainly from misclassification (points that are not actually water). Large spreads are expected for data that is spread out over time (see below). The height differences among stable measurements still show some spread, possibly due to a non-constant shift.

In [17]:
water_ice[['cluster', 'time']].query('cluster == 5').hvplot.hist('time')

## Height Difference over Water as a Function of Altitude and Time

### Over Stable Waterbodies

In [18]:
# Keep the columns needed below and drop points with height_diff >= 100.
water_cols = ['dem', 'cluster', 'height_diff', 'time']
filt_water_ice_pts = water_ice.loc[water_ice['height_diff'] < 100, water_cols]

In [19]:
# height_diff against DEM height, with a marginal histogram for each axis.
scatter_cols = ['dem', 'height_diff', 'time']
pts = filt_water_ice_pts[scatter_cols].hvplot.scatter(x='dem', y='height_diff').opts(size=3)
xhist = histogram(pts, dimension='dem')
yhist = histogram(pts, dimension='height_diff')

composition = pts << yhist << xhist
composition.opts(**size)

In [20]:
# height_diff over time, coloured by cluster.
filt_water_ice_pts[['time', 'height_diff', 'cluster']].hvplot.scatter(
    x='time',
    y='height_diff',
    color='cluster',
    colormap='nipy_spectral',
)

# Height Differences over rgt

In [21]:
np.unique(ice_data.rgt)

array([   8.,   69.,  229.,  290.,  351.,  389.,  412.,  450.,  511.,
        732.,  793.,  831.,  854.,  892.,  953., 1014., 1174., 1235.,
       1273., 1296., 1334., 1357.])

In [22]:
# Hover tooltip with the rgt and land-cover code of a point.
hover = HoverTool(tooltips=[
    ('rgt', '@rgt'),
    ('land_cover', '@land_cover'),
])

track_points = gv.Points(ice_data[['x', 'y', 'time', 'rgt', 'land_cover']]).opts(size=3)
pts = decimate(track_points)
plot = gv.tile_sources.ESRI * pts.opts(color='rgt', tools=[hover])
plot.opts(**plot_opts)

In [23]:
rgt = 8
ground_track_id = 'gt1l'

# Select the rows of one rgt / ground_track_id combination. Both conditions go
# into a single query so the filter is evaluated on the frame being filtered.
# The previous version indexed the rgt-filtered frame with a boolean mask built
# from the full ice_data, which pandas has to reindex (emitting a warning).
track_cols = ['y', 'rgt', 'height', 'height_diff', 'dem', 'time', 'ground_track_id']
rgt_ice = ice_data[track_cols].query('rgt == @rgt and ground_track_id == @ground_track_id')

# Height and height_diff along y, plus the distribution of acquisition times.
(rgt_ice.hvplot('y', ['height']) + rgt_ice.hvplot('y', 'height_diff') + rgt_ice.hvplot.hist('time')).cols(1)

  """
  lpad = span*(padding[0])
  upad = span*(padding[1])
  lpad = span*(padding[0])
  upad = span*(padding[1])


# Condition data on a priori sensitive regions

In [None]:
# Rescale snow_cover_time_delta by the number of nanoseconds per day
# (presumably converting nanoseconds to days — confirm the raw unit).
# NOTE(review): the column is overwritten, so running this cell twice divides
# twice; re-run from the loading cell instead.
ns_per_day = 1e9 * 3600 * 24
ice_data['snow_cover_time_delta'] = ice_data['snow_cover_time_delta'] / ns_per_day

In [None]:
analysis_cols = ['y', 'x', 'height', 'height_diff', 'time', 'dem', 'snow_cover',
                 'land_cover', 'slope', 'rgt', 'snow_cover_time_delta']
all_ice = (
    ice_data[analysis_cols]
    # exclude obvious outliers: keep height_diff strictly inside height_diff_range
    .query('{} < height_diff < {}'.format(*height_diff_range))
    # keep points whose snow_cover_time_delta lies strictly between -1 and 1
    .query('-1 < snow_cover_time_delta < 1')
)

In [None]:
hv.BoxWhisker(all_ice[['land_cover', 'height_diff']], ['land_cover'], 'height_diff').opts(**size)

In [None]:
# Land-cover codes kept for the analysis below.
include = [18, 21, 26, 27, 32, 31]
idxs = all_ice['land_cover'].isin(include)
# .copy() makes all_ice an independent frame: later cells add columns to it,
# which on a filtered slice triggers pandas' SettingWithCopyWarning.
all_ice = all_ice.loc[idxs].copy()

In [None]:
# Hover tooltip with rgt, land-cover code and snow-cover value.
hover = HoverTool(tooltips=[
    ('rgt', '@rgt'),
    ('land_cover', '@land_cover'),
    ('snow_cover', '@snow_cover'),
])

point_cols = ['x', 'y', 'land_cover', 'rgt', 'snow_cover']
pts = gv.Points(all_ice[point_cols]).opts(size=3, color='rgt', tools=[hover])
plot = gv.tile_sources.ESRI * pts
plot.opts(**plot_opts)

In [None]:
# snow_cover values strictly above this threshold are labelled 1, the rest 0.
thr = 200
# Builtin int replaces np.int, an alias of it that was deprecated in NumPy 1.20
# and removed in 1.24; the resulting values are the same.
all_ice.loc[:, 'binary_snow_cover'] = (all_ice.loc[:, 'snow_cover'] > thr).astype(int)
cmap = ['blue', 'red']

# Scatter with height_diff on the x axis and dem on the y axis.
height_diff_height = all_ice.hvplot('height_diff', 'dem', kind='scatter', color='binary_snow_cover',
                                    colormap=cmap, alpha=0.2, colorbar=False)

pts_snow = hv.Points(all_ice.query('binary_snow_cover == 1'))
pts_no_snow = hv.Points(all_ice.query('binary_snow_cover == 0'))

# Marginal histograms: no-snow and snow distributions overlaid, one per axis.
xhist, yhist = (histogram(pts_no_snow, dimension=dim, num_bins=60) *
                histogram(pts_snow, dimension=dim, num_bins=60)
                for dim in ['height_diff', 'dem'])

# height_diff is plotted on x, so its range belongs on xlim. The previous ylim
# clipped the dem axis to height_diff_range.
composition = decimate(height_diff_height).opts(xlim=height_diff_range) << yhist.opts(width=125) << xhist.opts(height=125)
composition.opts(opts.Histogram(alpha=0.3))

In [None]:
# NOTE: pd.cut with an integer bin count creates 5 equal-width bins over the
# snow_cover range — not equal-count quantiles, despite the column name.
all_ice['quantile_snow_cover'] = pd.cut(all_ice['snow_cover'], bins=5, labels=False)

# Scatter with height_diff on the x axis and dem on the y axis.
height_diff_height = all_ice.hvplot('height_diff', 'dem', kind='scatter', color='quantile_snow_cover',
                                    colormap='viridis', alpha=0.2, colorbar=False)

# One Points element per snow-cover bin that actually occurs.
snow_bins = np.unique(all_ice['quantile_snow_cover'])
pts_qsnow = [hv.Points(all_ice.query('quantile_snow_cover == {}'.format(i)))
             for i in snow_bins]

# Marginal histograms: one overlaid histogram per snow-cover bin, per axis.
xhist, yhist = (hv.Overlay([histogram(bin_pts, dimension=dim, num_bins=60) for bin_pts in pts_qsnow])
                for dim in ['height_diff', 'dem'])

# height_diff is plotted on x, so its range belongs on xlim. The previous ylim
# clipped the dem axis to height_diff_range.
composition = decimate(height_diff_height).opts(xlim=height_diff_range) << yhist.opts(width=125) << xhist.opts(height=125)
composition.opts(opts.Histogram(alpha=0.3))

## Sensitivity of Mean

In [None]:
# Work with the magnitude of the slope only.
all_ice['slope'] = all_ice['slope'].abs()

In [None]:
thresholds = np.arange(10, 240)
linear_regressor = LinearRegression()
mean_snow_height_diff = []

# Only points with slope < 8 enter this scan.
X = all_ice.query('slope < 8').copy()
y = X['height_diff'].values
for thr in thresholds:
    # Regress height_diff on the binary snow flag. With a single 0/1 regressor
    # the fitted coefficient is the difference between the two group means.
    X['binary_snow_cover'] = X['snow_cover'] > thr
    x = X['binary_snow_cover'].values.reshape(-1, 1)
    linear_regressor.fit(x, y)
    mean_snow_height_diff.append(linear_regressor.coef_[0])

line = hv.Curve((thresholds, mean_snow_height_diff)).opts(color='red')
line

In [None]:
# Grid of thresholds scanned in the sensitivity analysis below.
snow_thresholds = np.arange(10, 240)
min_height_diff, max_height_diff = -30, 30
# Slope thresholds are the 5 %, 8 %, ... quantiles of the observed slopes.
slope_quantiles = np.arange(0.05, 0.99, 0.03)
slope_thresholds = np.quantile(all_ice['slope'], slope_quantiles)

In [None]:
linear_regressor = LinearRegression()

# coef_grid[i, j]: regression coefficient of height_diff on the binary snow flag
# for slope threshold i and snow threshold j.
coef_grid = np.zeros((len(slope_thresholds), len(snow_thresholds)))
for i, slope_thr in enumerate(slope_thresholds):
    # The slope / height_diff subset does not depend on the snow threshold, so
    # it is built once per row rather than once per grid cell.
    X = (all_ice
         .query('slope < @slope_thr')
         .query('@min_height_diff < height_diff < @max_height_diff')
         .copy())
    y = X['height_diff'].values
    for j, snow_thr in enumerate(snow_thresholds):
        # Builtin int: the np.int alias was removed in NumPy 1.24.
        X.loc[:, 'binary_snow_cover'] = (X['snow_cover'] > snow_thr).astype(int)
        x = X['binary_snow_cover'].values.reshape(-1, 1)
        linear_regressor.fit(x, y)
        coef_grid[i, j] = linear_regressor.coef_[0]

In [None]:
# Gridded dataset: snow thresholds along x, slope thresholds along y, and the
# fitted coefficient as the value.
dataset = hv.Dataset(
    (snow_thresholds, slope_thresholds, coef_grid),
    kdims=['snow_thresholds', 'slope_thresholds'],
    vdims='sensitivity',
)
dataset.to(hv.Image).opts(cmap='viridis', colorbar=True, **size)

In [None]:
thresholds = [50, 137, 200, 240]
A = all_ice.query('slope < ' + str(5)).copy()

# The height_diff filter is the same for every threshold, so apply it once.
in_range = A.query('@min_height_diff < height_diff < @max_height_diff')

lines = []
for thr in thresholds:
    X = in_range.copy()
    # NOTE(review): this cell uses >= while the other cells use > for the snow
    # threshold — confirm which is intended.
    # Builtin int: the np.int alias was removed in NumPy 1.24.
    X.loc[:, 'binary_snow_cover'] = (X['snow_cover'] >= thr).astype(int)
    box = hv.BoxWhisker(X[['binary_snow_cover', 'height_diff']], ['binary_snow_cover'], 'height_diff')
    lines.append(box.opts(title=str(thr)))

# One box plot per threshold, two per row.
hv.Layout(lines).cols(2)

### Supervised Classification

In [None]:
# preprocessing transformer
class PreProc(BaseEstimator, TransformerMixin):
    """Reduce the input to a single binary snow feature.

    Parameters
    ----------
    snow_threshold : number, default 1
        Rows whose ``snow_cover`` is strictly greater than this value are
        flagged True.
    """

    def __init__(self, snow_threshold=1):
        self.snow_threshold = snow_threshold

    def fit(self, X, y=None):
        """Learn nothing and return ``self``.

        ``y`` is optional so the transformer also works without a target,
        e.g. through ``fit_transform(X)``.
        """
        return self

    def transform(self, X):
        """Return ``X['snow_cover'] > snow_threshold`` as a boolean column.

        The result is reshaped to one column (shape ``(-1, 1)``).
        """
        is_snow = X['snow_cover'] > self.snow_threshold  # binary snow
        return is_snow.values.reshape(-1, 1)

In [None]:
# number of snow height bins
n = 10
md, ms = 10, 255

In [None]:
# Binary snow feature followed by an SVC; the grid search scans the snow
# threshold of the preprocessing step with 5-fold cross-validation.
pip = Pipeline(steps=[
    ('preproc', PreProc()),
    ('clsf', SVC()),
])
grid = {'preproc__snow_threshold': snow_thresholds}
model = GridSearchCV(estimator=pip, param_grid=grid, cv=5)

In [None]:
# Heat map of mean cross-validation scores: rows follow snow_thresholds, columns
# follow slope_thresholds.
heat_map = np.zeros((len(snow_thresholds), len(slope_thresholds)))

# NOTE(review): `models` is not defined in any cell visible in this notebook, so
# this loop raises NameError on a fresh kernel. It presumably should hold one
# fitted GridSearchCV per slope threshold — confirm and add the fitting cell.
# The loop variable also rebinds the name `model`.
for i, model in enumerate(models):
    heat_map[:, i] = model.cv_results_['mean_test_score']

# NOTE(review): hv.Image appears to treat the first array row as the top of the
# image; if so, the snow axis is drawn inverted relative to `bounds` — confirm.
bounds = (slope_thresholds[0], snow_thresholds[0], slope_thresholds[-1], snow_thresholds[-1])
hv.Image(heat_map, bounds=bounds).opts(cmap='viridis', colorbar=True, **size)

In [None]:
# Threshold grids: slope thresholds are the 5 %, 10 %, ..., 95 % quantiles of
# the observed slopes.
snow_thresholds = np.arange(10, 240)
slope_quantiles = np.arange(0.05, 1, 0.05)
slope_thresholds = np.quantile(all_ice['slope'], slope_quantiles)

# Fit a GMM

In [None]:
n = 10 # number of snow height bins (used as the number of mixture components)
md = 10
ms = 255
# Fixed seed: the mixture is initialised randomly, so without random_state the
# fitted components and their labels can change between runs.
model = gmm(n_components=n, random_state=0)

In [None]:
# prepare data matrix
gmm_features = ['snow_cover', 'height_diff', 'dem', 'land_cover']
X = all_ice[gmm_features]

# Fit the mixture and store each point's component label.
all_ice['height_labels'] = model.fit_predict(X)

In [None]:
# Binary snow feature followed by the linear regressor; the parameter grid
# scans the snow threshold of the preprocessing step.
pip = Pipeline(steps=[
    ('preproc', PreProc()),
    ('regress', linear_regressor),
])

grid = {'preproc__snow_threshold': snow_thresholds}

In [None]:
model.means_

In [None]:
# Scatter with height_diff on the x axis and dem on the y axis, coloured by
# mixture label.
height_diff_height = all_ice.hvplot('height_diff', 'dem', kind='scatter', color='height_labels',
                                    colormap='viridis', alpha=0.2, colorbar=False)

# One Points element per mixture label.
pts_height_label = [hv.Points(all_ice.query('height_labels == ' + str(i)))
                    for i in np.unique(all_ice['height_labels'])]

# Marginal histograms per mixture label. The previous version iterated over
# pts_qsnow (the snow-cover bins of an earlier cell) and so never showed the
# labels computed here.
xhist, yhist = (hv.Overlay([histogram(label_pts, dimension=dim, num_bins=60) for label_pts in pts_height_label])
                for dim in ['height_diff', 'dem'])

# height_diff is plotted on x, so its range belongs on xlim. The previous ylim
# clipped the dem axis to height_diff_range.
composition = decimate(height_diff_height).opts(xlim=height_diff_range) << yhist.opts(width=125) << xhist.opts(height=125)
composition.opts(opts.Histogram(alpha=0.3))