In [None]:
import numpy as np
import xarray as xr
from xarray_filters import *

In [None]:
from xarray_filters.tests.test_data import new_test_dataset

In [None]:
# Build the test dataset with four named layers; `X` is the input to most cells below.
X = new_test_dataset(layers=('temperature', 'pressure', 'wind_x', 'wind_y'))

In [None]:
# Bare expression: the dataset's repr becomes the cell output.
X

In [None]:
# Names listed by dir() for MLDataset that are not listed for xr.Dataset.
set(dir(MLDataset)) - set(dir(xr.Dataset))

In [None]:
# Average every layer over the 'z' and 't' dimensions, then display the result.
X_means_raster = X.mean(dim=('z', 't'))
X_means_raster

In [None]:
# Convert X with to_ml_features() using its default arguments and display the result.
# NOTE(review): later cells rebind the name `f` to functions, so the cells that
# read `f.space` / `f.layer` only work when the notebook is run top to bottom.
f = X.to_ml_features()
f

In [None]:
# Show the `space` member of the to_ml_features() result.
f.space

In [None]:
# Show the `layer` member of the to_ml_features() result.
f.layer

In [None]:
# First four entries of the 'space' index, converted to a plain Python list.
f.space.indexes['space'].tolist()[:4]

In [None]:
# Level names of the 'space' index (presumably a pandas MultiIndex - TODO confirm).
f.space.indexes['space'].names

In [None]:
# Average over 'x' first, then build features with an explicit trans_dims tuple.
# NOTE(review): trans_dims presumably controls the dimension order before
# flattening - confirm against the xarray_filters documentation.
example2 = X.mean(dim='x').to_ml_features(trans_dims=('t', 'z', 'y'))
example2

In [None]:
@data_vars_kwargs
def magnitude(wind_x, wind_y, **other_data_vars):
    """Return the Euclidean norm of the two wind components.

    Parameters
    ----------
    wind_x, wind_y : array-like
        The two components; they are squared, summed and square-rooted
        element-wise.
    **other_data_vars
        Any further keyword arguments; accepted and ignored.

    Returns
    -------
    dict
        A one-entry mapping with the key ``'magnitude'``.
    """
    sum_of_squares = wind_x ** 2 + wind_y ** 2
    return {'magnitude': sum_of_squares ** 0.5}
# Run `magnitude` through new_layer, naming the two wind layers as inputs, and
# call .compute() on what new_layer returns so the cell shows the result.
X.new_layer(name='magnitude', layers=['wind_x', 'wind_y'], transforms=magnitude).compute()

In [None]:
@for_each_array
def plus_one(arr, **kw):
    """Return ``arr`` with one added to every element.

    Extra keyword arguments are accepted and ignored.
    """
    incremented = arr + 1
    return incremented

@for_each_array
def minus_one(arr, **kw):
    """Return ``arr`` with one subtracted from every element.

    Extra keyword arguments are accepted and ignored.
    """
    decremented = arr - 1
    return decremented


# Apply each element-wise transform to X through new_layer.
plus = X.new_layer(transforms=plus_one, compute=True)
minus = X.new_layer(transforms=minus_one, compute=True)

# (arr + 1) - (arr - 1) equals 2 only up to floating-point rounding, so compare
# with a tolerance instead of exact equality.
assert np.allclose((plus.wind_x - minus.wind_x).values, 2.)
assert np.allclose((plus.temperature - minus.temperature).values, 2.)

In [None]:
@return_dataset
@for_each_array
def transform_example(arr, **kw):
    """Centre ``arr`` on its median and scale it by its interquartile range.

    All three quantiles (0.25, 0.5, 0.75) are taken along the 'z' dimension.
    Extra keyword arguments are accepted and ignored.
    """
    q25, q50, q75 = (arr.quantile(q, dim='z') for q in (0.25, 0.5, 0.75))
    return (arr - q50) / (q75 - q25)

# Apply transform_example through new_layer with compute=True and return_dict=False;
# the returned object is the cell output.
X.new_layer(transforms=transform_example, compute=True, return_dict=False)

In [None]:
@return_dataset
@for_each_array
def agg_example(arr, **kw):
    """Average ``arr`` over 't', then take the 0.25 quantile along 'z'.

    Extra keyword arguments are accepted and ignored.
    """
    time_mean = arr.mean(dim='t')
    return time_mean.quantile(0.25, dim='z')

# Pass both transforms as a tuple (presumably applied in the order given - TODO confirm).
aggregated = X.new_layer(transforms=(transform_example, agg_example), compute=True)

In [None]:
# Display the result of the two chained transforms.
aggregated

In [None]:
from functools import partial
@for_each_array
def f(arr, **kw):
    """Return ``arr`` with one added to every element.

    Extra keyword arguments are accepted and ignored.

    NOTE(review): this rebinds ``f``, which an earlier cell assigned to the
    output of ``X.to_ml_features()``, and repeats the computation of
    ``plus_one`` from an earlier cell.
    """
    incremented = arr + 1
    return incremented

# Call the decorated function directly on `aggregated` and subtract the input
# to show the increment that was applied.
f(aggregated) - aggregated

In [None]:
from collections import OrderedDict
@return_dataset
@data_vars_kwargs
def f(wind_x, wind_y, temperature, pressure):
    """Combine the wind components into a magnitude and pass the rest through.

    Returns an OrderedDict with the keys ``'mag'``, ``'temperature'`` and
    ``'pressure'``, in that order.

    NOTE(review): this is another rebinding of the name ``f`` in this notebook;
    earlier cells bound it to a features dataset and to an add-one function.
    """
    layers = OrderedDict()
    layers['mag'] = (wind_x ** 2 + wind_y ** 2) ** 0.5
    layers['temperature'] = temperature
    layers['pressure'] = pressure
    return layers

# Call the decorated function directly on the dataset and display what it returns.
f(X)

In [None]:
# Convert the output of f(X) with to_ml_features() and display it.
feat = f(X).to_ml_features()
feat

In [None]:
# Show the `features` member of the converted dataset.
feat.features

In [None]:
# Show the underlying values of `features` (via its .values attribute).
feat.features.values

In [None]:
@for_each_array
def agg_x(arr, **kw):
    """Average ``arr`` over the 'x' dimension; extra keyword arguments are ignored."""
    x_mean = arr.mean(dim='x')
    return x_mean
@for_each_array
def agg_y(arr, **kw):
    """Average ``arr`` over the 'y' dimension; extra keyword arguments are ignored."""
    y_mean = arr.mean(dim='y')
    return y_mean
@for_each_array
def agg_z(arr, **kw):
    """Average ``arr`` over the 'z' dimension; extra keyword arguments are ignored."""
    z_mean = arr.mean(dim='z')
    return z_mean


# Average over 'x', 'y' and 'z' via three transforms passed as a tuple, with
# flatten=False and compute=True (presumably only 't' remains - TODO confirm),
# then display the result.
time_series = X.new_layer(transforms=(agg_x, agg_y, agg_z), flatten=False, compute=True)
time_series

In [None]:
%matplotlib inline
# Line-plot every data variable of time_series; the variable name is not used.
for _name, series in time_series.data_vars.items():
    series.plot.line()

In [None]:
# Show the `features` member produced from the aggregated dataset.
time_series.to_ml_features().features

In [None]:
# Round-trip check: convert to features and back, then compare the temperature
# layer element-wise with the original; np.all reduces the comparison to one value.
np.all(time_series.to_ml_features().from_ml_features().temperature == time_series.temperature)

In [None]:
# Eight synthetic layers named band_1 .. band_8 on a 200 x 200 grid.
layers = ['band_{}'.format(idx) for idx in range(1, 9)]
shape = (200, 200)
coords = [('x', np.arange(shape[0])), ('y', np.arange(shape[1]))]

# Dedicated seeded generator: the random bands are identical on every
# "Restart & Run All", and numpy's global random state is left untouched.
band_rng = np.random.RandomState(0)


def rand_np_arr():
    """Return a fresh array of shape ``shape`` drawn from a normal(0, 1) distribution."""
    return band_rng.normal(0, 1, shape)


def rand_data_arr():
    """Wrap a fresh random array in a DataArray with the 'x' and 'y' coordinates."""
    return xr.DataArray(rand_np_arr(), coords=coords, dims=('x', 'y'))


data_vars = OrderedDict([(layer, rand_data_arr()) for layer in layers])
dset = MLDataset(data_vars)
dset

In [None]:
@for_each_array
def standardize(arr, dim=None, **kw):
    """Subtract the mean of ``arr`` and divide by its standard deviation.

    Parameters
    ----------
    arr : array-like
        Input exposing ``mean`` and ``std`` methods that take a ``dim`` keyword.
    dim : optional
        Forwarded unchanged as ``dim=`` to both ``arr.mean`` and ``arr.std``
        (default None).
    **kw
        Accepted and ignored.
    """
    centered = arr - arr.mean(dim=dim)
    return centered / arr.std(dim=dim)

@data_vars_kwargs
def ndvi(band_5, band_4):
    """Normalized difference of band_5 and band_4, returned as an OrderedDict keyed 'ndvi'."""
    difference = band_5 - band_4
    total = band_5 + band_4
    return OrderedDict(ndvi=difference / total)

@data_vars_kwargs
def ndwi(band_3, band_5):
    """Normalized difference of band_3 and band_5, returned as a plain dict keyed 'ndwi'."""
    difference = band_3 - band_5
    total = band_3 + band_5
    return dict(ndwi=difference / total)

@data_vars_kwargs
def mndwi_36(band_3, band_6):
    """Normalized difference of band_3 and band_6, returned as an xr.Dataset keyed 'mndwi_36'."""
    difference = band_3 - band_6
    total = band_3 + band_6
    return xr.Dataset(dict(mndwi_36=difference / total))

@data_vars_kwargs
def mndwi_37(band_3, band_7):
    """Normalized difference of band_3 and band_7, returned as an MLDataset keyed 'mndwi_37'."""
    difference = band_3 - band_7
    total = band_3 + band_7
    return MLDataset(OrderedDict(mndwi_37=difference / total))

# Run the four normalized-difference functions in one new_layer call. Each of
# them returns a different container type (OrderedDict, dict, xr.Dataset, MLDataset).
# keep_existing_layers=False is passed, presumably so that only the new layers
# are kept - TODO confirm.
normed_diffs = dset.new_layer(name=None,
                              transforms=(ndvi, ndwi, mndwi_36, mndwi_37),
                              compute=True,
                              keep_existing_layers=False,
                              flatten=False)
# `partial` (imported in an earlier cell) fixes dim='x' for standardize.
standardized = dset.new_layer(transforms=partial(standardize, dim='x'), flatten=False, compute=True)


In [None]:
# Display the normalized-difference layers.
normed_diffs

In [None]:
# Display the result of the standardize transform.
standardized

In [None]:
# `merge` here is the one from xarray_filters; the merged dataset is converted
# with to_ml_features() straight away.
catted = merge((normed_diffs, standardized)).to_ml_features()

In [None]:
# Show the `features` member of the merged, converted dataset.
catted.features

In [None]:
# Show the `layer` member of the merged, converted dataset.
catted.layer

In [None]:
# Convert the features back with from_ml_features() and display the result.
catted.from_ml_features()

In [None]:
# Six-dimensional example: dimensions 'a'..'f' with sizes 2..7.
# NOTE: this rebinds `coords` and `dset` from the earlier band example.
shp = (2, 3, 4, 5, 6, 7)
dims = ('a', 'b', 'c', 'd', 'e', 'f')
coords = OrderedDict((dim, np.arange(size)) for dim, size in zip(dims, shp))

# Dedicated seeded generator: the six random layers are identical on every
# "Restart & Run All", and numpy's global random state is left untouched.
layer_rng = np.random.RandomState(0)
dset = MLDataset(OrderedDict(
    ('layer_{}'.format(idx),
     xr.DataArray(layer_rng.normal(0, 10, shp), coords=coords, dims=dims))
    for idx in range(6)))
dset

In [None]:
# Shape of one layer of the six-dimensional dataset.
dset.layer_0.shape

In [None]:
# Convert the six-dimensional dataset with to_ml_features() and display the result.
dset.to_ml_features()