In [1]:
import datacube
import numpy as np
from datacube.storage import netcdf_writer
from datacube.model import Variable, CRS
import os
import re
import xarray as xr

# Parámetros


In [2]:
# --- Run identification -------------------------------------------------
execID = "PCA"
algorithm = "PCA"
version = 1

# --- Area of interest: lower-left corner; the query cell adds one unit to each
min_long = -75
min_lat = 5

# --- The two periods to compare, as (start, end) date strings
time_ranges = [
    ("2000-01-01", "2000-06-30"),
    ("2000-07-01", "2000-12-31"),
]

# --- Product and the bands that go into the composites
product = 'ls7_ledaps_utm18n4'
bands = [
    "blue",
    "green",
    "red",
    "nir",
    "swir1",
    "swir2",
]

# --- Compositing options
# normalized: standardise every time slice before taking the median
normalized = True
# minValid: minimum number of valid observations a pixel needs to be kept
minValid = 1

# Consulta

In [3]:
# Load one dataset per time range, keyed by its position in time_ranges.
dc = datacube.Datacube(app="PCA")
lon_range = (min_long, min_long+1.0)
lat_range = (min_lat, min_lat+1)
xarr = {}
for i, tr in enumerate(time_ranges):
    xarr[i] = dc.load(product=product, longitude=lon_range, latitude=lat_range, time=tr)

In [4]:
# Only to keep the variable names that the algorithm section expects.
xarr0, xarr1 = xarr[0], xarr[1]
# Drop the dictionary itself; the two datasets stay alive through the aliases.
del xarr

# Algoritmo

In [5]:
# Compute the median composite for each of the two inputs.
def _median_composite(nbar, bands, nodata, normalized, min_valid):
    """Build one temporal median composite per band from a loaded dataset.

    Parameters
    ----------
    nbar : dataset exposing ``nbar["cf_mask"].values`` and ``nbar.data_vars[band]``
        The loaded observations; the first axis of every band is reduced
        (presumably time -- confirm against the product definition).
    bands : iterable of str
        Names of the data variables to composite.
    nodata : number
        Value that marks missing observations in the band arrays.
    normalized : bool
        When true, every slice along the first axis is standardised with its
        own mean / standard deviation and then rescaled to the average
        standard deviation and average mean of all slices.
    min_valid : int
        Pixels with fewer valid observations than this are set to NaN.

    Returns
    -------
    dict
        Maps each band name to a 2-D float array holding the NaN-aware median.
    """
    cf_mask = nbar["cf_mask"].values
    # Keep observations whose cf_mask is below 4 and different from 2
    # (presumably "cloud" and "cloud shadow" in the CFMask encoding -- confirm).
    clear = np.logical_and(cf_mask != 2, cf_mask < 4)

    composites = {}
    for band in bands:
        raw = nbar.data_vars[band].values
        datos = np.where(np.logical_and(raw != nodata, clear), raw, np.nan)
        # Remember which observations were valid *before* normalisation,
        # so the min_valid count is not affected by it.
        valid = ~np.isnan(datos)
        if normalized:
            per_slice = datos.reshape((datos.shape[0], -1))
            m = np.nanmean(per_slice, axis=1)
            st = np.nanstd(per_slice, axis=1)
            # NOTE(review): a slice with zero standard deviation divides by
            # zero here and becomes NaN/inf, exactly as in the original code.
            datos = (
                np.true_divide(
                    datos - m[:, np.newaxis, np.newaxis],
                    st[:, np.newaxis, np.newaxis],
                )
                * np.nanmean(st)
                + np.nanmean(m)
            )
        median = np.nanmedian(datos, 0)
        median[np.sum(valid, 0) < min_valid] = np.nan
        composites[band] = median
    return composites


# nodata is also used by the save cell, so it stays a notebook-level name.
nodata = -9999
medians1 = _median_composite(xarr0, bands, nodata, normalized, minValid)
medians2 = _median_composite(xarr1, bands, nodata, normalized, minValid)



In [6]:
from matplotlib.mlab import PCA
from sklearn.preprocessing import normalize
from scipy.cluster.vq import kmeans2,vq

In [7]:
# Preprocess: flatten every composite into one row of the PCA input matrix.
# Row order is band by band, first period then second period.
# NaNs are replaced by the row's own median so PCA receives finite values;
# nan_mask records every pixel that was NaN in at least one row so it can be
# blanked again in the results.
rows = []
nan_mask = None
sp = None  # 2-D shape of a single composite, needed to un-flatten results
for band in medians1:
    for composite in (medians1[band], medians2[band]):
        if sp is None:
            sp = composite.shape
        flat = composite.ravel()
        missing = np.isnan(flat)
        nan_mask = missing if nan_mask is None else np.logical_or(nan_mask, missing)
        flat[missing] = np.nanmedian(flat)
        rows.append(flat)
# Stack once at the end instead of re-copying the growing matrix on every
# iteration; nmed stays None when there is nothing to stack.
nmed = np.vstack(rows) if rows else None
del rows
del medians1
del medians2

In [8]:
# nmed holds one row per band/period composite and one column per pixel;
# transpose it so that each row handed to PCA is a pixel.
r_PCA = PCA(np.transpose(nmed))

In [9]:
# Un-flatten the projected data: one 2-D image (shape sp) per component.
componentes = r_PCA.Y.T
salida = componentes.reshape((componentes.shape[0],) + sp)
# Cluster the pixels into 4 groups using the projected data.
# Keep this call BEFORE the masking below: the reshape above may return a view
# of r_PCA.Y, in which case writing NaN into salida would also alter the
# clustering input.
# NOTE(review): no seed is set here, so labels may differ between runs -- confirm.
km_centroids, kmvalues = kmeans2(r_PCA.Y, 4)
# Blank every pixel that was NaN in at least one input composite.
salida[:, nan_mask.reshape(sp)] = np.nan


In [11]:
# k-means: put the per-pixel cluster labels back on the 2-D grid.
kmv = np.reshape(kmvalues.T, sp)

(3689, 3696)
(3689, 3696)
(3689, 3696)
(3689, 3696)
(3689, 3696)
(3689, 3696)
(3689, 3696)
(3689, 3696)
(3689, 3696)
(3689, 3696)
(3689, 3696)
(3689, 3696)


In [28]:
# Wrap the results in an xarray Dataset that reuses the coordinates of the
# first input, leaving out 'time' because the results have no time axis.
dimensiones = [nombre for nombre in xarr0.coords if nombre != 'time']
coordenadas = [(nombre, xarr0.coords[nombre]) for nombre in dimensiones]
xcords = {nombre: xarr0.coords[nombre] for nombre in dimensiones}

# One variable for the cluster labels plus one per component (pc1, pc2, ...).
valores = {"kmeans": xr.DataArray(kmv, dims=dimensiones, coords=coordenadas)}
for i, x in enumerate(salida, start=1):
    valores["pc"+str(i)] = xr.DataArray(x, dims=dimensiones, coords=coordenadas)

output = xr.Dataset(valores, attrs={'crs': xarr0.crs})
# Copy the coordinate units over; the save cell reads them back.
for coordenada in output.coords:
    output.coords[coordenada].attrs["units"] = xarr0.coords[coordenada].units

In [29]:
# Display the assembled dataset (the cell's last expression is rendered).
output

<xarray.Dataset>
Dimensions:  (x: 3696, y: 3689)
Coordinates:
  * y        (y) float64 6.633e+05 6.633e+05 6.633e+05 6.632e+05 6.632e+05 ...
  * x        (x) float64 5e+05 5e+05 5.001e+05 5.001e+05 5.001e+05 5.001e+05 ...
Data variables:
    pc8      (y, x) float64 nan nan nan nan nan nan nan nan nan nan nan nan ...
    pc9      (y, x) float64 nan nan nan nan nan nan nan nan nan nan nan nan ...
    pc2      (y, x) float64 nan nan nan nan nan nan nan nan nan nan nan nan ...
    pc3      (y, x) float64 nan nan nan nan nan nan nan nan nan nan nan nan ...
    pc1      (y, x) float64 nan nan nan nan nan nan nan nan nan nan nan nan ...
    pc6      (y, x) float64 nan nan nan nan nan nan nan nan nan nan nan nan ...
    kmeans   (y, x) int32 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ...
    pc4      (y, x) float64 nan nan nan nan nan nan nan nan nan nan nan nan ...
    pc5      (y, x) float64 nan nan nan nan nan nan nan nan nan nan nan nan ...
    pc11     (y, x) float64 nan nan nan na

# Guardar

In [32]:
# Write every variable of `output` to a NetCDF file in `folder`.
folder="./"
# Raw string: '\w' in a plain string literal is an invalid escape sequence.
# The pattern drops every character that is not a word character or one of
# "_ . ) ( -", so the time ranges can be embedded in the file name.
time_tag = re.sub(r'[^\w_.)(-]', '', str(time_ranges))
filename=folder+"PCA_{}_{}_{}.nc".format(min_lat,min_long,time_tag)
nco=netcdf_writer.create_netcdf(filename)
try:
    coords=output.coords
    cnames=()
    for x in coords:
        netcdf_writer.create_coordinate(nco, x, coords[x].values, coords[x].units)
        cnames=cnames+(x,)
    netcdf_writer.create_grid_mapping_variable(nco, output.crs)
    for band in output.data_vars:
        band_values = output.data_vars[band].values
        # Replace NaN by nodata on a copy, so `output` itself is left intact
        # and this cell can be re-run without changing what it writes.
        filled = np.where(np.isnan(band_values), nodata, band_values)
        # Every variable, including the integer k-means labels, is declared as float64.
        var= netcdf_writer.create_variable(nco, band, Variable(np.dtype(np.float64), nodata, cnames, None) ,set_crs=True)
        var[:] = netcdf_writer.netcdfy_data(filled)
finally:
    # Close the file even when one of the writes above fails.
    nco.close()