# Example notebook for collecting data, training and evaluating the model

## Importing

In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from skfda.ml.clustering import FuzzyCMeans, KMeans


import salishsea_tools.viz_tools as sa_vi

## Reading the Files

In [65]:
# Open the source file as an xarray Dataset
ds = xr.open_dataset('/data/ibougoudis/MOAD/files/integrated_original.nc')

# # Downgrading the resolution
# ds = ds.isel(time_counter = (np.arange(0, len(ds.time_counter),2)), 
#     y=(np.arange(ds.y[0], ds.y[-1], 5)), 
#     x=(np.arange(ds.x[0], ds.x[-1], 5)))

# Keep only the records dated 2007-02-15 through 2009-02-15
# (label-based slices in xarray include both end points)
time_window = slice('2007-2-15', '2009-2-15')
dataset = ds.sel(time_counter=time_window)

# Time stamps of the selected records, as a pandas index
dates = pd.DatetimeIndex(dataset.time_counter.values)


## Inspecting the dataset and its features

In [None]:
# Text summary of the dataset (dimensions, coordinates, variables)
print(dataset)

# Take the first entry of the Diatom field (the first day) and plot it
diatom_first_day = dataset['Diatom'][0]
diatom_first_day.plot()

In [None]:
# `test` is not assigned until a later cell, so referring to it here raises a
# NameError when the notebook is run top to bottom on a fresh kernel.
# Inspect the shape of the raw Diatom field instead, which already exists at
# this point.
dataset['Diatom'].shape

## Dataset Preparation

In [66]:
# Pack the four driver variables into a single array named `drivers`:
# one row per variable, one column per flattened element of the field
drivers = np.stack([
    np.ravel(dataset[name])
    for name in (
        'Temperature_(0m-15m)',
        'Temperature_(15m-100m)',
        'Salinity_(0m-15m)',
        'Salinity_(15m-100m)',
    )
])
print(drivers.shape)

# Drop every column in which at least one driver is NaN.
# `indx` is a 1-tuple holding the positions of the columns that are kept.
indx = np.nonzero(np.logical_not(np.isnan(drivers).any(axis=0)))
drivers = drivers[:, indx[0]]
print(drivers.shape)

# Flatten the diatom field the same way and keep the same positions,
# so that diat[i] lines up with column i of the drivers
diat = np.ravel(dataset['Diatom'])
print(diat.shape)
diat = diat[indx[0]]
print(diat.shape)

# Switch to the (samples, features) layout
drivers = drivers.T

# Show the first sample (the four driver values of one element)
drivers[0]

(4, 54325408)
(4, 7064808)
(54325408,)
(7064808,)


array([ 8.534305,  8.593387, 28.23707 , 28.385187], dtype=float32)

In [67]:
# Lay the diatom field out as a 2-D array with one row per time step and
# len(y) * len(x) columns.
# NOTE(review): this assumes Diatom is ordered (time_counter, y, x), so that
# each column is one grid box — TODO confirm.
test = np.ravel(dataset.Diatom).reshape(
    (len(dataset.time_counter), len(dataset.y) * len(dataset.x))
)

# Keep only the columns that contain no NaN at any time step.
# NOTE: this reuses the name `indx`, replacing the index computed for the
# drivers in the previous cell.
indx = np.nonzero(np.logical_not(np.isnan(test).any(axis=0)))
test = test[:, indx[0]]


In [68]:
from skfda.representation.grid import FDataGrid

# FDataGrid expects one sample per row, so transpose `test` from
# (time, column) to (column, time): each kept column becomes one curve
# sampled along the time axis.
test2 = FDataGrid(data_matrix=test.T)

## Training 

In [69]:
# Number of clusters to look for
n_clusters = 6

# Fit K-means on the curves, then assign every curve to one of the clusters
kmeans = KMeans(n_clusters=n_clusters)
clusters = kmeans.fit(test2).predict(test2)

# Display the cluster label of each curve
clusters


2