# Creating a NetCDF file with clustering results from a single input variable

## Importing

In [1]:
import xarray as xr
import numpy as np

from sklearn import preprocessing
from sklearn_som.som import SOM

from tqdm.auto import tqdm


## Datasets Preparation

In [2]:
def datasets_preparation():
    """Build the empty (time_counter, y, x) array that will hold the clusters.

    Reads the module-level ``coords`` mapping, so ``coords`` must be defined
    before this function is called. No data is passed to ``xr.DataArray``;
    only coordinates, dimension names and descriptive attributes are set.

    Returns
    -------
    xarray.DataArray
        Array with dims ``('time_counter', 'y', 'x')`` to be filled one time
        step at a time by the caller.
    """
    metadata = {
        'description': "Clusters of the performed self organizing map algorithm",
        'long_name': "Cluster",
        'units': "count",
    }

    clusters = xr.DataArray(
        coords=coords,
        dims=['time_counter', 'y', 'x'],
        attrs=metadata,
    )

    return clusters


## SOM (Drivers)

In [3]:
def som(inputs, m, n, shape=(898, 398)):
    """Cluster the non-NaN values of a single variable with a self-organizing map.

    Parameters
    ----------
    inputs : array-like
        Values of one variable for one time step. It is flattened before use;
        NaN entries are excluded from training and stay NaN in the result.
    m, n : int
        Dimensions of the map, forwarded to ``SOM``.
    shape : tuple of int, optional
        Shape of the returned grid. Its product must equal the number of
        elements in ``inputs``. Defaults to ``(898, 398)``, the grid size that
        was previously hard-coded here.

    Returns
    -------
    numpy.ndarray
        Float array of ``shape`` holding the predicted cluster of every valid
        point and NaN everywhere else. If ``inputs`` has no valid point the
        grid is entirely NaN and no map is trained.
    """
    # Pre processing: work on a flat view so the mask and the output buffer
    # always share the same 1-D layout.
    flat = np.asarray(inputs).ravel()
    valid = ~np.isnan(flat)

    # Output buffer; positions without valid data keep NaN.
    clusters = np.full(flat.size, np.nan)

    # Guard: with nothing to train on there is no sample to size the map from,
    # so skip the SOM instead of failing on an empty array.
    if valid.any():
        # One feature per sample -> column vector of shape (n_samples, 1).
        samples = flat[valid].reshape(-1, 1)

        # SOM
        temp_som = SOM(m, n, dim=samples.shape[1], lr=0.1)
        temp_som.fit(samples, epochs=5)

        # Post processing: scatter predictions back to their grid positions.
        clusters[valid] = temp_som.predict(samples)

    return np.reshape(clusters, shape)
    

## File Creation

In [4]:
def file_creation(variable, name,
                  path='/data/ibougoudis/MOAD/files/clustering_diatom.nc'):
    """Write a DataArray to a netCDF file as a compressed, named variable.

    Parameters
    ----------
    variable : xarray.DataArray
        Array to store; it is converted to a one-variable dataset.
    name : str
        Name of the variable inside the file.
    path : str, optional
        Destination file. Defaults to the location that was previously
        hard-coded in this function.

    Notes
    -----
    The file is opened with ``mode='a'`` so that it is appended to rather
    than replaced. NOTE(review): per the xarray documentation a variable
    that already exists under ``name`` is overwritten in this mode - confirm
    this is the intended behaviour when re-running.
    """
    temp = variable.to_dataset(name=name)

    # zlib at the maximum compression level for the stored variable.
    encoding = {name: {"zlib": True, "complevel": 9}}

    temp.to_netcdf(path=path, mode='a', encoding=encoding)


## Main Body

In [None]:
# Source dataset holding the variable to be clustered
ds = xr.open_dataset('/data/ibougoudis/MOAD/files/integrated_original.nc')

# Dimensions of the map
m, n = 3, 2

# Coordinates shared by the output array; datasets_preparation() reads this
# module-level name, so it has to exist before the call below.
coords = {'time_counter': ds.time_counter, 'y': ds.y, 'x': ds.x}

clusters_p = datasets_preparation()

# Cluster every time step independently (a new map is trained per step)
n_steps = len(ds.time_counter)
for i in tqdm(range(n_steps), leave=False):

    snapshot = ds.isel(time_counter=i)

    # som() expects the field flattened to one dimension
    clusters_p[i] = som(np.ravel(snapshot['Diatom']), m, n)

# Calling file creation
file_creation(clusters_p, 'Clusters_Diatom')
