## Importing

In [34]:
import xarray as xr
import numpy as np

from sklearn import preprocessing
from sklearn_som.som import SOM

from tqdm.auto import tqdm


## Datasets Preparation

In [35]:
def datasets_preparation():
    """Build one empty cluster container on the time_counter / y / x grid.

    Reads the module-level ``coords`` mapping, which therefore has to be
    defined before this function is called. No data values are handed to
    ``xr.DataArray``; the initial contents are whatever xarray uses for a
    data-less array (presumably NaN -- confirm against the xarray docs).

    Returns
    -------
    xarray.DataArray
        Array with dims ``time_counter``, ``y``, ``x`` and descriptive attrs.
    """
    metadata = {
        "description": "Clusters of the performed self organizing map algorithm",
        "long_name": "Cluster",
        "units": "count",
    }
    return xr.DataArray(
        coords=coords,
        dims=['time_counter', 'y', 'x'],
        attrs=metadata,
    )

## SOM (Drivers, Nutrients, Phytoplankton and Zooplankton)

In [36]:
def som(inputs, m, n, shape=(898, 398), random_state=None):
    """Cluster the grid points of one time step with a self-organizing map.

    Parameters
    ----------
    inputs : numpy.ndarray, shape (n_variables, n_points)
        One row per variable, one column per flattened grid point.
    m, n : int
        Dimensions of the map, forwarded to ``SOM``.
    shape : tuple of int, optional
        Shape the flat result is reshaped to; its product must equal
        ``n_points``. Defaults to ``(898, 398)``, the grid that used to be
        hard-coded here.
    random_state : int or None, optional
        Forwarded to ``SOM`` so a run can be made reproducible. ``None``
        (default) keeps the previous unseeded behaviour.

    Returns
    -------
    numpy.ndarray
        Float array of ``shape`` holding the cluster index of every grid
        point, with NaN wherever at least one input variable is NaN.

    Raises
    ------
    ValueError
        From ``np.reshape`` when ``shape`` does not match ``n_points``.
    """
    # Pre processing: keep only the grid points where every variable is valid.
    valid = np.flatnonzero(~np.isnan(inputs).any(axis=0))

    # Flat result, NaN by default so discarded points stay marked as missing.
    flat_clusters = np.full(inputs.shape[1], np.nan)

    # With no valid point at all (e.g. a fully masked time step) there is
    # nothing to train on; normalising an empty array would raise, so the
    # all-NaN grid is returned instead.
    if valid.size:
        # Rows are variables here, so norm='max' scales each variable by its
        # largest absolute value; the transpose then yields the
        # (n_samples, n_features) layout expected by the SOM.
        samples = preprocessing.normalize(inputs[:, valid], norm='max').transpose()

        # SOM
        temp_som = SOM(m, n, dim=samples.shape[1], lr=0.1, random_state=random_state)
        temp_som.fit(samples, epochs=5)

        # Post processing: put the predictions back at their grid positions.
        flat_clusters[valid] = temp_som.predict(samples)

    return np.reshape(flat_clusters, shape)
    

## File Creation

In [37]:
def file_creation(variable, name, path=r'D:\nc\clustering.nc'):
    """Append one variable to a compressed netCDF file.

    Parameters
    ----------
    variable : xarray.DataArray
        Array to store; converted to a dataset with ``to_dataset``.
    name : str
        Name of the variable inside the file, also used as the encoding key.
    path : str, optional
        Target file. Defaults to the location that used to be hard-coded.
        A raw string is used because the old literal contained ``\\c``,
        which is an invalid escape sequence; the resulting path is unchanged.
    """
    temp = variable.to_dataset(name=name)
    # mode='a' is passed so that successive calls write into the same file;
    # zlib level 9 is the strongest compression setting.
    temp.to_netcdf(path=path, mode='a', encoding={name: {"zlib": True, "complevel": 9}})


## Main Body

In [38]:
# Raw string: the previous literal relied on the invalid escape "\i";
# the resulting path is unchanged.
ds = xr.open_dataset(r'D:\nc\integrated_original.nc')

# Dimensions of the map
m = 3
n = 2

# Read by datasets_preparation() as a module-level name, so it has to be
# defined before the calls below.
coords = dict(time_counter=ds.time_counter, y=ds.y, x=ds.x)

# One output array per variable group.
clusters_d = datasets_preparation()
clusters_n = datasets_preparation()
clusters_p = datasets_preparation()
clusters_z = datasets_preparation()

# Visit every time step exactly once. The previous upper bound of
# len(ds.time_counter) + 1 ran one index past the end, which made isel()
# raise IndexError on the final iteration before anything was written.
for i in tqdm(range(len(ds.time_counter)), leave=False):

    dataset = ds.isel(time_counter=i)

    # Each group is stacked as (n_variables, n_points): one flattened
    # 2-D field per row.
    drivers = np.stack([np.ravel(dataset['Temperature_(0m-15m)']),
        np.ravel(dataset['Temperature_(15m-100m)']),
        np.ravel(dataset['Salinity_(0m-15m)']),
        np.ravel(dataset['Salinity_(15m-100m)'])])

    nutrients = np.stack([np.ravel(dataset['Silicon']),
        np.ravel(dataset['Nitrate']), np.ravel(dataset['Ammonium'])])

    phyto = np.stack([np.ravel(dataset['Diatom']),
        np.ravel(dataset['Flagellate'])])

    zoo = np.stack([np.ravel(dataset['Microzooplankton']),
        np.ravel(dataset['Mesozooplankton'])])

    # An independent SOM is trained per group and per time step.
    clusters_d[i] = som(drivers, m, n)
    clusters_n[i] = som(nutrients, m, n)
    clusters_p[i] = som(phyto, m, n)
    clusters_z[i] = som(zoo, m, n)

# Calling file creation

file_creation(clusters_d, 'Clusters_Drivers')
file_creation(clusters_n, 'Clusters_Nutrients')
file_creation(clusters_p, 'Clusters_Phytoplankton')
file_creation(clusters_z, 'Clusters_Zooplankton')


                                               