## Importing

In [10]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn_som.som import SOM
import salishsea_tools.viz_tools as sa_vi
from sklearn import preprocessing
import os


## Datasets Preparation

In [11]:
def datasets_preparation():
    """Load one day of model output, mask it, and vertically integrate it.

    Reads two module-level names: the loop variable ``i`` (run-folder name,
    sliced as ``i[0:2]`` day, ``i[2:5]`` month abbreviation, ``i[5:7]``
    two-digit year) and the ``dict_month`` abbreviation-to-number lookup.

    Returns
    -------
    tuple
        ``(ds, date, temp_i1, temp_i2, saline_i1, saline_i2, sil_i, nitr_i,
        ammo_i, diat_i, flag_i, micro_i, meso_i)`` where

        * ``ds`` is the still-open ``grid_T`` dataset (the caller owns it),
        * ``date`` is a ``pandas.DatetimeIndex`` built from ``time_counter``,
        * ``temp_i1`` / ``saline_i1`` use depth levels 0-14,
        * ``temp_i2`` / ``saline_i2`` use depth levels 15-26,
        * every other ``*_i`` field uses depth levels 0-26.
    """
    # Dataset and date: both files share the same YYYYMMDD_YYYYMMDD prefix.
    stamp = '20' + str(i[5:7]) + str(dict_month[i[2:5]]) + str(i[0:2])
    prefix = ('/results2/SalishSea/nowcast-green.202111/' + i
              + '/SalishSea_1d_' + stamp + '_' + stamp)

    ds = xr.open_dataset(prefix + '_grid_T.nc')
    ds_bio = xr.open_dataset(prefix + '_biol_T.nc')

    date = pd.DatetimeIndex(ds['time_counter'].values)

    # Open the mesh mask; the context manager releases the file handle as
    # soon as the mask has been copied into a NumPy array.
    with xr.open_dataset('/home/sallen/MEOPAR/grid/mesh_mask202108.nc') as mesh:
        mask = mesh.tmask.to_numpy()
    wet = mask == 1

    # Driver variables (masked)
    temp = ds.votemper.where(wet)
    saline = ds.vosaline.where(wet)
    e3t = ds.e3t.where(wet)

    # Biological variables (masked)
    sil = ds_bio.silicon.where(wet)
    nitr = ds_bio.nitrate.where(wet)
    ammo = ds_bio.ammonium.where(wet)

    # Phytoplankton variables (masked)
    flag = ds_bio.flagellates.where(wet)
    diat = ds_bio.diatoms.where(wet)

    # Zooplankton variables (masked)
    micro = ds_bio.microzooplankton.where(wet)
    meso = ds_bio.mesozooplankton.where(wet)

    depth = flag.deptht

    def integrate(field, first, last, divisor):
        """Sum ``field * e3t`` over levels ``first:last`` at time index 0.

        ``min_count`` equals the number of levels in the slice, so a column
        with any masked level in that range yields NaN.
        """
        weighted = field[0, first:last] * e3t[0, first:last]
        total = weighted.sum('deptht', skipna=True, min_count=last - first)
        return total / divisor

    # NOTE(review): the divisors are values of the ``deptht`` coordinate, not
    # the sum of ``e3t`` over the layer -- confirm this is the intended
    # normalisation before interpreting the results as layer means.
    upper = depth[14]
    lower = depth[26] - depth[15]
    full = depth[26]

    # Integrating the variables
    temp_i1 = integrate(temp, 0, 15, upper)
    temp_i2 = integrate(temp, 15, 27, lower)
    saline_i1 = integrate(saline, 0, 15, upper)
    saline_i2 = integrate(saline, 15, 27, lower)

    sil_i = integrate(sil, 0, 27, full)
    nitr_i = integrate(nitr, 0, 27, full)
    ammo_i = integrate(ammo, 0, 27, full)

    flag_i = integrate(flag, 0, 27, full)
    diat_i = integrate(diat, 0, 27, full)

    micro_i = integrate(micro, 0, 27, full)
    meso_i = integrate(meso, 0, 27, full)

    return (ds, date, temp_i1, temp_i2, saline_i1, saline_i2, sil_i, nitr_i, ammo_i, diat_i,  flag_i, micro_i, meso_i)


## SOM (Drivers)

In [12]:
def som ():
    """Cluster the grid points with a 3x2 self-organizing map of the drivers.

    Reads the module-level ``temp_i1``, ``temp_i2``, ``saline_i1`` and
    ``saline_i2`` fields. Grid points where any of the four is NaN are left
    out of the fit and stay NaN in the returned cluster map.

    Returns
    -------
    tuple
        ``(unique, counts, inputs, predictions, clusters)``:

        * ``unique``, ``counts`` -- cluster labels that occur and their sizes,
        * ``inputs`` -- un-normalised driver values of the valid points, one
          row per point, columns in the order ``temp_i1, temp_i2, saline_i1,
          saline_i2``,
        * ``predictions`` -- cluster label of each valid point,
        * ``clusters`` -- ``xarray.DataArray`` on the ``(y, x)`` grid of
          ``temp_i1`` holding the labels (NaN where no prediction was made).
    """
    # Pre processing: one row per driver, one column per grid point
    drivers = (temp_i1, temp_i2, saline_i1, saline_i2)
    inputs = np.stack([field.values.flatten() for field in drivers])
    valid = np.flatnonzero(~np.isnan(inputs).any(axis=0))
    inputs2 = inputs[:, valid]
    inputs3 = preprocessing.normalize(inputs2, norm='max')
    inputs3 = inputs3.transpose()

    # SOM
    temp_som = SOM(m=3, n=2, dim=inputs3.shape[1], lr=0.1)
    temp_som.fit(inputs3, epochs=5)
    predictions = temp_som.predict(inputs3)

    # Post processing: scatter the labels back onto the full grid. The grid
    # shape is taken from the input field instead of being hard-coded.
    unique, counts = np.unique(predictions, return_counts=True)
    indx2 = np.full(inputs.shape[1], np.nan)
    indx2[valid] = predictions
    clusters = np.reshape(indx2, temp_i1.shape)

    # Preparation of the dataarray
    clusters = xr.DataArray(
        clusters,
        coords={'y': temp_i1.y, 'x': temp_i1.x},
        dims=['y', 'x'],
        attrs=dict(description="Clusters of the performed self organizing map algorithm",
                   long_name="Cluster",
                   units="count"),
    )

    return (unique, counts, inputs2.transpose(), predictions, clusters)

## Printing

In [13]:
def printing ():
    """Append per-cluster size and temperature/salinity statistics to Statistics.txt.

    Reads the module-level ``inputs``, ``predictions``, ``unique`` and
    ``counts`` produced by ``som()``. ``inputs`` has one row per clustered
    grid point and its columns follow the stacking order used in ``som()``:
    0 = ``temp_i1``, 1 = ``temp_i2``, 2 = ``saline_i1``, 3 = ``saline_i2``.
    The statistics reported here use column 0 (temperature) and column 2
    (salinity).

    The file is opened in append mode in the current working directory.
    """
    # Preparation of the dataframe (column 2, not 1, holds salinity)
    df = pd.DataFrame({'temperature': inputs[:, 0],
                       'salinity': inputs[:, 2],
                       'cluster': predictions})

    # Calculating the metrics: one row per cluster label,
    # columns (variable, statistic), rounded to two decimals
    stats = (df.groupby('cluster')[['temperature', 'salinity']]
               .agg(['min', 'max', 'mean'])
               .round(2))

    # Printing
    lines = []
    # Pair each label with its own count; indexing ``counts`` by the label
    # itself breaks as soon as one SOM node receives no grid points.
    for label, size in zip(unique, counts):
        temp = stats.loc[label, 'temperature']
        sal = stats.loc[label, 'salinity']

        lines.append(['The amount of grid boxes for cluster ' + str(label), ' is ' + str(size), '\n'])

        lines.append(['The minimum temperature for cluster ' + str(label), ' is ' + str(temp['min']), ' degrees Celsius'])
        lines.append(['The maximum temperature for cluster ' + str(label), ' is ' + str(temp['max']), ' degrees Celsius'])
        lines.append(['The mean temperature for cluster ' + str(label), ' is ' + str(temp['mean']), ' degrees Celsius', '\n'])

        lines.append(['The minimum salinity for cluster ' + str(label), ' is ' + str(sal['min']), ' g/kg'])
        lines.append(['The maximum salinity for cluster ' + str(label), ' is ' + str(sal['max']), ' g/kg'])
        lines.append(['The mean salinity for cluster ' + str(label), ' is ' + str(sal['mean']), ' g/kg', '\n' * 2])

    with open("Statistics.txt", "a") as f:
        for line in lines:
            f.writelines(line)
            f.write('\n')

## Plotting Temperature and Salinity

In [14]:
def plotting_dr ():
    """Save a 2x2 figure of the four driver fields as ``Drivers.png``.

    Reads the module-level ``date``, ``temp_i1``, ``temp_i2``, ``saline_i1``
    and ``saline_i2``. The figure title is the first entry of ``date``
    formatted as ``YYYY/MM/DD``; the file is written to the current working
    directory and the figure is closed afterwards.
    """
    stamp = date[0]
    # Zero-pad month and day to two digits
    month = str(stamp.month).zfill(2)
    day = str(stamp.day).zfill(2)

    fig, ax = plt.subplots(2, 2, figsize=(10, 15))

    cmap = plt.get_cmap('cubehelix')
    cmap.set_bad('gray')

    temp_label = 'Conservative Temperature [degree_C m]'
    sal_label = 'Reference Salinity [g kg-1 m]'

    # (field, axes, colourbar label, panel title) in row-major panel order
    panels = (
        (temp_i1, ax[0, 0], temp_label, 'Conservative Temperature (0m - 15m)'),
        (temp_i2, ax[0, 1], temp_label, 'Conservative Temperature (15m - 100m)'),
        (saline_i1, ax[1, 0], sal_label, 'Reference Salinity (0m - 15m)'),
        (saline_i2, ax[1, 1], sal_label, 'Reference Salinity (15m - 100m)'),
    )

    for field, axis, label, _ in panels:
        field.plot.pcolormesh(ax=axis, cmap=cmap, cbar_kwargs={'label': label})

    plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.95,
                        wspace=0.15, hspace=0.15)

    for _, axis, _, _ in panels:
        sa_vi.set_aspect(axis)

    # Set the titles after plotting so they replace whatever the plot call put there
    for _, axis, _, title in panels:
        axis.title.set_text(title)

    fig.suptitle(str(stamp.year) + '/' + month + '/' + day)

    fig.savefig('Drivers.png')

    plt.close(fig)


## Plotting Nutrients 

In [15]:
def plotting_nu ():
    """Save a 2x2 figure of the cluster map and three nutrients as ``Nutrients.png``.

    Reads the module-level ``date``, ``unique``, ``clusters``, ``sil_i``,
    ``nitr_i`` and ``ammo_i``. Panels, row-major: clusters, silicon, nitrate,
    ammonium. The figure is written to the current working directory and
    closed afterwards.
    """
    stamp = date[0]
    # Zero-pad month and day to two digits
    month = str(stamp.month).zfill(2)
    day = str(stamp.day).zfill(2)

    fig, ax = plt.subplots(2, 2, figsize=(10, 15))

    # Cluster panel: one discrete colour per label value up to the largest label
    n_colors = unique.max() + 1
    cluster_cmap = plt.get_cmap('tab20', n_colors)
    cluster_cmap.set_bad('gray')
    clus = clusters.plot.pcolormesh(ax=ax[0, 0], cmap=cluster_cmap,
                                    vmin=unique.min(), vmax=n_colors,
                                    add_colorbar=False)

    # Ticks sit half a unit above each label value
    cbar = fig.colorbar(clus, ticks=unique + 0.5)
    cbar.set_ticklabels(unique)
    cbar.set_label('Clusters [count]')

    plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.95,
                        wspace=0.15, hspace=0.15)

    field_cmap = plt.get_cmap('cubehelix')
    field_cmap.set_bad('gray')
    nutrient_panels = (
        (sil_i, ax[0, 1], 'Silicon Concentration [mmol m-2]'),
        (nitr_i, ax[1, 0], 'Nitrate Concentration [mmol m-2]'),
        (ammo_i, ax[1, 1], 'Ammonium Concentration [mmol m-2]'),
    )
    for field, axis, label in nutrient_panels:
        field.plot.pcolormesh(ax=axis, cmap=field_cmap, cbar_kwargs={'label': label})

    for axis in ax.flat:
        sa_vi.set_aspect(axis)

    titles = ('Clusters', 'Silicon', 'Nitrate', 'Ammonium')
    for axis, title in zip(ax.flat, titles):
        axis.title.set_text(title)

    fig.suptitle(str(stamp.year) + '/' + month + '/' + day)

    fig.savefig('Nutrients.png')

    plt.close(fig)


## Plotting Phytoplankton

In [16]:
def plotting_ph ():
    """Save the cluster map and the two phytoplankton fields as ``Phytoplankton.png``.

    Reads the module-level ``date``, ``unique``, ``clusters``, ``diat_i`` and
    ``flag_i``. Three of the 2x2 panels are used (clusters, diatoms,
    flagellates); the bottom-right axes are switched off. The figure is
    written to the current working directory and closed afterwards.
    """
    stamp = date[0]
    # Zero-pad month and day to two digits
    month = str(stamp.month).zfill(2)
    day = str(stamp.day).zfill(2)

    fig, ax = plt.subplots(2, 2, figsize=(10, 15))

    # Cluster panel: one discrete colour per label value up to the largest label.
    # NOTE(review): this function uses 'tab10' while plotting_nu and
    # plotting_zo use 'tab20' -- confirm the difference is intended.
    n_colors = unique.max() + 1
    cluster_cmap = plt.get_cmap('tab10', n_colors)
    cluster_cmap.set_bad('gray')
    clus = clusters.plot.pcolormesh(ax=ax[0, 0], cmap=cluster_cmap,
                                    vmin=unique.min(), vmax=n_colors,
                                    add_colorbar=False)

    # Ticks sit half a unit above each label value
    cbar = fig.colorbar(clus, ticks=unique + 0.5)
    cbar.set_ticklabels(unique)
    cbar.set_label('Clusters [count]')

    plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.95,
                        wspace=0.15, hspace=0.15)

    field_cmap = plt.get_cmap('cubehelix')
    field_cmap.set_bad('gray')
    phyto_panels = (
        (diat_i, ax[0, 1], 'Diatoms Concentration [mmol m-2]'),
        (flag_i, ax[1, 0], 'Flagellates Concentration [mmol m-2]'),
    )
    for field, axis, label in phyto_panels:
        field.plot.pcolormesh(ax=axis, cmap=field_cmap, cbar_kwargs={'label': label})

    used_axes = (ax[0, 0], ax[0, 1], ax[1, 0])
    for axis in used_axes:
        sa_vi.set_aspect(axis)

    # The fourth panel has nothing to show
    ax[1, 1].axis('off')

    for axis, title in zip(used_axes, ('Clusters', 'Diatoms', 'Flagellates')):
        axis.title.set_text(title)

    fig.suptitle(str(stamp.year) + '/' + month + '/' + day)

    fig.savefig('Phytoplankton.png')

    plt.close(fig)


## Plotting Zooplankton

In [17]:
def plotting_zo ():
    """Save the cluster map and the two zooplankton fields as ``Zooplankton.png``.

    Reads the module-level ``date``, ``unique``, ``clusters``, ``micro_i`` and
    ``meso_i``. Three of the 2x2 panels are used (clusters, microzooplankton,
    mesozooplankton); the bottom-right axes are switched off. The figure is
    written to the current working directory and closed afterwards.
    """
    stamp = date[0]
    # Zero-pad month and day to two digits
    month = str(stamp.month).zfill(2)
    day = str(stamp.day).zfill(2)

    fig, ax = plt.subplots(2, 2, figsize=(10, 15))

    # Cluster panel: one discrete colour per label value up to the largest label
    n_colors = unique.max() + 1
    cluster_cmap = plt.get_cmap('tab20', n_colors)
    cluster_cmap.set_bad('gray')
    clus = clusters.plot.pcolormesh(ax=ax[0, 0], cmap=cluster_cmap,
                                    vmin=unique.min(), vmax=n_colors,
                                    add_colorbar=False)

    # Ticks sit half a unit above each label value
    cbar = fig.colorbar(clus, ticks=unique + 0.5)
    cbar.set_ticklabels(unique)
    cbar.set_label('Clusters [count]')

    plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.95,
                        wspace=0.15, hspace=0.15)

    field_cmap = plt.get_cmap('cubehelix')
    field_cmap.set_bad('gray')
    zoo_panels = (
        (micro_i, ax[0, 1], 'Microzooplankton Concentration [mmol m-2]'),
        (meso_i, ax[1, 0], 'Mesozooplankton Concentration [mmol m-2]'),
    )
    for field, axis, label in zoo_panels:
        field.plot.pcolormesh(ax=axis, cmap=field_cmap, cbar_kwargs={'label': label})

    used_axes = (ax[0, 0], ax[0, 1], ax[1, 0])
    for axis in used_axes:
        sa_vi.set_aspect(axis)

    # The fourth panel has nothing to show
    ax[1, 1].axis('off')

    for axis, title in zip(used_axes, ('Clusters', 'Microzooplankton', 'Mesozooplankton')):
        axis.title.set_text(title)

    fig.suptitle(str(stamp.year) + '/' + month + '/' + day)

    fig.savefig('Zooplankton.png')

    plt.close(fig)


## Main Body

In [18]:
# Root directory under which one output sub-folder per run day is created
parent_dir = '/data/ibougoudis/MOAD/analysis-ilias/notebooks/integration_r'
os.makedirs(parent_dir, exist_ok=True)

# Month abbreviation (as used in the run-folder names) -> two-digit month
dict_month = {'jan': '01',
              'feb': '02',
              'mar': '03',
              'apr': '04',
              'may': '05',
              'jun': '06',
              'jul': '07',
              'aug': '08',
              'sep': '09',
              'oct': '10',
              'nov': '11',
              'dec': '12'}

path = os.listdir('/results2/SalishSea/nowcast-green.202111/')

# Keep the March-May run folders (names are sliced as DD mon YY)
folders = [x for x in path if x[2:5] in ('mar', 'apr', 'may')]
# Sort chronologically (year, month, day); a plain string sort of DDmonYY
# names would interleave the months.
folders.sort(key=lambda x: (x[5:7], dict_month[x[2:5]], x[0:2]))

# Open the mesh mask
mesh = xr.open_dataset('/home/sallen/MEOPAR/grid/mesh_mask202108.nc')
mask = mesh.tmask.to_numpy()

# NOTE: ``i`` must keep this name -- datasets_preparation() reads it as a
# module-level variable to build the input file names.
for i in folders:

    # The helper functions write their outputs with relative paths, so the
    # working directory is switched to the per-day output folder.
    os.makedirs(os.path.join(parent_dir, i), exist_ok=True)
    os.chdir(os.path.join(parent_dir, i))

    ds, date, temp_i1, temp_i2, saline_i1, saline_i2, sil_i, nitr_i, ammo_i, diat_i, flag_i, micro_i, meso_i = datasets_preparation()

    try:
        unique, counts, inputs, predictions, clusters = som ()

        printing ()

        plotting_dr ()
        plotting_nu ()
        plotting_ph ()
        plotting_zo ()
    finally:
        # Release the day's NetCDF handle so a long run does not accumulate
        # one open file per processed day.
        ds.close()

    print([i])


['01mar22']
['02mar22']
['03mar22']
['04mar22']
['05mar22']
['06mar22']
['07mar22']
['08mar22']
['09mar22']
['10mar22']
['11mar22']
['12mar22']
['13mar22']
['14mar22']
['15mar22']
['16mar22']
['17mar22']
['18mar22']
['19mar22']
['20mar22']
['21mar22']
['22mar22']
['23mar22']
['24mar22']
['25mar22']
['26mar22']
['27mar22']
['28mar22']
['29mar22']
['30mar22']
['31mar22']
