# Predicting Flagellate concentration with functional clustering and regression (spatial means)

## Importing

In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xskillscore as xs

from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import r_regression

from skfda.representation.grid import FDataGrid
from skfda.ml.clustering import KMeans

from skfda.misc.hat_matrix import NadarayaWatsonHatMatrix, LocalLinearRegressionHatMatrix, KNeighborsHatMatrix
from skfda.preprocessing.smoothing import KernelSmoother

from skfda.ml.regression import HistoricalLinearRegression

from sklearn.metrics import root_mean_squared_error as rmse

import os
import lzma
import dill

import random

import cmocean.cm as cm
import salishsea_tools.viz_tools as sa_vi

# ``numpy.warnings`` was only a re-export of the standard-library module and is
# no longer available in recent NumPy releases, so use ``warnings`` directly.
import warnings

warnings.filterwarnings('ignore') # For the nan mean warning


## Datasets Preparation

In [None]:
# Creation of the training - testing datasets
def datasets_preparation(dataset, dataset2, clusters, name, inputs_names):
    """Build the multi-year mean inputs and targets on the retained grid cells.

    Every field is flattened to (time, y*x), the 29th of February is dropped,
    the time axis is split into one chunk per calendar year and the chunks are
    averaged (NaN-aware). Only grid cells that pass the spatial mask on x/y
    and have no NaN in the averaged target are kept.

    Parameters
    ----------
    dataset : provides ``time_counter`` and the target variable ``name``
        (presumably an xarray Dataset -- TODO confirm).
    dataset2 : provides the variables in ``inputs_names`` plus ``x`` and ``y``.
    clusters : cluster map; it is flattened and indexed with the kept cells,
        so it is assumed to cover the same y*x grid as ``dataset2``.
    name : key of the target variable in ``dataset``.
    inputs_names : keys of the input variables in ``dataset2``.

    Returns
    -------
    inputs : array of shape (len(inputs_names), days, kept cells)
    targets : array of shape (days, kept cells)
    indx : tuple returned by ``np.where`` with the flat indices of kept cells
    clusters : cluster label of every kept cell
    """
    time = dataset.time_counter
    leap_days = np.where((time.dt.month==2) & (time.dt.day==29))
    n_years = len(np.unique(time.dt.year))

    # Flatten the spatial dimensions: (time, y, x) -> (time, y*x)
    target_field = dataset[name].to_numpy()
    targets = target_field.reshape(target_field.shape[0], -1)

    inputs = []
    for input_name in inputs_names:
        field = dataset2[input_name].to_numpy()
        inputs.append(field.reshape(field.shape[0], -1))
    inputs = np.array(inputs)

    # Deleting 29 of February
    inputs = np.delete(inputs, leap_days, axis=1)
    targets = np.delete(targets, leap_days, axis=0)

    # Splitting in years and averaging over them
    inputs = np.nanmean(np.split(inputs, n_years, axis=1), axis=0)
    targets = np.nanmean(np.split(targets, n_years, axis=0), axis=0)

    # Flat x / y coordinate of every grid cell (row-major, y outer, x inner)
    x =  np.tile(dataset2.x, len(dataset2.y))
    y =  np.tile(np.repeat(dataset2.y, len(dataset2.x)),1)

    indx = np.where((~np.isnan(targets).any(axis=0)) & (x>10) & ((x>100) | (y<880)))
    inputs = inputs[:,:,indx[0]]
    targets = targets[:,indx[0]]

    # The kept indices only address a single copy of the flattened map, so
    # there is no need to tile it once per time step before indexing.
    clusters = np.ravel(clusters)[indx[0]]

    return(inputs, targets, indx, clusters)


## Clustering Finalization

In [None]:
def clustering(dataset,quant,indx,name):
    """Cluster the functional data, order the labels and plot the cluster map.

    Parameters
    ----------
    dataset : provides the ``x`` and ``y`` grid used to rebuild the 2-D map.
    quant : functional data passed to ``KMeans.fit_predict`` (one sample per
        retained grid cell).
    indx : tuple whose first element holds the flat grid indices of the
        retained cells (as returned by ``np.where``).
    name : label used in the plot title.

    Returns
    -------
    clusters : array of shape (len(y), len(x)) with the sorted cluster label
        of every retained cell and NaN elsewhere.
    n_clusters : number of clusters requested (6).
    """
    # Training
    n_clusters = 6
    kmeans = KMeans(n_clusters=n_clusters)
    kmeans.fit_predict(quant)

    # Sorting so that cluster 1 has the minimum mean value, 6 the maximum.
    # The centres are averaged along axis 1 and ranked on their first
    # component; for a single-valued target this is the only component, for
    # multi-variable drivers it is the first input. Ranking no longer depends
    # on the value of ``name``, which callers use for the target variable.
    center_means = np.mean(kmeans.cluster_centers_.data_matrix,axis=1)
    cluster_mean = center_means[:,0]

    # Rank of every original label (hence the double argsort)
    rank = np.argsort(np.argsort(cluster_mean))

    # Relabelling every sample in one lookup
    clusters = rank[kmeans.labels_]

    unique = np.unique(clusters)

    # Creating the map
    flat_map = np.full(len(dataset.y) * len(dataset.x),np.nan)
    flat_map[indx[0]] = clusters
    clusters = np.reshape(flat_map,(len(dataset.y),len(dataset.x)))
    clusters2 = xr.DataArray(clusters,dims = ['y','x'])

    # Plotting
    fig, ax = plt.subplots(figsize =(5,9))

    cmap = plt.get_cmap('tab20', unique.max()+1)
    cmap.set_bad('gray')
    clus = clusters2.plot(ax=ax, cmap=cmap, vmin = unique.min(), vmax = unique.max()+1, add_colorbar=False)

    # Ticks are centred on each colour band and labelled starting from 1
    cbar = fig.colorbar(clus, ticks = unique+0.5)
    cbar.set_ticklabels(unique+1)
    cbar.set_label('Clusters [count]')
    ax.set_title('Functional Clustering for '+ name + ' (2007-2020)')

    sa_vi.set_aspect(ax)
    plt.show()

    return(clusters,n_clusters)
    

## Functional Clustering (Target)

In [None]:
def func_clust_target(dataset, name):
    """Cluster the grid cells on the multi-year mean series of the target.

    The target ``name`` is flattened to (time, y*x), the 29th of February is
    removed, the years are averaged and the cells that pass the spatial mask
    (and contain no NaN) are handed to ``clustering``.

    Returns
    -------
    tuple ``(clusters, 0, n_clusters)``; the constant 0 is the second element
    returned for the target-based clustering.
    """
    time = dataset.time_counter
    leap_days = np.where((time.dt.month==2) & (time.dt.day==29))
    n_years = len(np.unique(time.dt.year))

    # (time, y, x) -> (time, y*x)
    field = dataset[name].to_numpy()
    series = field.reshape(*field.shape[:1],-1)

    # Deleting 29 of February, then averaging the yearly chunks
    series = np.delete(series,leap_days,axis=0)
    series = np.nanmean(np.split(series,n_years,axis=0),axis=0)

    # Flat coordinates of every grid cell
    x =  np.tile(dataset.x, len(dataset.y))
    y =  np.tile(np.repeat(dataset.y, len(dataset.x)),1)

    keep = np.where((~np.isnan(series).any(axis=0))& (x>10) & ((x>100) | (y<880)))
    series = series[:,keep[0]]

    # One functional sample per retained grid cell
    curves = FDataGrid(series.transpose())

    clusters, n_clusters = clustering(dataset,curves,keep,name)

    return(clusters,0,n_clusters)


## Functional Clustering (Drivers)

In [None]:
def func_clust_drivers(dataset,dataset2,name,inputs_names):
    """Cluster the grid cells on the multi-year mean series of the drivers.

    Parameters
    ----------
    dataset : provides ``time_counter`` and the target variable ``name``; the
        target is only used to find the cells without NaN values.
    dataset2 : provides ``time_counter``, ``x``, ``y`` and the variables in
        ``inputs_names``.
    name : key of the target variable in ``dataset`` (also forwarded to
        ``clustering``).
    inputs_names : keys of the driver variables in ``dataset2``.

    Returns
    -------
    tuple ``(clusters, 1, n_clusters)``; the constant 1 is the second element
    returned for the driver-based clustering.
    """
    indx = np.where((dataset2.time_counter.dt.month==2) & (dataset2.time_counter.dt.day==29))

    # (time, y, x) -> (time, y*x) for every driver
    inputs = []
    for i in inputs_names:
        field = dataset2[i].to_numpy()
        inputs.append(field.reshape(field.shape[0],-1))

    inputs = np.array(inputs)

    # The mask comes from the requested target rather than a fixed variable,
    # so it agrees with the cells selected when the datasets are prepared.
    target_field = dataset[name].to_numpy()
    targets = target_field.reshape(target_field.shape[0],-1)

    # Deleting 29 of February
    inputs = np.delete(inputs,indx,axis=1)
    targets = np.delete(targets,indx,axis=0)

    # Splitting in years
    inputs = np.split(inputs,len(np.unique(dataset2.time_counter.dt.year)),axis=1)
    targets = np.split(targets,len(np.unique(dataset.time_counter.dt.year)),axis=0)

    # Means
    inputs = np.nanmean(inputs,axis=0)
    targets = np.nanmean(targets,axis=0)

    x =  np.tile(dataset2.x, len(dataset2.y))
    y =  np.tile(np.repeat(dataset2.y, len(dataset2.x)),1)

    indx = np.where((~np.isnan(targets).any(axis=0))& (x>10) & ((x>100) | (y<880))) # Target goes down to 100m
    inputs = inputs[:,:,indx[0]]

    # Scaling the inputs: one column per driver, every (day, cell) pair a row
    n_inputs, n_days, n_cells = inputs.shape
    temp = np.reshape(inputs,(n_inputs,n_days*n_cells)).transpose()
    scaler_inputs = make_column_transformer((StandardScaler(), np.arange(0,len(inputs_names))))
    temp = scaler_inputs.fit_transform(temp).transpose()
    inputs = np.reshape(temp,(n_inputs,n_days,n_cells))

    # Converting it to an appropriate format for functional clustering:
    # (cells, days, drivers), with the day index as grid points
    inputs = np.transpose(inputs,axes=(2,1,0))
    inputs2 = FDataGrid(inputs, np.arange(0,len(inputs[0])))

    clusters, n_clusters = clustering(dataset2,inputs2,indx,name)

    return(clusters, 1, n_clusters)


## File Creation

In [None]:
def file_creation(path, variable, name):
    """Append ``variable`` as ``name`` to ``targets_predictions.nc``.

    The file name is concatenated directly to ``path`` and the variable is
    written in append mode with zlib compression at level 9.
    """
    out_file = path + 'targets_predictions.nc'
    compression = {name:{"zlib": True, "complevel": 9}}

    variable.to_dataset(name=name).to_netcdf(path = out_file, mode='a', encoding=compression)
    

## Regressor

In [None]:
def regressor (inputs, targets, name, cluster, inputs_names):
    """Fit the historical functional linear regression of one cluster.

    Parameters
    ----------
    inputs : array of shape (n_inputs, d1, d2); the caller in this file passes
        (drivers, days, years).
    targets : array of shape (d1, d2).
    name : target name, only used in the printed message.
    cluster : zero-based cluster index, printed as ``cluster + 1``.
    inputs_names : names of the drivers, one per entry of ``inputs``.

    Returns
    -------
    tuple ``(regr, scaler_inputs, scaler_targets, smoother)``: the fitted
    model, the two fitted scalers and a kernel smoother that is created here
    but not applied.
    """
    n_inputs, dim1, dim2 = len(inputs), inputs.shape[1], inputs.shape[2]

    # One row per (d1, d2) sample, one column per driver
    samples = np.reshape(inputs,(n_inputs,dim1*dim2)).transpose()
    flat_targets = np.ravel(targets)

    # Printing of the correlation coefficients
    r = np.round(r_regression(samples,flat_targets),2)
    for i, input_name in enumerate(inputs_names):
        print('The correlation coefficient between ' +input_name+ ' and ' +name+  ' for cluster ' +str(cluster+1)+ ' is: ' +str(r[i]))
    print ('\n')

    # Scaling the inputs, then restoring the original 3-D layout
    scaler_inputs = make_column_transformer((StandardScaler(), np.arange(0,len(inputs_names))))
    samples = scaler_inputs.fit_transform(samples)
    inputs = np.reshape(samples.transpose(),(n_inputs,dim1,dim2))

    # Scaling the targets
    scaler_targets = StandardScaler()
    scaled_targets = scaler_targets.fit_transform(np.expand_dims(flat_targets,-1))
    targets = scaled_targets.reshape(targets.shape)

    # Final transformations: samples along the first axis for scikit-fda
    inputs = np.transpose(inputs,axes=(2,1,0))
    targets = targets.transpose()
    n_points = len(targets[0])
    inputs = FDataGrid(data_matrix=inputs, grid_points=np.arange(0,n_points))
    targets = FDataGrid(data_matrix=targets, grid_points=np.arange(0,n_points))

    ## Smoothing (the smoother is only built and returned, not applied)
    # targets = targets.to_basis(FourierBasis(n_basis=10))
    smoother = KernelSmoother(kernel_estimator=LocalLinearRegressionHatMatrix(bandwidth=1))
    # inputs = smoother.fit_transform(inputs)

    regr = HistoricalLinearRegression(n_intervals=3,lag=74).fit(inputs,targets)

    return(regr,scaler_inputs,scaler_targets,smoother)


## Scaling

In [None]:
def scaling(regr,inputs,scaler_inputs,targets,scaler_targets,smoother):
    """Predict with a fitted model and return predictions in original units.

    ``inputs`` (n_inputs, d1, d2) is scaled with the already fitted
    ``scaler_inputs``, converted to functional data and passed to
    ``regr.predict``; the predictions are evaluated on ``len(targets)`` grid
    points and mapped back with ``scaler_targets.inverse_transform``.
    ``targets`` is only used for its length and ``smoother`` is not applied.

    Returns
    -------
    array with the grid points along the first axis and the predicted samples
    along the second.
    """
    original_shape = (len(inputs),inputs.shape[1],inputs.shape[2])
    grid = np.arange(0,len(targets))

    # Scaling the inputs: rows are samples, columns are the drivers
    samples = np.reshape(inputs,(original_shape[0],original_shape[1]*original_shape[2])).transpose()
    samples = scaler_inputs.transform(samples).transpose()
    inputs = np.reshape(samples,original_shape)

    inputs = np.transpose(inputs,axes=(2,1,0))
    inputs = FDataGrid(data_matrix=inputs, grid_points=grid)

    # inputs = smoother.transform(inputs)

    fitted = regr.predict(inputs)

    # Post-processing of predictions: evaluate on the grid, drop the last axis
    predictions = np.array(fitted.to_grid(np.arange(0,len(targets))).data_matrix)
    predictions = np.squeeze(predictions,2)

    # Scaling the predictions back (the scaler expects a single column)
    column = np.expand_dims(np.ravel(predictions),axis=-1)
    column = scaler_targets.inverse_transform(column)
    predictions = column.reshape(predictions.shape).transpose()

    return(predictions)


## Scatter Plot

In [None]:
def scatter_plot(dates,targets, predictions, name):
    """Scatter predictions against targets, coloured by month.

    The colours come from the months of the first 75 entries of ``dates``, so
    ``targets`` and ``predictions`` are expected to hold 75 values. A fitted
    line (red) and the 1:1 line (dashed black) are added.

    Returns
    -------
    slope of the first-degree fit of predictions on targets.
    """
    first_year = pd.DatetimeIndex(dates[0:75]) # From the first year

    # compute slope and intercept of the linear fit
    slope, intercept = np.polyfit(targets, predictions, deg=1)

    fig, ax = plt.subplots()

    points = ax.scatter(targets,predictions, s = 10, c= first_year.month)

    # Common limits for both axes, taken from the autoscaled scatter
    bounds = [ax.get_xlim(), ax.get_ylim()]
    lims = [np.min(bounds), np.max(bounds)]

    # plot fitted y = slope*x + intercept
    ax.axline(xy1=(0, intercept), slope=slope, color='r')

    ax.set_xlabel('targets')
    ax.set_ylabel('predictions')
    ax.set_xlim(lims)
    ax.set_ylim(lims)
    ax.set_aspect('equal')
    ax.legend(handles=points.legend_elements()[0], labels=['February','March','April'])

    # 1:1 reference line
    ax.plot(lims, lims,linestyle = '--',color = 'k')

    fig.suptitle(name)

    plt.show()

    return(slope)


## Pre-training

In [None]:
def pre_training(dataset,dataset2,n_clusters,clusters0,name,inputs_names):
    """Build the per-year, per-cluster spatial means of inputs and targets.

    For every calendar year in ``dataset`` the two datasets are restricted to
    that year, prepared with ``datasets_preparation`` and averaged over the
    grid cells of each cluster.

    Parameters
    ----------
    dataset, dataset2 : datasets holding the target and the inputs; both must
        provide ``time_counter`` and support ``.sel``.
    n_clusters : number of clusters in ``clusters0``.
    clusters0 : cluster map covering the same grid as the datasets.
    name : key of the target variable.
    inputs_names : keys of the input variables.

    Returns
    -------
    clusters_indiv_d : array (len(inputs_names), days, years, n_clusters)
    clusters_indiv_t : array (days, years, n_clusters)
    indx : kept-cell indices returned for the last processed year

    Raises
    ------
    ValueError
        If ``dataset`` contains no time steps.
    """
    # ``numpy.warnings`` is not available in recent NumPy releases
    import warnings

    warnings.filterwarnings('ignore') # For the nan mean warning

    ds = dataset
    ds2 = dataset2

    # Computed once instead of on every loop iteration
    years = np.unique(ds.time_counter.dt.year)
    years2 = np.unique(ds2.time_counter.dt.year)

    if len(years) == 0:
        raise ValueError('dataset contains no time steps')

    # Days per year once the 29th of February is removed. Counting them
    # directly also works for periods without a leap year, where
    # "number of distinct day-of-year values minus one" is one day short.
    feb29 = np.asarray((ds.time_counter.dt.month==2) & (ds.time_counter.dt.day==29))
    n_days = np.count_nonzero(~feb29) // len(years)

    clusters_indiv_t = np.zeros((n_days,len(years),n_clusters))
    clusters_indiv_d = np.zeros((len(inputs_names),n_days,len(years),n_clusters))

    for i in range(0, len(years)):

        year = str(years[i])
        year2 = str(years2[i])

        dataset = ds.sel(time_counter = slice(year, year))
        dataset2 = ds2.sel(time_counter = slice(year2, year2))

        inputs, targets, indx, _ = datasets_preparation(dataset, dataset2, clusters0, name, inputs_names)

        # Cluster label of every cell kept for this year
        clusters1 = np.ravel(clusters0)[indx]

        for j in range (0,n_clusters):

            # Cells outside cluster j become NaN and are skipped by nanmean
            in_cluster = clusters1==j

            temp = np.where(in_cluster, inputs, np.nan)
            clusters_indiv_d[:,:,i,j] = np.nanmean(temp,axis=2)

            temp = np.where(in_cluster, targets, np.nan)
            clusters_indiv_t[:,i,j] = np.nanmean(temp,axis=1)

    return(clusters_indiv_d,clusters_indiv_t,indx)


## Plotting (Mean Values)

In [None]:
def plotting_mean_values(dates,n_clusters,targets,predictions,r_train,rms_train,slope_train,category,units,region):
    """Plot target and prediction series of every cluster, one figure each.

    ``targets`` and ``predictions`` have three axes; the first two are stacked
    (first axis varying fastest) into one long series per cluster. The first
    point of every block is masked so the line breaks between blocks, and the
    block starts are labelled with the years found in ``dates``. The metrics
    of each cluster are shown as a one-row table before its figure.
    """
    years = np.unique(dates.year)

    block_len, n_blocks = targets.shape[0], targets.shape[1]
    ticks = list(range(0, block_len*n_blocks, block_len))

    def _stack_and_mask(values):
        # Column-major reshape keeps each block contiguous in the long series
        stacked = np.reshape(values,(values.shape[0]*values.shape[1],values.shape[2]), order = 'F')
        masked = np.ma.array(stacked)
        masked[ticks] = np.ma.masked
        return masked

    targets_masked = _stack_and_mask(targets)
    predictions_masked = _stack_and_mask(predictions)

    for i in range (0,n_clusters):

        fig, _ = plt.subplots(figsize=(19,5))

        metrics = np.vstack((r_train[i],rms_train[i],slope_train[i])).transpose()
        table = pd.DataFrame(metrics,index=['Cluster '+str(i+1)],columns=['r','rms','slope'])
        display(table)

        plt.plot(targets_masked[:,i], label = 'targets')
        plt.plot(predictions_masked[:,i], label = 'predictions')
        plt.xlabel('Years')
        plt.xticks(ticks,years)
        plt.suptitle('Mean '+category + ' ' +units + ' (15 Feb - 30 Apr) ' + region + ' (Cluster '+str(i+1)+ ')')
        plt.legend()

        plt.show()


## Post-Training

In [None]:
def post_training(dates,n_clusters,targets,predictions,category,units,region):
    """Compute per-cluster skill metrics and plot the series.

    For every cluster the flattened targets and predictions give the
    correlation coefficient (rounded to 3 decimals), the root mean squared
    error and the slope of a first-degree fit of predictions on targets
    (rounded to 3 decimals). The series are then plotted with
    ``plotting_mean_values``.

    Returns
    -------
    tuple ``(r_train, rms_train, slope_train)`` of arrays of length
    ``n_clusters``.
    """
    r_train, rms_train, slope_train = (np.full(n_clusters,np.nan) for _ in range(3))

    for i in range (0,n_clusters):

        observed = np.ravel(targets[:,:,i])
        predicted = np.ravel(predictions[:,:,i])

        r_train[i] = np.round(np.corrcoef(observed,predicted)[0][1],3)
        rms_train[i] = rmse(observed,predicted)

        slope,_ = np.polyfit(observed,predicted, deg=1)
        slope_train[i] = np.round(slope,3)

    plotting_mean_values(dates,n_clusters,targets,predictions,r_train,rms_train,slope_train,category,units,region)

    return(r_train,rms_train,slope_train)


## Plotting (Clusters)

In [None]:
def plotting_clusters(years,n_clusters,targets):
    """Plot the stacked series of every cluster, one figure per cluster.

    Parameters
    ----------
    years : labels placed at the start of every block on the x axis.
    n_clusters : number of clusters to plot.
    targets : array with three axes; the first two are stacked (first axis
        varying fastest) into one long series per entry of the last axis.
    """
    block_len, n_blocks, n_series = targets.shape

    ticks = list(range(0, block_len*n_blocks, block_len))

    # Shape derived from the data instead of a fixed (1050, 6)
    targets = np.reshape(targets,(block_len*n_blocks,n_series), order = 'F')

    # Masking the first point of every block breaks the line between blocks
    targets_masked = np.ma.array(targets)
    targets_masked[ticks] = np.ma.masked

    for i in range (0,n_clusters):

        fig, _ = plt.subplots(figsize=(19,5))

        plt.plot(targets_masked[:,i])
        plt.xlabel('Years')
        plt.xticks(ticks,years)
        plt.suptitle('Cluster '+str(i+1))

        plt.show()
        

## Initiation

In [None]:
# Target variable and the labels used in plot titles
name = 'Diatom'
units = '[mmol m-2]'
category = 'Concentrations'

# The set of drivers depends on the selected target
if name == 'Diatom':
    inputs_names = ['Summation_of_solar_radiation','Mean_wind_speed','Mean_air_temperature']
else:
    inputs_names = ['Summation_of_solar_radiation','Mean_air_temperature','Mean_pressure', 'Mean_precipitation', 'Mean_specific_humidity']

# ds is indexed with `name` (target); ds2 is indexed with `inputs_names` (drivers)
ds = xr.open_dataset('/data/ibougoudis/MOAD/files/integrated_original.nc')
ds2 = xr.open_dataset('/data/ibougoudis/MOAD/files/external_inputs.nc')


## Clustering

In [None]:
# Clustering period
dataset = ds.sel(time_counter = slice('2007', '2020'))
dataset2 = ds2.sel(time_counter = slice('2007', '2020'))

# Selecting the clustering input (drivers or target)
# NOTE(review): `id` shadows the Python builtin of the same name.

clusters0, id, n_clusters  = func_clust_target(dataset, name)
# clusters0, id, n_clusters = func_clust_drivers(dataset,dataset2,name,inputs_names)

#Low resolution

# y = np.array([y for y in range(0,898,5)])
# x = np.array([x for x in range (0,398,5)])
# clusters0 = clusters0[y][:,x]


## Training

In [None]:
# Low resolution

# ds = ds.isel(y=(np.arange(ds.y[0], ds.y[-1], 5)), 
#     x=(np.arange(ds.x[0], ds.x[-1], 5)))

# ds2 = ds2.isel(y=(np.arange(ds2.y[0], ds2.y[-1], 5)), 
#     x=(np.arange(ds2.x[0], ds2.x[-1], 5)))

# Training period
dataset = ds.sel(time_counter = slice('2007', '2020'))
dataset2 = ds2.sel(time_counter = slice('2007', '2020'))

years = np.unique(dataset.time_counter.dt.year)

# Per-year, per-cluster spatial means:
# inputs is (drivers, days, years, clusters), targets is (days, years, clusters)
inputs,targets,indx = pre_training(dataset,dataset2,n_clusters,clusters0,name,inputs_names)

# One fitted model, pair of scalers and smoother per cluster
regr_all = []
scaler_inputs_all = []
scaler_targets_all = []
smoother_all = []

predictions = np.full(targets.shape,np.nan)

for i in range (0,n_clusters):

    inputs2 = inputs[:,:,:,i] # inputs of the i cluster
    targets2 = targets[:,:,i] # targets of the i cluster
    regr, scaler_inputs,scaler_targets,smoother = regressor(inputs2,targets2,name,i,inputs_names)

    scaler_inputs_all.append(scaler_inputs)
    scaler_targets_all.append(scaler_targets)
    smoother_all.append(smoother)

    regr_all.append(regr)

    # Predictions on the training data itself, back in the original units
    predictions[:,:,i] = scaling(regr_all[i],inputs2,scaler_inputs_all[i],targets2,scaler_targets_all[i],smoother_all[i]) # putting them in the right place


## Heatmaps

In [None]:
# One figure per input feature, one panel per cluster, showing the fitted
# regression coefficients of that feature
for i in range(0,len(inputs_names)):

    fig, axs = plt.subplots(1,n_clusters, figsize = (24,6), layout='constrained')

    for j in range(0,n_clusters):

        temp = regr_all[j].coef_
        coeff = temp.data_matrix
        # Exact zeros are turned into NaN so they are left out of the colour scale
        coeff = np.where(coeff==0,np.nan,coeff)

        # The colour range is taken from the first cluster only and reused
        # for the remaining clusters of this feature
        if j==0: #first time for this input feature

            vmin = np.nanmin(coeff[0,:,:,i])
            vmax = np.nanmax(coeff[0,:,:,i])

        # Symmetric limits around zero for the diverging colormap
        h = axs[j].imshow(coeff[0,:,:,i], cmap='bwr',aspect='auto', vmin=-np.maximum(np.abs(vmin),vmax), vmax=np.maximum(np.abs(vmin),vmax))
        # Reverse the y axis direction set by imshow
        axs[j].set_ylim(axs[j].get_ylim()[::-1])
        cbar = fig.colorbar(h)
        axs[j].set_title('Cluster ' +str(j+1))
        axs[j].set_xlabel('Day')
        axs[j].set_ylabel('Day')
        fig.suptitle(inputs_names[i])


## Time-series (Training)

In [None]:
# Dates of the training period without the 29th of February
dates = pd.DatetimeIndex(dataset['time_counter'].values)
indx2 = ~((dataset.time_counter.dt.month==2) & (dataset.time_counter.dt.day==29))
dates = dates[indx2]

# post_training takes (..., category, units, region), in that order
r_train,rms_train,slope_train = post_training(dates,n_clusters,targets,predictions,category,units,'Salish Sea')

# Mean seasonal cycle of every cluster: average over the years axis
season = np.mean(targets,axis=1)

season_train = np.tile(season,len(np.unique(dates.year))) # Broadcasting season to all training years
season_train = np.reshape(season_train,targets.shape)

plt.plot(season)
# One legend entry per cluster instead of a fixed list of six
plt.legend(['Cluster ' + str(k+1) for k in range(n_clusters)])
plt.suptitle('Long-term seasonality (2007-2020)')
plt.show()

# Same metrics after removing the seasonal cycle from both series
r_train_season,_,slope_train_season = post_training(dates,n_clusters,targets-season_train,predictions-season_train,category,units,'Salish Sea (removed seasonality)')


## Other Years

In [None]:
# Testing period
dataset = ds.sel(time_counter = slice('2021', '2024'))
dataset2 = ds2.sel(time_counter = slice('2021', '2024'))

# Dates of the testing period without the 29th of February
dates = pd.DatetimeIndex(dataset['time_counter'].values)
indx = ~((dataset.time_counter.dt.month==2) & (dataset.time_counter.dt.day==29))
dates = dates[indx]

years = np.unique(dataset.time_counter.dt.year)

# NOTE: `indx` is rebound here to the kept-cell indices returned by pre_training
inputs,targets,indx = pre_training(dataset,dataset2,n_clusters,clusters0,name,inputs_names)

# Seasonal cycle of the training period, computed once for the testing years
season_test = np.tile(season,len(years)) # Broadcasting season to all testing years
season_test = np.reshape(season_test,(targets.shape[0],len(years),n_clusters))

predictions = np.full(targets.shape,np.nan)

# Predict with the models and scalers fitted on the training period
for i in range (0,n_clusters):

    inputs2 = inputs[:,:,:,i] # inputs of the i cluster
    targets2 = targets[:,:,i] # targets of the i cluster
    predictions[:,:,i] = scaling(regr_all[i],inputs2,scaler_inputs_all[i],targets2,scaler_targets_all[i],smoother_all[i]) # putting them in the right place


## Time-series (Testing)

In [None]:
# Containers for the testing metrics, one entry per cluster
r_test,rms_test,slope_test = np.zeros(n_clusters), np.zeros(n_clusters), np.zeros(n_clusters)

r_test_season, slope_test_season = np.zeros(n_clusters), np.zeros(n_clusters)

targets_sum, predictions_sum = np.zeros((n_clusters,len(years))), np.zeros((n_clusters,len(years)))

targets_mean, predictions_mean = np.zeros((n_clusters,len(years))), np.zeros((n_clusters,len(years)))

targets_diff, predictions_diff = np.zeros((n_clusters,targets.shape[0],len(years))), np.zeros((n_clusters,targets.shape[0],len(years)))

rss = np.zeros(n_clusters)

# post_training takes (..., category, units, region), in that order
r_test_season,_,slope_test_season = post_training(dates,n_clusters,targets-season_test,predictions-season_test,category,units,'Salish Sea (removed Seasonality)')

for i in range (0,n_clusters):

    r_test[i] = np.round(np.corrcoef(np.ravel(targets[:,:,i]),np.ravel(predictions[:,:,i]))[0][1],3)
    rms_test[i] = rmse(np.ravel(targets[:,:,i]),np.ravel(predictions[:,:,i]))
    m,_ = np.polyfit(np.ravel(targets[:,:,i]),np.ravel(predictions[:,:,i]), deg=1)
    slope_test[i] = np.round(m,3)

    rss[i] = np.sum((np.ravel(targets[:,:,i])-np.ravel(predictions[:,:,i]))**2) # Similar to rms, is not affected by the seasonality

    for j in range (0, len(years)):

        # Yearly sum and mean of the anomalies with respect to the seasonal cycle
        targets_sum[i,j] = np.sum(targets[:,j,i]-season_test[:,j,i])
        predictions_sum[i,j] = np.sum(predictions[:,j,i]-season_test[:,j,i])

        targets_mean[i,j] = np.mean(targets[:,j,i]-season_test[:,j,i])
        predictions_mean[i,j] = np.mean(predictions[:,j,i]-season_test[:,j,i])

        # Threshold: one standard deviation above the yearly mean of the targets
        mean = np.mean(targets[:,j,i])
        std = np.std(targets[:,j,i])
        diff = mean + 1*std

        # Keep only the values above the threshold, NaN elsewhere
        targets_diff[i,:,j] = np.where(targets[:,j,i]>diff,targets[:,j,i], np.nan)
        predictions_diff[i,:,j] = np.where(predictions[:,j,i]>diff,predictions[:,j,i], np.nan)

# Stack the years of every cluster into one long series
targets_diff = np.reshape(targets_diff,(n_clusters,targets.shape[0]*targets.shape[1]), order = 'F')
predictions_diff = np.reshape(predictions_diff,(n_clusters,targets.shape[0]*targets.shape[1]), order = 'F')

for i in range (0, len(targets_diff)):
    plt.figure()
    plt.plot(targets_diff[i])
    plt.plot(predictions_diff[i])


## Saving

In [None]:
# Saving is currently disabled; the statements are kept for reference.
# NOTE(review): the first dump writes `regr` (the model of the last cluster
# processed in the training loop) to 'regr_all.xz' -- presumably `regr_all`
# was intended; confirm before re-enabling.

# path = '/data/ibougoudis/MOAD/files/results/' + name + '/func_reg_cl_target_s2/'

# os.makedirs(path, exist_ok=True)
# with lzma.open(path + 'regr_all.xz', 'wb') as f:
    
#     dill.dump(regr, f)

# with open(path + 'train_metrics.pkl', 'wb') as f:
#     dill.dump([r_train,rms_train,slope_train,r_train_season,slope_train_season,season.transpose()], f)

# with open(path + 'test_metrics.pkl', 'wb') as f:
#     dill.dump([r_test,rms_test,slope_test,r_test_season,slope_test_season,targets_sum,predictions_sum,targets_mean,predictions_mean,targets_diff,predictions_diff,rss], f)

# with open(path + 'targets-predictions.pkl', 'wb') as f:
#     dill.dump([targets,predictions], f)
