# Predicting Flagellate concentration with functional regression based on the oceanographic boxes (spatial means)

## Importing

In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xskillscore as xs

from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import r_regression

from skfda.representation.grid import FDataGrid
from skfda.ml.clustering import KMeans

from skfda.misc.hat_matrix import LocalLinearRegressionHatMatrix
from skfda.preprocessing.smoothing import KernelSmoother

from skfda.ml.regression import HistoricalLinearRegression

from sklearn.metrics import root_mean_squared_error as rmse

import os
import lzma
import dill

from tqdm import tqdm

import cmocean as cm
import salishsea_tools.viz_tools as sa_vi


## Datasets Preparation

In [None]:
# Creation of the training - testing datasets

def datasets_preparation(dataset, boxes, regions, name, inputs_names):
    """Build per-box daily series of the input features and of the target.

    Every variable is flattened over its non-time dimensions, 29 February is
    removed, the time axis is cut into one equally long segment per calendar
    year, grid cells whose target is NaN on the first day of any year are
    discarded, and the remaining cells of each box are averaged (NaNs ignored).

    Parameters
    ----------
    dataset
        Object exposing ``time_counter`` (with a ``.dt`` datetime accessor)
        and ``dataset[var].to_numpy()`` arrays whose first axis is time.
    boxes : sequence
        One entry per box; only its length is used here.
    regions : array-like
        Box index of every grid cell, flattened in the same order as the
        variables; cells whose value equals ``j`` belong to box ``j``.
    name : str
        Name of the target variable in ``dataset``.
    inputs_names : list of str
        Names of the input variables in ``dataset``.

    Returns
    -------
    inputs : ndarray, shape (n_inputs, n_days, n_years, n_boxes)
    targets : ndarray, shape (n_days, n_years, n_boxes)
    """
    time = dataset.time_counter
    n_years = len(np.unique(time.dt.year))

    # Positions of 29 February along the time axis
    leap_days = np.where((time.dt.month == 2) & (time.dt.day == 29))[0]

    # Flatten every variable to (time, cells)
    targets = dataset[name].to_numpy()
    targets = targets.reshape(targets.shape[0], -1)

    inputs = []
    for var in inputs_names:
        values = dataset[var].to_numpy()
        inputs.append(values.reshape(values.shape[0], -1))
    inputs = np.array(inputs)

    # Deleting 29 of February so that every year has the same number of days
    inputs = np.delete(inputs, leap_days, axis=1)
    targets = np.delete(targets, leap_days, axis=0)

    # Splitting in years (np.split requires equally long years)
    inputs = np.array(np.split(inputs, n_years, axis=1))
    targets = np.array(np.split(targets, n_years, axis=0))

    # Transposing to (inputs, days, years, cells) and (days, years, cells)
    inputs = np.transpose(inputs, (1, 2, 0, 3))
    targets = np.transpose(targets, (1, 0, 2))

    # Keep only the cells whose first-day target is defined in every year
    valid = np.where(~np.isnan(targets[0]).any(axis=0))[0]
    inputs = inputs[:, :, :, valid]
    targets = targets[:, :, valid]
    regions = np.ravel(regions)[valid]

    # Size the outputs from the data actually produced above. Deriving the day
    # count from the number of distinct day-of-year values minus one is only
    # right when the dataset contains the leap-year day-of-year shift.
    n_boxes = len(boxes)
    box_inputs = np.zeros(inputs.shape[:3] + (n_boxes,))
    box_targets = np.zeros(targets.shape[:2] + (n_boxes,))

    for j in range(n_boxes):
        in_box = regions == j
        box_inputs[:, :, :, j] = np.nanmean(np.where(in_box, inputs, np.nan), axis=3)
        box_targets[:, :, j] = np.nanmean(np.where(in_box, targets, np.nan), axis=2)

    return (box_inputs, box_targets)


## Regressor

In [None]:
def regressor (inputs, targets, j, r_inputs, n_intervals, lag):
    """Fit a historical functional linear regression for one box.

    ``inputs`` is a 3-D array whose axis 0 indexes the features; after
    scaling, axis 1 becomes the grid-point axis and axis 2 the sample axis of
    the functional data. ``targets`` is the matching 2-D array
    (grid points, samples).

    Row ``j`` of ``r_inputs`` is overwritten in place with the correlation
    (``r_regression``) between each feature and the target, rounded to two
    decimals.

    Returns the fitted regressor, the fitted input scaler, the fitted target
    scaler, the fitted smoother and ``r_inputs``.
    """
    n_features = len(inputs)
    n_points, n_samples = inputs.shape[1], inputs.shape[2]

    # Flatten (column-major) so that every row is one observation and every
    # column one feature; the targets are flattened in the same order
    flat_inputs = np.reshape(inputs, (n_features, n_points * n_samples), order='F').transpose()
    flat_targets = np.reshape(targets, (targets.shape[0] * targets.shape[1]), order='F')

    # Correlation coefficients between every feature and the target
    r_inputs[j] = np.round(r_regression(flat_inputs, flat_targets), 2)

    # Standardising the inputs, feature by feature, then restoring the 3-D layout
    scaler_inputs = make_column_transformer((StandardScaler(), np.arange(0, n_features)))
    scaled_inputs = scaler_inputs.fit_transform(flat_inputs).transpose()
    scaled_inputs = np.reshape(scaled_inputs, (n_features, n_points, n_samples), order='F')

    # Standardising the targets and restoring their 2-D layout
    scaler_targets = StandardScaler()
    scaled_targets = scaler_targets.fit_transform(np.expand_dims(flat_targets, -1))
    scaled_targets = scaled_targets.reshape(targets.shape, order='F')

    # Functional-data layout: (samples, grid points, features) and (samples, grid points)
    curves_in = np.transpose(scaled_inputs, axes=(2, 1, 0))
    curves_out = scaled_targets.transpose()
    n_grid = len(curves_out[0])
    fd_inputs = FDataGrid(data_matrix=curves_in, grid_points=np.arange(0, n_grid))
    fd_targets = FDataGrid(data_matrix=curves_out, grid_points=np.arange(0, n_grid))

    # Smoothing (inputs only)
    smoother = KernelSmoother(kernel_estimator=LocalLinearRegressionHatMatrix(bandwidth=1))
    fd_inputs = smoother.fit_transform(fd_inputs)

    regr = HistoricalLinearRegression(n_intervals=n_intervals, lag=lag).fit(fd_inputs, fd_targets)

    return(regr,scaler_inputs,scaler_targets,smoother,r_inputs)


## Scaling

In [None]:
def scaling(regr,inputs,scaler_inputs,targets,scaler_targets,smoother):
    """Predict with a fitted regressor and undo the target scaling.

    The inputs are scaled with the already fitted ``scaler_inputs``, turned
    into functional data on a grid of ``len(targets)`` points, smoothed with
    the already fitted ``smoother`` and passed to ``regr.predict``. The
    predictions are evaluated on the same grid, mapped back with
    ``scaler_targets.inverse_transform`` and returned transposed.
    """
    n_features = len(inputs)
    n_points, n_samples = inputs.shape[1], inputs.shape[2]
    n_grid = len(targets)

    # Scaling the inputs: flatten to (observations, features), transform, restore
    flat = np.reshape(inputs, (n_features, n_points * n_samples), order='F').transpose()
    flat = scaler_inputs.transform(flat).transpose()
    scaled = np.reshape(flat, (n_features, n_points, n_samples), order='F')

    # Functional-data layout: (samples, grid points, features)
    curves = FDataGrid(data_matrix=np.transpose(scaled, axes=(2, 1, 0)),
                       grid_points=np.arange(0, n_grid))
    curves = smoother.transform(curves)

    predicted = regr.predict(curves)

    # Post-processing of predictions: evaluate on the grid, drop the trailing axis
    predictions = np.array(predicted.to_grid(np.arange(0, n_grid)).data_matrix)
    predictions = np.squeeze(predictions, 2)

    # Scaling the predictions back through the target scaler
    flat_pred = np.reshape(predictions, (targets.shape[0] * targets.shape[1]), order='F')
    flat_pred = scaler_targets.inverse_transform(np.expand_dims(flat_pred, axis=-1))
    restored = flat_pred.reshape(predictions.shape, order='F')

    return(restored.transpose())


## Plotting (regions)

In [None]:
def plot_box(ax, corn, colour):
    """Draw the closed outline of a rectangle on ``ax``.

    ``corn[0]`` and ``corn[1]`` are the two vertical-axis limits, ``corn[2]``
    and ``corn[3]`` the two horizontal-axis limits; ``colour`` is the line
    colour.
    """
    y_first, y_second = corn[0], corn[1]
    x_first, x_second = corn[2], corn[3]

    # Five vertices: the starting corner is repeated to close the outline
    xs = [x_first, x_second, x_second, x_first, x_first]
    ys = [y_first, y_first, y_second, y_second, y_first]

    ax.plot(xs, ys, '-', color=colour)
    

## Radar (calculations)

In [None]:
def radar(name, boxes):
    """Collect the bootstrap test metrics shown in the radar plots.

    For every box, eight bootstrap experiments are read from
    ``/data/ibougoudis/MOAD/files/results/<name>/bootstraps/
    <name[0:4].lower()>_func_reg4_boxes_s<id>_boot_100/test_metrics.pkl``.
    From each file, entries 3 and 1 are averaged over axis 0 and the value at
    the box's own index is kept.

    Parameters
    ----------
    name : str
        Target variable name; used to build the results path.
    boxes : sequence
        One entry per box; only its length is used. Box indices 0-8 have an
        experiment list configured below.

    Returns
    -------
    names_all : list of list of str
        Experiment labels of every box, in the order of the columns below.
    r_b : ndarray, shape (len(boxes), 8)
        Averaged entry 3 of the metrics files (presumably a correlation;
        confirm against the code that writes ``test_metrics.pkl``).
    rms_b : ndarray, shape (len(boxes), 8)
        Averaged entry 1 of the metrics files (presumably an RMS error;
        confirm against the code that writes ``test_metrics.pkl``).

    Raises
    ------
    ValueError
        If ``boxes`` has more entries than there are configured experiment
        lists. This is checked before any file is opened, instead of silently
        reusing the previous box's files.
    """
    # (labels, experiment ids) of each experiment family
    swr_ap = (['SWR, AP', '- AP', '- SWR', '+ WS', '+ LWR', '+ AT', '+ TP', '+ SH'],
              [20, 41, 27, 42, 40, 14, 16, 43])
    swr_ap_at = (['SWR, AP, AT', '- AP', '- SWR', '- AT', '+ WS', '+ LWR', '+ TP', '+ SH'],
                 [14, 19, 21, 20, 22, 24, 44, 45])
    swr_at = (['SWR, AT', '- AT', '- SWR', '+ WS', '+ LWR', '+ TP', '+ AP', '+ SH'],
              [19, 41, 46, 23, 47, 28, 14, 5])
    ap_at = (['AP, AT', '- AP', '- AT', '+ WS', '+ TP', '+ LWR', '+ SWR', '+ SH'],
             [21, 46, 27, 48, 49, 51, 14, 50])
    swr_sh = (['SWR, SH', '- SH', '- SWR', '+ AT', '+ WS', '+ LWR', '+ AP', '+ TP'],
              [32, 41, 52, 5, 53, 54, 55, 56])
    swr = (['SWR', '+ AT', '+ WS', '+ LWR', 'SWR, AP, AT', '+ TP', '+ AP', '+ SH'],
           [41, 19, 31, 30, 14, 26, 20, 32])

    # Experiment family used by every box index
    experiments = {
        0: swr,
        1: swr_ap_at,
        2: swr_at,
        3: swr_ap,
        4: swr_ap_at,
        5: ap_at,
        6: swr_ap_at,
        7: swr_at,
        8: swr_sh,
    }

    n_boxes = len(boxes)
    if n_boxes > len(experiments):
        raise ValueError(
            'radar() has experiment lists for ' + str(len(experiments))
            + ' boxes only, got ' + str(n_boxes))

    n_runs = 8  # number of experiments per box
    r_b, rms_b = np.zeros((n_boxes, n_runs)), np.zeros((n_boxes, n_runs))
    names_all = []

    root = ('/data/ibougoudis/MOAD/files/results/' + name + '/bootstraps/'
            + name[0:4].lower() + '_func_reg4_boxes_s')

    for i in range(n_boxes):
        names, ids = experiments[i]

        for j, run_id in enumerate(ids):
            path = root + str(run_id) + '_boot_100/'

            # NOTE: dill.load executes pickled code; only use on trusted result files
            with open(path + 'test_metrics.pkl', 'rb') as f:
                temp = dill.load(f)

            r_b[i,j] = np.mean(temp[3],axis=0)[i]
            rms_b[i,j] = np.mean(temp[1],axis=0)[i]

        names_all.append(names)

    return (names_all, r_b, rms_b)
    

## Initiation

In [None]:
# Target variable and its descriptive metadata
name = 'Flagellate'
units = '[mmol m-2]'
category = 'biomasses'

filename = '/data/ibougoudis/MOAD/files/inputs/apr_jun.nc'
inputs_names = ['Summation_of_solar_radiation', 'Summation_of_longwave_radiation', 'Mean_precipitation', 'Mean_pressure', 'Mean_air_temperature', 'Mean_specific_humidity', 'Mean_wind_speed']

n_intervals = 3

# Settings of every supported period, keyed by the first seven characters of
# the input file's base name. 'lags' maps n_intervals to one lag per box.
period_settings = {
    'apr_jun': {  # 76 days, 4th period
        'period': '(16 Apr - 30 Jun)',
        'id': '4',
        'lags': {
            3: [25, 25, 25, 25, 25, 25, 25, 25, 50],
            4: [18.25, 18.25, 37.5, 18.25, 18.25, 18.25, 18.25, 18.25, 18.25],
        },
    },
    'may_sep': {  # 153 days, 5th period
        'period': '(01 May - 30 Sep)',
        'id': '5',
        'lags': {
            3: [101.3, 50.6, 50.6, 50.6, 50.6, 101.3, 50.6, 50.6, 50.6],
            4: [38, 38, 38, 38, 38, 38, 38, 38, 38],
            5: [30.4, 30.4, 30.4, 30.4, 30.4, 30.4, 30.4, 30.4, 30.4],
        },
    },
}

# Read the period from the file name itself rather than from fixed character
# positions of the full path, so moving the inputs directory cannot break it.
period_key = os.path.basename(filename)[0:7]

# Fail here, with a clear message, instead of leaving lags/period/id undefined
# (or stale from an earlier run of this cell).
if period_key not in period_settings:
    raise ValueError('Unsupported period: ' + period_key)
if n_intervals not in period_settings[period_key]['lags']:
    raise ValueError('No lags defined for n_intervals=' + str(n_intervals) + ' in period ' + period_key)

lags = period_settings[period_key]['lags'][n_intervals]
period = period_settings[period_key]['period']
# NOTE: shadows the builtin id(); the name is kept because the saving cell reads it
id = period_settings[period_key]['id']

ds = xr.open_dataset(filename)


## Regions

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(5, 9))

# Work on a copy: set_bad modifies the colormap in place, and cm.cm.deep is
# the object shared by every other user of cmocean in this session.
mycmap = cm.cm.deep.copy()
mycmap.set_bad('grey')
ax.pcolormesh(ds[name][0], cmap=mycmap)
sa_vi.set_aspect(ax)

# Every box is [y_start, y_stop, x_start, x_stop] in grid indices
SoG_north = [650, 730, 100, 200]
SoG_center = [450, 550, 200, 300]
Fraser_plume = [380, 460, 260, 330]
SoG_south = [320, 380, 280, 350]
Haro_Boundary = [290, 350, 210, 280]
JdF_west = [250, 425, 25, 125]
JdF_east = [200, 290, 150, 260]
PS_all = [0, 200, 80, 320]
PS_main = [20, 150, 200, 280]

boxes = [SoG_north,SoG_center,Fraser_plume,SoG_south,Haro_Boundary,JdF_west,JdF_east,PS_all,PS_main]
boxnames = ['GN','GC','FP','GS', 'HB', 'JdFW', 'JdFE', 'PSA', 'PSM']
box_colours = ['g', 'b', 'm', 'k', 'm', 'c', 'w', 'm', 'r']

# Outlines are drawn in the order of boxnames so the legend entries line up
for box, colour in zip(boxes, box_colours):
    plot_box(ax, box, colour)

fig.legend(boxnames)

# Map of box indices (NaN outside every box). Boxes are written in order, so
# where two boxes overlap the later one wins (e.g. PS_main inside PS_all).
regions0 = np.full((len(ds.y),len(ds.x)),np.nan)

for i, (y_start, y_stop, x_start, x_stop) in enumerate(boxes):
    regions0[y_start:y_stop, x_start:x_stop] = i

regions0 = xr.DataArray(regions0,dims = ['y','x'])

# # Low resolution
# temp = []

# for i in boxes:
#     temp.append([x//5 for x in i])

# boxes = temp

## Training

In [None]:
# Low resolution

# ds = ds.isel(y=(np.arange(ds.y[0], ds.y[-1], 5)), 
#     x=(np.arange(ds.x[0], ds.x[-1], 5)))

# regions0 = regions0.isel(y=(np.arange(regions0.y[0], regions0.y[-1], 5)), 
#     x=(np.arange(regions0.x[0], regions0.x[-1], 5)))

# Training years
dataset = ds.sel(time_counter=slice('2007', '2020'))

# Distinct calendar days ('dd Mon') of the period, put in chronological order
labels = np.unique(dataset.time_counter.dt.strftime('%d %b'))
indx_labels = np.argsort(pd.to_datetime(labels, format='%d %b'))
labels = labels[indx_labels]

# One row of feature/target correlations per box, filled in by regressor()
r_inputs = np.zeros((len(boxnames), len(inputs_names)))

inputs, targets = datasets_preparation(dataset, boxes, regions0, name, inputs_names)

regr_all, scaler_inputs_all, scaler_targets_all, smoother_all = [], [], [], []

predictions = np.full(targets.shape, np.nan)

for i in range(len(boxes)):

    # Series of box i only
    inputs2 = inputs[:, :, :, i]
    targets2 = targets[:, :, i]

    regr, scaler_inputs, scaler_targets, smoother, r_inputs = regressor(
        inputs2, targets2, i, r_inputs, n_intervals, lags[i])

    regr_all.append(regr)
    scaler_inputs_all.append(scaler_inputs)
    scaler_targets_all.append(scaler_targets)
    smoother_all.append(smoother)

    # Predictions on the training data, stored in the slot of box i
    predictions[:, :, i] = scaling(
        regr_all[i], inputs2, scaler_inputs_all[i], targets2, scaler_targets_all[i], smoother_all[i])

print('Metrics between input features and '+name)
temp = pd.DataFrame(r_inputs, index=boxnames, columns=inputs_names)
display(temp)


## Heatmaps - Triangles

In [None]:
# One figure per input feature, one panel per box, showing the fitted
# coefficient surface of that feature
for i, feature in enumerate(inputs_names):

    fig, axs = plt.subplots(1, len(boxes), figsize=(28, 6), layout='constrained')

    for j in range(len(boxes)):

        temp = regr_all[j].coef_
        coeff = temp.data_matrix
        coeff = np.where(coeff == 0, np.nan, coeff)  # exact zeros are left blank
        surface = coeff[0, :, :, i]

        if j == 0:
            # NOTE(review): the symmetric colour limits are taken from the
            # first box only and reused for every other box of this feature
            vmin = np.nanmin(surface)
            vmax = np.nanmax(surface)
            limit = np.maximum(np.abs(vmin), vmax)

        panel = axs[j]
        h = panel.imshow(surface, cmap='bwr', aspect='auto', vmin=-limit, vmax=limit)

        panel.set_ylim(panel.get_ylim()[::-1])  # flip the vertical axis
        cbar = fig.colorbar(h)
        panel.set_title(boxnames[j])
        panel.set_xlabel('Day')
        panel.set_ylabel('Day')

    fig.suptitle(feature)


## Heatmaps - Previous Day

In [None]:
# Short names of the input features, in the order of inputs_names
inputs_names2 = ['SWR', 'LWR', 'TP', 'AP', 'AT', 'SH', 'WS']

# Summary statistics of the coefficients, one row per box and one column per feature
stats_shape = (len(boxes), len(inputs_names))
coef_mean = np.zeros(stats_shape)
coef_std = np.zeros(stats_shape)
coef_range = np.zeros(stats_shape)
coef_max = np.zeros(stats_shape)

fig, axs = plt.subplots(2, 2, figsize=(10, 10), layout='constrained')

# Index pairs (k, k+1) for every day but the last
day_idx = np.arange(len(labels) - 1)

for j in range(len(boxes)):

    temp = regr_all[j].coef_
    coeff = temp.data_matrix
    coeff = np.where(coeff == 0, np.nan, coeff)

    for i in range(len(inputs_names)):

        # Only the impact of the previous day: coefficient at (k, k+1)
        temp = coeff[0, day_idx, day_idx + 1, i].astype(float)

        lowest, highest = np.nanmin(temp), np.nanmax(temp)

        coef_mean[j,i] = np.nanmean(temp)
        coef_std[j,i] = np.nanstd(temp)
        coef_range[j,i] = highest - lowest
        coef_max[j,i] = np.maximum(np.abs(lowest), highest)

# The top-left slot stays empty
axs[0,0].set_axis_off()

# The three remaining slots share the same layout
for panel, stat, title in [(axs[0,1], coef_std, 'Std'),
                           (axs[1,0], coef_range, 'Range'),
                           (axs[1,1], coef_max, 'Max Absolute Value')]:

    h = panel.imshow(stat, cmap=cm.cm.ice, aspect='auto', vmin=stat.min(), vmax=stat.max())
    panel.set_title(title)
    cbar = fig.colorbar(h)
    panel.set_ylim(panel.get_ylim()[::-1])
    panel.set_xticks(range(len(inputs_names)), labels=inputs_names2)
    panel.set_yticks(range(len(boxnames)), labels=boxnames, rotation=45)

fig.suptitle('Previous Day')
plt.show()


## Heatmaps - All days

In [None]:
# Summary statistics of the whole coefficient surface, one row per box and
# one column per feature
stats_shape = (len(boxes), len(inputs_names))
coef_mean = np.zeros(stats_shape)
coef_std = np.zeros(stats_shape)
coef_range = np.zeros(stats_shape)
coef_max = np.zeros(stats_shape)

fig, axs = plt.subplots(2, 2, figsize=(10, 10), layout='constrained')

for j in range(len(boxes)):

    temp = regr_all[j].coef_
    coeff = temp.data_matrix
    coeff = np.where(coeff == 0, np.nan, coeff)  # exact zeros are ignored

    for i in range(len(inputs_names)):

        surface = coeff[:, :, :, i]
        lowest, highest = np.nanmin(surface), np.nanmax(surface)

        coef_mean[j,i] = np.nanmean(surface)
        coef_std[j,i] = np.nanstd(surface)
        coef_range[j,i] = highest - lowest
        coef_max[j,i] = np.maximum(np.abs(lowest), highest)

# The top-left slot stays empty
axs[0,0].set_axis_off()

# The three remaining slots share the same layout
for panel, stat, title in [(axs[0,1], coef_std, 'Std'),
                           (axs[1,0], coef_range, 'Range'),
                           (axs[1,1], coef_max, 'Max Absolute Value')]:

    h = panel.imshow(stat, cmap=cm.cm.ice, aspect='auto', vmin=stat.min(), vmax=stat.max())
    panel.set_title(title)
    cbar = fig.colorbar(h)
    panel.set_ylim(panel.get_ylim()[::-1])
    panel.set_xticks(range(len(inputs_names)), labels=inputs_names2)
    panel.set_yticks(range(len(boxnames)), labels=boxnames, rotation=45)

fig.suptitle('All days')
plt.show()


## Radar Plot (R Testing no Seasonality)

In [None]:
names, r_b, rms_b = radar (name, boxes)

# Panel tags. They get their own name so that the calendar-day `labels` array
# read by the 'Previous Day' heatmap cell is not overwritten by this cell.
panel_labels = ['(a)','(b)','(c)','(d)','(e)','(f)','(g)','(h)','(i)']

n_cols = 5
fig, ax = plt.subplots(2, n_cols, figsize=(15, 6.5), layout='constrained', subplot_kw={'projection': 'polar'})

# Eight spokes plus a repeat of the first angle, to close the polygon
theta = np.linspace(0, 2*np.pi, 9)

for i in range(len(boxes)):

    k, l = divmod(i, n_cols)  # row, column of this box's panel
    panel = ax[k,l]

    values = np.append(r_b[i,:], r_b[i,0])

    panel.plot(theta, values, marker='o')
    # Dashed circle at the value of the first experiment, for reference
    panel.plot(np.linspace(0, 2*np.pi, 100), np.full(100, values[0]), ls='--')

    panel.annotate(panel_labels[i], (0.02, 1.2), xycoords='axes fraction', fontsize=14)

    panel.set_theta_zero_location('N')
    panel.set_rmax(np.max(values)+0.05)
    panel.set_rmin(np.min(values)-0.05)
    panel.set_rticks(np.round(np.linspace(np.round(np.min(values),1), np.round(np.max(values),1), 3), 2))

    panel.set_rlabel_position(230) # The text
    panel.tick_params(pad = 7)

    panel.set_xticks(theta[:-1], names[i])
    panel.set_title(boxnames[i], x=0.50, y=1.15)

fig.suptitle('Importances of Input Features on NB (R Testing no Seasonality)')

# The slot right after the last box is turned from a polar panel into a
# regular one that holds the coefficient heatmap
k, l = divmod(len(boxes), n_cols)
ax[k,l].remove()
ax[k,l] = fig.add_subplot(2, n_cols, len(boxes) + 1)
heat_ax = ax[k,l]

heat_ax.annotate('(j)', (0.00, 1.2), xycoords='axes fraction', fontsize=14)

h = heat_ax.imshow(coef_max, cmap = cm.cm.ice, aspect='auto', vmin= coef_max.min(), vmax=coef_max.max())
heat_ax.set_title('Max Absolute Value')
cbar = fig.colorbar(h, ax=heat_ax, pad=0)

heat_ax.set_ylim(heat_ax.get_ylim()[::-1])
heat_ax.set_xticks(range(len(inputs_names)), labels=inputs_names2, rotation=30)
heat_ax.set_yticks(range(len(boxnames)), labels=boxnames, rotation=45)

plt.show()


In [None]:
# Display the row of r_b that belongs to the box with index 8
r_b[8]

## Radar Plot (RMS Testing)

In [None]:
names, r_b, rms_b = radar (name, boxes)

# Panel tags. They get their own name so that the calendar-day `labels` array
# read by the 'Previous Day' heatmap cell is not overwritten by this cell.
panel_labels = ['(a)','(b)','(c)','(d)','(e)','(f)','(g)','(h)','(i)']

n_cols = 5
fig, ax = plt.subplots(2, n_cols, figsize=(15, 6.5), layout='constrained', subplot_kw={'projection': 'polar'})

# Eight spokes plus a repeat of the first angle, to close the polygon
theta = np.linspace(0, 2*np.pi, 9)

for i in range(len(boxes)):

    k, l = divmod(i, n_cols)  # row, column of this box's panel
    panel = ax[k,l]

    values = np.append(rms_b[i,:], rms_b[i,0])

    panel.plot(theta, values, marker='o')
    # Dashed circle at the value of the first experiment, for reference
    panel.plot(np.linspace(0, 2*np.pi, 100), np.full(100, values[0]), ls='--')

    panel.annotate(panel_labels[i], (0.02, 1.2), xycoords='axes fraction', fontsize=14)

    panel.set_theta_zero_location('N')
    # ax[k,l].set_rmax(np.max(values))
    # ax[k,l].set_rmin(np.min(values))
    # ax[k,l].set_rticks(np.round(np.linspace(np.round(np.min(values),1), np.round(np.max(values),1), 4), 2))

    panel.set_rlabel_position(230) # The text
    panel.tick_params(pad = 7)

    panel.set_xticks(theta[:-1], names[i])
    panel.set_title(boxnames[i], x=0.50, y=1.15)

fig.suptitle('Importances of Input Features on NB (RMS Testing)')

# The slot right after the last box is turned from a polar panel into a
# regular one that holds the coefficient heatmap
k, l = divmod(len(boxes), n_cols)
ax[k,l].remove()
ax[k,l] = fig.add_subplot(2, n_cols, len(boxes) + 1)
heat_ax = ax[k,l]

heat_ax.annotate('(j)', (0.00, 1.2), xycoords='axes fraction', fontsize=14)

h = heat_ax.imshow(coef_max, cmap = cm.cm.ice, aspect='auto', vmin= coef_max.min(), vmax=coef_max.max())
heat_ax.set_title('Max Absolute Value')
cbar = fig.colorbar(h, ax=heat_ax, pad=0)

heat_ax.set_ylim(heat_ax.get_ylim()[::-1])
heat_ax.set_xticks(range(len(inputs_names)), labels=inputs_names2, rotation=30)
heat_ax.set_yticks(range(len(boxnames)), labels=boxnames, rotation=45)

plt.show()


## Saving

In [None]:
# Output directory of this run
path = '/data/ibougoudis/MOAD/files/results/' + name + '/single_runs/' + name[0:4].lower() + '_func_reg' + id + '_boxes_s_df_im/'

os.makedirs(path, exist_ok=True)

# Fitted regressors, one per box (xz-compressed)
with lzma.open(path + 'regr_all.xz', 'wb') as f:
    dill.dump(regr_all, f)

# Correlations between the input features and the target
with open(path + 'r_inputs.pkl', 'wb') as f:
    dill.dump(r_inputs, f)

# Coefficient-based importances
with open(path + 'importances.pkl', 'wb') as f:
    dill.dump([coef_max], f)

# Everything needed to redraw the radar plots
with open(path + 'metrics.pkl', 'wb') as f:
    dill.dump([boxnames, inputs_names2, names, r_b, rms_b], f)

# Plain-text summary of the run settings, one "key: value" line each
readme_lines = [
    'name: ' + name,
    'period: ' + filename[35:42],
    'input_features: ' + str(list(inputs_names)),
    'n_intervals: ' + str(n_intervals),
    'lags: ' + str(list(lags)),
]

with open(path + 'readme.txt', 'w') as f:
    f.write('\n'.join(readme_lines) + '\n')
    