# Diatom Biomass

## Importing

In [93]:
import numpy as np
import pandas as pd

import dill # You might need to install this package - saving & loading of files
from skfda import FDataGrid # You might need to install this package - format of functional regression files


## Loading of files

In [94]:
name = 'Diatom'

# Path of the files provided
path = '/data/ibougoudis/MOAD/analysis-ilias/notebooks/temp/' + name + '_biomass/' # Change this to the relative path

# Every provided file shares the same prefix: the first four letters of the
# name in lower case followed by '_bio'
file_prefix = path + name[0:4].lower() + '_bio'

# Printing of the readme file (the context manager closes the file afterwards)
with open(file_prefix + '_readme.txt', 'r') as readme:
    print(readme.read())

# NOTE(review): dill.load can execute arbitrary code while unpickling, so only
# load .pkl files that come from a trusted source.

# Fitted regressor
with open(file_prefix + '_regressor.pkl', 'rb') as f:
    regr = dill.load(f)

# Scaler applied to the inputs before prediction
with open(file_prefix + '_scaler_inputs.pkl', 'rb') as f:
    scaler_inputs = dill.load(f)

# Scaler whose inverse transform is applied to the predictions
with open(file_prefix + '_scaler_targets.pkl', 'rb') as f:
    scaler_targets = dill.load(f)

# Smoother applied to the functional inputs before prediction
with open(file_prefix + '_smoother.pkl', 'rb') as f:
    smoother = dill.load(f)


name: Diatom
period: jan_apr
input_features: ['Summation_of_solar_radiation', 'Summation_of_longwave_radiation']
n_intervals: 4
lag: 29.75



## Scaling

In [95]:
def scaling(regr,inputs,scaler_inputs,scaler_targets,smoother):
    """Scale the inputs, run the regressor and return un-scaled predictions.

    The inputs are scaled with ``scaler_inputs``, wrapped in an ``FDataGrid``,
    passed through ``smoother`` and fed to ``regr``. The predictions are then
    evaluated on an integer grid and mapped back with
    ``scaler_targets.inverse_transform``.

    Parameters
    ----------
    regr : object providing ``predict``; the result must provide
        ``to_grid(...)`` with a ``data_matrix`` attribute.
    inputs : 3-D numpy array. The example in this notebook passes an array
        of shape (n_features, n_samples, 1).
    scaler_inputs : object providing ``transform``; it receives a 2-D array
        with one column per entry of the first axis of ``inputs``.
    scaler_targets : object providing ``inverse_transform``; it receives a
        single-column 2-D array.
    smoother : object providing ``transform`` for an ``FDataGrid``.

    Returns
    -------
    The inverse-scaled predictions, transposed relative to the evaluated
    prediction matrix (the notebook's caller squeezes it to 1-D).
    """

    # Sizes of the three axes of the raw inputs, kept for all later reshapes
    n_features, n_steps, n_series = inputs.shape[:3]

    # Scaling the inputs: flatten the last two axes (Fortran order) so that
    # the scaler sees one column per entry of the first axis
    flat = np.reshape(inputs, (n_features, n_steps * n_series), order='F')
    scaled = scaler_inputs.transform(flat.transpose()).transpose()
    cube = np.reshape(scaled, (n_features, n_steps, n_series), order='F')

    # Reverse the axis order for FDataGrid and use an integer grid
    # 0..n_steps-1 as the grid points
    cube = np.transpose(cube, axes=(2, 1, 0))
    functional = FDataGrid(data_matrix=cube, grid_points=np.arange(0, n_steps))

    functional = smoother.transform(functional)

    fitted = regr.predict(functional)

    # Post-processing of predictions: evaluate on the same integer grid and
    # drop the trailing axis
    grid = np.arange(0, n_steps)
    values = np.squeeze(np.array(fitted.to_grid(grid).data_matrix), 2)

    # Scaling the predictions: the target scaler works on a single column
    column = np.reshape(values, (n_steps * n_series), order='F')
    column = np.expand_dims(column, axis=-1)
    restored = scaler_targets.inverse_transform(column)

    return restored.reshape(values.shape, order='F').transpose()

## Example of prediction

In [96]:
inputs = pd.read_excel(path + name[0:4].lower() + '_bio' +'_inputs.xlsx', sheet_name= name) # Excel file for this variable
inputs = inputs.iloc[:,1:3].to_numpy()

# The regressor accepts arrays of shape (n_features, n_samples, 1):
# add a trailing axis of length 1, then swap the first two axes
inputs = np.expand_dims(inputs, -1)
inputs = np.transpose(inputs,(1,0,2))

# Days without measurements are marked with -1 in the first feature.
# indx is the first such day (07 Apr in the provided file); when every day has
# a measurement it falls back to the number of days, so nothing is masked
# below instead of raising an IndexError.
missing = np.flatnonzero(inputs[0,:,0] == -1)
indx = missing[0] if missing.size else inputs.shape[1]

print(inputs.shape)

predictions = scaling(regr,inputs,scaler_inputs,scaler_targets,smoother)

predictions = np.squeeze(predictions) # We do this to remove the 1 dimension

predictions[indx:] = np.nan # to remove instances where we do not have input measurements (07 Apr onwards)

# Printing the 10 first outputs (or fewer, if fewer predictions are available)
for i, value in enumerate(predictions[:10]):

    print('The ' + str(i+1) + ' model output of ' + name + ':')
    print(np.round(value,4))
    

(2, 120, 1)
The 1 model output of Diatom:
0.2086
The 2 model output of Diatom:
0.2039
The 3 model output of Diatom:
0.1978
The 4 model output of Diatom:
0.194
The 5 model output of Diatom:
0.192
The 6 model output of Diatom:
0.192
The 7 model output of Diatom:
0.1951
The 8 model output of Diatom:
0.1979
The 9 model output of Diatom:
0.1995
The 10 model output of Diatom:
0.2026
