In [9]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xskillscore as xs

from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import r_regression

from skfda.representation.grid import FDataGrid
from skfda.ml.clustering import KMeans

from skfda.misc.hat_matrix import NadarayaWatsonHatMatrix, LocalLinearRegressionHatMatrix, KNeighborsHatMatrix
from skfda.preprocessing.smoothing import KernelSmoother

from skfda.ml.regression import HistoricalLinearRegression

from sklearn.metrics import root_mean_squared_error as rmse

import os
import lzma
import dill

from tqdm import tqdm

import cmocean.cm as cm
import salishsea_tools.viz_tools as sa_vi

np.warnings.filterwarnings('ignore') # For the nan mean warning


In [10]:
# --- Configuration -----------------------------------------------------------
# Variable to model, with its display units and category label.
name = 'Diatom'
units = '[mmol m-2]'
category = 'Concentrations'

# Driver variables used as regression inputs: a three-variable list for
# 'Diatom', a five-variable list for anything else.
inputs_names = (
    ['Summation_of_solar_radiation','Mean_wind_speed','Mean_air_temperature']
    if name == 'Diatom' else
    ['Summation_of_solar_radiation','Mean_air_temperature','Mean_pressure', 'Mean_precipitation', 'Mean_specific_humidity']
)

# --- Load --------------------------------------------------------------------
# NOTE(review): absolute, machine-specific paths; consider a configurable
# data directory.
ds = xr.open_dataset('/data/ibougoudis/MOAD/files/integrated_original.nc')
ds2 = xr.open_dataset('/data/ibougoudis/MOAD/files/external_inputs.nc')

# --- Low resolution ----------------------------------------------------------
# Keep every 5th grid point along y and x.
# NOTE(review): `isel` is positional, yet the index arrays are built from the
# first/last *coordinate values*; this presumably relies on the x/y
# coordinates being 0-based integer positions -- TODO confirm.
ds = ds.isel(
    y=np.arange(ds.y[0], ds.y[-1], 5),
    x=np.arange(ds.x[0], ds.x[-1], 5),
)
ds2 = ds2.isel(
    y=np.arange(ds2.y[0], ds2.y[-1], 5),
    x=np.arange(ds2.x[0], ds2.x[-1], 5),
)

# --- Training period ---------------------------------------------------------
# Restrict both datasets to time steps falling in 2007.
training_period = slice('2007', '2007')
dataset = ds.sel(time_counter=training_period)
dataset2 = ds2.sel(time_counter=training_period)


In [11]:
# Target field flattened to (time, all_grid_cells). The sizes are taken from
# the data instead of being hard-coded as (75, 180*80).
targets = dataset[name].to_numpy()
targets = targets.reshape(targets.shape[0], -1)

# Stack the driver variables into (n_inputs, time, all_grid_cells). Using the
# actual number of inputs (rather than a literal 3) keeps this working when
# `inputs_names` holds the longer, five-variable list.
inputs = np.array([dataset2[i].to_numpy() for i in inputs_names])
inputs = inputs.reshape(inputs.shape[0], inputs.shape[1], -1)

# Flattened x / y coordinate of every grid cell, in the same (y-major) order
# as the flattened last axis above: x cycles fastest, y is repeated per row.
x = np.tile(dataset2.x.to_numpy(), len(dataset2.y))
y = np.repeat(dataset2.y.to_numpy(), len(dataset2.x))

# Keep only cells whose target has no NaN at any time step and that satisfy
# x > 10 and (x > 100 or y < 880).
# NOTE(review): the coordinate thresholds presumably cut out specific regions
# of the domain -- confirm and document the intent.
indx = np.where((~np.isnan(targets).any(axis=0)) & (x>10) & ((x>100) | (y<880)))
inputs = inputs[:,:,indx[0]]
targets = targets[:,indx[0]]


In [12]:
# Display the (rows, columns) shape of the filtered target array.
np.shape(targets)

(75, 1838)

In [13]:
# One grid point per row of `targets` (its first axis), shared by the input
# and target functional objects.
time_grid = np.arange(0, targets.shape[0])

# Re-order to sample-first layout: inputs (cell, time, variable),
# targets (cell, time), then wrap both as functional data on `time_grid`.
inputs0 = FDataGrid(
    data_matrix=np.transpose(inputs, axes=(2,1,0)),
    grid_points=time_grid,
)
targets0 = FDataGrid(
    data_matrix=targets.transpose(),
    grid_points=time_grid,
)

# Fit the historical functional linear model and predict on the same inputs
# it was fitted on (in-sample predictions).
model = HistoricalLinearRegression(n_intervals=3).fit(inputs0,targets0)
predictions = model.predict(inputs0)

# Evaluate the predicted functions on the time grid, drop the trailing
# axis (index 2), and flip back so rows match the rows of `targets`.
predictions_on_grid = np.array(predictions.to_grid(time_grid).data_matrix)
predictions = np.squeeze(predictions_on_grid, 2).transpose()


In [14]:
# Mean of the targets over axis 1 (one value per row), displayed below.
a = targets.mean(axis=1)
a

array([0.17161487, 0.16998011, 0.17420622, 0.1761727 , 0.17258373,
       0.16868513, 0.16790976, 0.16874997, 0.17552003, 0.18238737,
       0.18261328, 0.18067137, 0.18600597, 0.19262919, 0.19711187,
       0.21132495, 0.20530919, 0.20863732, 0.21274578, 0.21630125,
       0.22845681, 0.24061698, 0.22755136, 0.22426793, 0.21000492,
       0.20260459, 0.21227905, 0.21715931, 0.23406759, 0.23420077,
       0.23160841, 0.23169077, 0.24658231, 0.241703  , 0.25214858,
       0.24841559, 0.23711047, 0.22729613, 0.22201945, 0.23980566,
       0.27013782, 0.29760155, 0.32656435, 0.3685128 , 0.37742582,
       0.43100808, 0.44953215, 0.5143321 , 0.59721875, 0.61079477,
       0.61355388, 0.64280527, 0.57596317, 0.53894825, 0.4982403 ,
       0.5056259 , 0.48774568, 0.47653283, 0.39765441, 0.37499431,
       0.37553859, 0.32442376, 0.30099847, 0.3214591 , 0.35201092,
       0.39033141, 0.40012795, 0.42483878, 0.44711321, 0.43739228,
       0.3903652 , 0.35318243, 0.32819392, 0.36167902, 0.39371

In [15]:
# Mean of the predictions over axis 1 (one value per row), displayed below.
b = predictions.mean(axis=1)
b

array([0.17161487, 0.16998011, 0.17420622, 0.1761727 , 0.17258373,
       0.16868513, 0.16790976, 0.16874997, 0.17552003, 0.18238737,
       0.18261328, 0.18067137, 0.18600597, 0.19262919, 0.19711187,
       0.21132495, 0.20530919, 0.20863732, 0.21274578, 0.21630125,
       0.22845681, 0.24061698, 0.22755136, 0.22426793, 0.21000492,
       0.20260459, 0.21227905, 0.21715931, 0.23406759, 0.23420077,
       0.23160841, 0.23169077, 0.24658231, 0.241703  , 0.25214858,
       0.24841559, 0.23711047, 0.22729613, 0.22201945, 0.23980566,
       0.27013782, 0.29760155, 0.32656435, 0.3685128 , 0.37742582,
       0.43100808, 0.44953215, 0.5143321 , 0.59721875, 0.61079477,
       0.61355388, 0.64280527, 0.57596317, 0.53894825, 0.4982403 ,
       0.5056259 , 0.48774568, 0.47653283, 0.39765441, 0.37499431,
       0.37553859, 0.32442376, 0.30099847, 0.3214591 , 0.35201092,
       0.39033141, 0.40012795, 0.42483878, 0.44711321, 0.43739228,
       0.3903652 , 0.35318243, 0.32819392, 0.36167902, 0.39371

In [20]:
# Switch to the 2009 period. This overwrites `dataset`, `dataset2`, `targets`
# and `inputs` that previously held the 2007 data.
dataset = ds.sel(time_counter = slice('2009', '2009'))
dataset2 = ds2.sel(time_counter = slice('2009', '2009'))

# Target field flattened to (time, all_grid_cells). The sizes are taken from
# the data instead of being hard-coded as (75, 180*80).
targets = dataset[name].to_numpy()
targets = targets.reshape(targets.shape[0], -1)

# Stack the driver variables into (n_inputs, time, all_grid_cells). Using the
# actual number of inputs (rather than a literal 3) keeps this working when
# `inputs_names` holds the longer, five-variable list.
inputs = np.array([dataset2[i].to_numpy() for i in inputs_names])
inputs = inputs.reshape(inputs.shape[0], inputs.shape[1], -1)

# Flattened x / y coordinate of every grid cell, in the same (y-major) order
# as the flattened last axis above: x cycles fastest, y is repeated per row.
x = np.tile(dataset2.x.to_numpy(), len(dataset2.y))
y = np.repeat(dataset2.y.to_numpy(), len(dataset2.x))

# Keep only cells whose target has no NaN at any time step and that satisfy
# x > 10 and (x > 100 or y < 880).
# NOTE(review): the coordinate thresholds presumably cut out specific regions
# of the domain -- confirm and document the intent.
indx = np.where((~np.isnan(targets).any(axis=0)) & (x>10) & ((x>100) | (y<880)))
inputs = inputs[:,:,indx[0]]
targets = targets[:,indx[0]]


In [21]:
# Display the (rows, columns) shape of the filtered target array.
np.shape(targets)

(75, 1838)

In [22]:
# One grid point per row of `targets` (its first axis), shared by the input
# and target functional objects.
time_grid = np.arange(0, targets.shape[0])

# Re-order to sample-first layout: inputs (cell, time, variable),
# targets (cell, time), then wrap both as functional data on `time_grid`.
inputs0 = FDataGrid(
    data_matrix=np.transpose(inputs, axes=(2,1,0)),
    grid_points=time_grid,
)
targets0 = FDataGrid(
    data_matrix=targets.transpose(),
    grid_points=time_grid,
)

# Predict with the already-fitted `model`; no re-fitting happens here.
predictions = model.predict(inputs0)

# Evaluate the predicted functions on the time grid, drop the trailing
# axis (index 2), and flip back so rows match the rows of `targets`.
predictions_on_grid = np.array(predictions.to_grid(time_grid).data_matrix)
predictions = np.squeeze(predictions_on_grid, 2).transpose()


In [23]:
# Mean of the targets over axis 1 (one value per row), displayed below.
a = targets.mean(axis=1)
a

array([0.19228935, 0.20289152, 0.21452206, 0.22683279, 0.24004792,
       0.24782262, 0.26266961, 0.26864944, 0.24668963, 0.24150577,
       0.22933639, 0.21747264, 0.23027617, 0.24347883, 0.24359234,
       0.24056169, 0.2365183 , 0.23305449, 0.24579775, 0.26174459,
       0.27879614, 0.27279774, 0.2889902 , 0.299467  , 0.32107366,
       0.34814309, 0.37573868, 0.36610999, 0.33234508, 0.29811878,
       0.27751935, 0.2660066 , 0.25239842, 0.24521947, 0.238977  ,
       0.25736064, 0.26013723, 0.24097879, 0.22869497, 0.23962328,
       0.2359469 , 0.24251613, 0.23173801, 0.25081041, 0.25381823,
       0.27126792, 0.2468295 , 0.25041458, 0.27261394, 0.29720302,
       0.32991429, 0.39229749, 0.47111475, 0.52781998, 0.56278802,
       0.62729109, 0.60838219, 0.55144873, 0.55749132, 0.60063272,
       0.64911212, 0.6482884 , 0.65062734, 0.63596855, 0.59883971,
       0.55154208, 0.52683899, 0.5107316 , 0.50493365, 0.51183969,
       0.51917843, 0.53635989, 0.55115316, 0.50280669, 0.51500

In [24]:
# Mean of the predictions over axis 1 (one value per row), displayed below.
b = predictions.mean(axis=1)
b

array([0.17161487, 0.18485595, 0.19991011, 0.21702481, 0.23354732,
       0.24880706, 0.27137651, 0.28544215, 0.29260786, 0.31103782,
       0.31766705, 0.31257382, 0.31858309, 0.33606555, 0.36184044,
       0.42018768, 0.45749271, 0.46591296, 0.4752638 , 0.4659125 ,
       0.42688761, 0.41038017, 0.38239536, 0.31791505, 0.22671917,
       0.15304867, 0.13683454, 0.14820828, 0.1546177 , 0.15044403,
       0.14046563, 0.11976943, 0.13572502, 0.13949964, 0.16992326,
       0.19232999, 0.19296099, 0.18272059, 0.18175318, 0.20198821,
       0.24104311, 0.2997878 , 0.33667978, 0.39728092, 0.43436667,
       0.49332037, 0.50589893, 0.56003311, 0.62854073, 0.66031768,
       0.68140871, 0.74513874, 0.71857737, 0.7063626 , 0.69785643,
       0.70440869, 0.65058994, 0.6213073 , 0.57223695, 0.54876058,
       0.54372804, 0.50624499, 0.46173988, 0.44449118, 0.4358337 ,
       0.43964037, 0.43063935, 0.37723934, 0.3473195 , 0.33683621,
       0.29157051, 0.28341527, 0.25391988, 0.23335767, 0.28116