In [None]:
import json
from typing import Tuple

import ipywidgets as widgets
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from tqdm.notebook import trange


def to_sequence(x, y, input_window, output_window, device=torch.device('cpu')):
    """Build a lazy stream of sliding-window ``(input, target)`` tensor pairs.

    Window ``i`` pairs rows ``[i, i + input_window)`` of ``x`` with rows
    ``[i + input_window, i + input_window + output_window)`` of ``y``.
    The slices are taken when a pair is requested, not up front, so any
    change made to ``x`` or ``y`` between two ``next()`` calls is visible in
    the windows produced afterwards.

    Args:
        x: Object exposing ``.shape``, row slicing and ``.values`` (e.g. a
            pandas DataFrame) that supplies the input windows.
        y: Object with the same row layout as ``x`` that supplies the
            target windows.
        input_window: Number of consecutive rows in each input window.
        output_window: Number of consecutive rows in each target window.
        device: Torch device the produced tensors are moved to.

    Returns:
        A ``(num_samples, generator)`` tuple. ``num_samples`` is the number
        of complete windows (0 when the data is too short) and the generator
        yields exactly that many ``(X_tensor, Y_tensor)`` float32 pairs, each
        with a leading batch dimension of size 1.
    """
    # The last complete window ends exactly on the final row, hence the +1.
    # Clamp at zero so callers never receive a negative count.
    num_samples = max(x.shape[0] - input_window - output_window + 1, 0)

    def generator():
        for start_x in range(num_samples):
            end_x = start_x + input_window
            end_y = end_x + output_window

            # Slice at iteration time (not eagerly) so that values written
            # into `x` by the consumer are picked up by later windows.
            X = x[start_x:end_x].values
            Y = y[end_x:end_y].values

            # Wrapping in a list adds the batch dimension; np.array copies.
            X_tensor = torch.from_numpy(np.array([X], dtype=np.float32)).to(device)
            Y_tensor = torch.from_numpy(np.array([Y], dtype=np.float32)).to(device)
            yield X_tensor, Y_tensor

    return num_samples, generator()


# Notebook-level state shared between `rnn_prehook` and `rnn`.
# `rnn_model_file` holds the text the user confirmed in the widget; `rnn`
# passes it to `torch.load`. The two widget handles are created by
# `rnn_prehook`. All three stay None until `rnn_prehook` has run.
rnn_model_file = None
rnn_model_text_input = None
rnn_model_submit_button = None


def rnn_prehook(next_step):
    """Show a text box and a confirm button for choosing the model file.

    The two widgets are stored in the module-level globals
    ``rnn_model_text_input`` and ``rnn_model_submit_button``. When the button
    is clicked it is closed, the text box is disabled, the entered text is
    saved to the global ``rnn_model_file`` and ``next_step`` is invoked.

    Args:
        next_step: Zero-argument callable run after the selection has been
            confirmed.
    """
    global rnn_model_file, rnn_model_text_input, rnn_model_submit_button

    def _on_confirm(_clicked_button):
        # The widgets are looked up through the globals at click time.
        global rnn_model_file, rnn_model_text_input, rnn_model_submit_button
        rnn_model_submit_button.close()
        rnn_model_text_input.disabled = True
        rnn_model_file = rnn_model_text_input.value
        next_step()

    rnn_model_text_input = widgets.Text(continuous_update=False)
    rnn_model_submit_button = widgets.Button(description='Confirm selection')
    for widget in (rnn_model_text_input, rnn_model_submit_button):
        display(widget)
    rnn_model_submit_button.on_click(_on_confirm)


def rnn(df: pd.DataFrame, config: object) -> Tuple[pd.DataFrame, str]:
    """Impute missing target values using a previously saved recurrent model.

    Steps:
      1. Forward-fill the non-target feature columns of ``df`` (in place).
      2. Standardise all columns with a ``StandardScaler``.
      3. Time-interpolate the first ``input_window_size`` rows, which have no
         complete preceding window for the model to use.
      4. Walk the remaining rows in order; wherever the target is NaN, write
         the model's prediction from the preceding window into the working
         frame. The window generator slices that frame lazily, so earlier
         predictions are part of later input windows.
      5. Undo the scaling and return the result.

    Reads the notebook-level globals ``random_state``, ``rnn_model_file``
    (path handed to ``torch.load``) and ``dfloader`` (its ``features`` and
    ``targets`` attributes are used). Only ``dfloader.targets[0]`` is imputed.

    Args:
        df: Input frame. ``method='time'`` interpolation is applied to its
            leading rows, which pandas only supports for a datetime-like
            index.
        config: Not used by this function.

    Returns:
        A tuple ``(imputed_df, config_json)``: the imputed frame carrying
        ``df``'s index and columns, and the hyper-parameters used, serialised
        as a JSON string.
    """
    global random_state, rnn_model_file, dfloader

    # Forward-fill gaps in the non-target features. An explicit assignment is
    # used instead of `df[col].fillna(..., inplace=True)`, which is a chained
    # in-place call that pandas no longer guarantees will modify `df`.
    features_without_target = [f for f in dfloader.features if f not in dfloader.targets]
    if features_without_target:
        df[features_without_target] = df[features_without_target].ffill()

    rnn_config = {
        "random_state": random_state,
        "input_window_size": 12,
        "output_window_size": 1,
        "hidden_size": 95,
        "num_layers": 1,
        "rnn": "GRU",
        "model_file": rnn_model_file
    }

    # Define the model
    class RNN(nn.Module):
        """Recurrent layer followed by a linear head with a residual term."""

        def __init__(self, input_size, hidden_size=100, num_layers=1, output_size=1, rnn=nn.LSTM):
            super().__init__()
            # Attribute names l1/l2 are the keys expected in the state dict.
            self.l1 = rnn(input_size, hidden_size, num_layers, batch_first=True)
            self.l2 = nn.Linear(hidden_size, output_size)

        def forward(self, X):
            h, _ = self.l1(X)
            h = h[:, -1, :]  # hidden state of the last time step
            y = self.l2(h)
            # Residual connection: the head predicts an offset from the last
            # column of the last time step.
            # NOTE(review): presumably the target is the last column - confirm.
            y = y + X[:, -1, -1:]
            return y

    rnns = {m.__name__: m for m in [nn.LSTM, nn.GRU]}
    model = RNN(
        input_size=len(df.columns),
        hidden_size=rnn_config["hidden_size"],
        num_layers=rnn_config["num_layers"],
        output_size=1,
        rnn=rnns[rnn_config["rnn"]]
    )
    # NOTE(security): torch.load unpickles the file, and the path comes from a
    # free-text widget. Only load checkpoints from a trusted source.
    state_dict = torch.load(rnn_model_file, map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    model.eval()  # inference only

    iw = rnn_config["input_window_size"]
    ow = rnn_config["output_window_size"]

    scaler = StandardScaler()
    data = pd.DataFrame(data=scaler.fit_transform(df.values), columns=df.columns, index=df.index)
    # NOTE(review): the scaler is fitted on df.columns but inverted on the
    # columns of `x` below, and the model's input size is len(df.columns).
    # This assumes dfloader.features matches df.columns in number and order -
    # confirm against the loader.
    # Explicit copy: `x` is written to below and must be independent of `data`.
    x = data[dfloader.features].copy()
    # interpolate nan values in the first `input_window_size` rows, as they cannot be imputed using RNN (no valid input sequence)
    x.iloc[:iw] = x.iloc[:iw].interpolate(method='time', limit_direction="both")
    y = data[dfloader.targets]
    num_samples, sequence = to_sequence(x, y, iw, ow)

    target_column = dfloader.targets[0]
    # No gradients are needed for imputation.
    with torch.no_grad(), trange(num_samples) as tr:
        for i, (X, Y) in zip(tr, sequence):
            if not torch.isnan(Y).any():  # observed value, nothing to impute
                continue
            # Window i predicts the row right after its input rows. Writing
            # into `x` lets subsequent windows include this prediction.
            y_index = x.index[i + iw]
            x.loc[y_index, target_column] = model(X).item()

    imputed_df = pd.DataFrame(data=scaler.inverse_transform(x.values), columns=df.columns, index=df.index)
    return imputed_df, json.dumps(rnn_config)
