#### Multivariate Time Series Forecasting with Deep Learning

[Source 1](https://towardsdatascience.com/multivariate-time-series-forecasting-with-deep-learning-3e7b3e2d2bcf) \
[Source 2](https://machinelearningmastery.com/multivariate-time-series-forecasting-lstms-keras/)

In [None]:
from common import get_clustered_dataframes

# Load the per-cluster data from the project's `common` helper.
# NOTE(review): presumably a dict mapping cluster id -> DataFrame with a 'Date'
# column (it is iterated with .items() and read via .columns further down) —
# confirm against common.get_clustered_dataframes.
clusters_data = get_clustered_dataframes()

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt

def get_train_test_data(cluster_data, n_steps=10, test_size=0.2, shuffle=False, valid_size=0.2):
    """Build sliding-window train/validation/test arrays for one cluster.

    The frame is sorted by its 'Date' column, the 'Date' column is dropped,
    and every remaining column is used both as an input feature and as a
    prediction target. Each sample is a window of ``n_steps`` consecutive
    rows; its target is the row that immediately follows the window.

    Args:
        cluster_data: DataFrame with a 'Date' column plus one column per
            series. It is copied, so the caller's frame is not modified.
        n_steps: Number of consecutive rows in each input window (>= 1).
        test_size: Fraction (or absolute count) of samples held out for
            testing, forwarded to ``train_test_split``.
        shuffle: Forwarded to ``train_test_split``. With ``False`` the splits
            keep chronological order, so the test set is the most recent data.
        valid_size: Fraction (or absolute count) of the remaining training
            samples held out for validation.

    Returns:
        Tuple ``(X_train, X_valid, X_test, y_train, y_valid, y_test)`` of
        NumPy arrays. The X arrays have shape (samples, n_steps, n_features)
        and the y arrays have shape (samples, n_features).

    Raises:
        ValueError: If ``n_steps`` is smaller than 1, or (raised by
            ``train_test_split``) if there are too few rows to split.
    """
    if n_steps < 1:
        raise ValueError(f'n_steps must be a positive integer, got {n_steps!r}')

    data = cluster_data.copy()

    # Parse dates so that sorting is chronological rather than lexicographic.
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.sort_values(by='Date')

    features = data.drop(columns=['Date'])

    # Input windows: rows i .. i + n_steps - 1 for every valid start index i.
    X = features.values
    X_seq = [X[i:i + n_steps] for i in range(len(X) - n_steps)]

    # Targets: all feature columns shifted back by n_steps, so y[i] is the row
    # right after window i. The last n_steps rows have no target (NaN after
    # the shift) and are cut off, which also aligns len(y) with len(X_seq).
    y = features.shift(-n_steps).values[:-n_steps]

    # Hold out the test set first, honouring the caller's test_size ...
    _X_train, X_test, _y_train, y_test = train_test_split(
        X_seq, y, test_size=test_size, shuffle=shuffle
    )

    # ... then carve the validation set out of what is left for training.
    X_train, X_valid, y_train, y_valid = train_test_split(
        _X_train, _y_train, test_size=valid_size, shuffle=shuffle
    )

    # train_test_split returns lists for list inputs; stacking the windows
    # yields the 3D (samples, n_steps, n_features) arrays the LSTM expects.
    X_train = np.array(X_train)
    X_valid = np.array(X_valid)
    X_test = np.array(X_test)
    y_train = np.array(y_train)
    y_valid = np.array(y_valid)
    y_test = np.array(y_test)

    print('X_train shape:', X_train.shape)
    print('y_train shape:', y_train.shape)
    print('X_valid shape:', X_valid.shape)
    print('y_valid shape:', y_valid.shape)
    print('X_test shape:', X_test.shape)
    print('y_test shape:', y_test.shape)
    print("\n\n")

    return X_train, X_valid, X_test, y_train, y_valid, y_test

In [None]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
import mlflow


def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error between ``y_true`` and ``y_pred``."""
    mse = mean_squared_error(y_true, y_pred)
    return sqrt(mse)


def build_model(n_steps, n_features):
    """Create the (uncompiled) stacked-LSTM forecasting network.

    Args:
        n_steps: Length of each input window (time dimension).
        n_features: Number of series per time step; also the output width.

    Returns:
        A ``tf.keras.Sequential`` model: two 64-unit ReLU LSTM layers followed
        by two Dense layers of ``n_features`` units each.
    """
    layers = tf.keras.layers

    # The first LSTM emits the whole sequence so the second LSTM can consume it.
    encoder = layers.LSTM(
        64,
        activation='relu',
        input_shape=(n_steps, n_features),
        return_sequences=True,
    )
    # The second LSTM keeps only its final state, collapsing the time dimension.
    summariser = layers.LSTM(64, activation='relu')
    hidden = layers.Dense(n_features)
    output = layers.Dense(n_features)

    return tf.keras.Sequential([encoder, summariser, hidden, output])


def lstm_training(cluster_data, n_steps=10, test_size=0.2, shuffle=False, epochs=250, batch_size=32):
    """Train an LSTM on one cluster and print the test RMSE of every series.

    Args:
        cluster_data: DataFrame with a 'Date' column plus one column per
            series. It is only read here; ``get_train_test_data`` works on
            its own copy.
        n_steps: Number of past rows in each input window.
        test_size: Test-split size, forwarded to ``get_train_test_data``.
        shuffle: Whether the splits are shuffled, forwarded to
            ``get_train_test_data``.
        epochs: Number of training epochs passed to ``model.fit``.
        batch_size: Mini-batch size passed to ``model.fit``.

    Returns:
        None. Results are reported with ``print``.
    """
    X_train, X_valid, X_test, y_train, y_valid, y_test = get_train_test_data(
        cluster_data, n_steps, test_size, shuffle
    )

    # X_train is (samples, n_steps, n_features); axis 2 gives the feature count.
    model = build_model(n_steps, X_train.shape[2])
    model.compile(optimizer='adam', loss='mse')
    model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_valid, y_valid),
    )

    # Make predictions
    y_pred = model.predict(X_test)

    # The feature matrix is built by dropping 'Date' by name, so derive the
    # labels the same way. Slicing off the last column would mislabel every
    # series whenever 'Date' is not the final column.
    feature_names = [column for column in cluster_data.columns if column != 'Date']

    # Calculate and print RMSE
    for i, cripto in enumerate(feature_names):
        rmse = calculate_rmse(y_test[:, i], y_pred[:, i])
        print(f'Root Mean Squared Error (RMSE) for {cripto}: {rmse:.4f}')

In [None]:
# Train and evaluate one LSTM per cluster, using windows of 2 time steps.
for cluster, cripto in clusters_data.items():
    # Show which series belong to this cluster: every column except the last.
    # NOTE(review): presumably the last column is 'Date' — confirm.
    print(f'Cluster {cluster}: {cripto.columns[:-1]}\n')

    lstm_training(cripto, n_steps=2)

    # Visual separator between the outputs of consecutive clusters.
    print("\n---------------------------------\n")