In [1]:
import os

import IPython.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from LSTM import *

# Notebook-wide matplotlib defaults: 8x6 inch figures, axes grid switched off.
# Set through `plt.rcParams` (the same settings object that `matplotlib.rcParams`
# exposes) so this cell does not rely on an `mpl` alias that the notebook never
# imports explicitly.
plt.rcParams.update({
    'figure.figsize': (8, 6),
    'axes.grid': False,
})

figures saved to ../Figures/Modeling/LSTM


In [5]:
# --- Experiment configuration (hyperparameters) ---

# Run identity: prefix of the per-fold model names built in the CV loop
NAME = 'LSTM_CV9'

# Window geometry, handed to LstmModel as input_width / label_width
INPUT_LENGTH, LABEL_LENGTH = 10, 9

# Network shape, handed to LstmModel as units / layers / dropout
HIDDEN_DIM, N_LAYERS, DROPOUT = 20, 1, 0

# Training setup, handed to LstmModel as epoch / classification
EPOCH = 100
CLASSIFICATION = False

In [2]:
# Load data: read the '917.csv' trace through load_VM
# (alternative trace used during experiments: '599.csv')
VM = load_VM('917.csv')
# Univariate setting: keep only the CPU-usage column, selected with a list so
# the result stays two-dimensional (use `df = VM` to keep every column instead)
df = VM.loc[:, ['CPU usage [MHZ]']]

In [3]:
from sklearn.model_selection import TimeSeriesSplit
# Time-ordered cross-validation with 5 splits; later cells iterate over
# tscv.split(df) and join each split's train and test indices into one fold.
tscv = TimeSeriesSplit(n_splits=5)

In [None]:
# Cross-validated training: fit one LSTM per fold and collect its metrics
metrics_cv = []
for fold, idx in enumerate(tscv.split(df)):
    print(f'Fold {fold}:')
    # A fold is the split's train indices followed by its test indices
    # (incremental for time series)
    idxs = np.concatenate(idx)
    df_fold = df.iloc[idxs]
    NAME_FOLD = f'{NAME}_{fold}'

    # Data pipeline, step 1: partition the fold
    # (70%, 20%, 10%) split for the training, validation, and test sets
    train_df, val_df, test_df = split_data(df_fold)

    # Data pipeline, step 2: normalise the three partitions with a fresh scaler
    scaler = MinMaxScaler()
    train_df, val_df, test_df = data_transformation(scaler, train_df, val_df, test_df)

    # Build the LSTM for this fold from the notebook-level hyperparameters
    lstm_model = LstmModel(
        input_width=INPUT_LENGTH,
        label_width=LABEL_LENGTH,
        df=df_fold,
        train_df=train_df,
        val_df=val_df,
        test_df=test_df,
        epoch=EPOCH,
        units=HIDDEN_DIM,
        layers=N_LAYERS,
        dropout=DROPOUT,
        name=NAME_FOLD,
        classification=CLASSIFICATION,
    )

    # Fit
    print('Training:')
    history = lstm_model.compile_and_fit(patience=80)
    # Predict; the scaler is passed along to the model's prediction step
    print('Prediction:')
    pred = lstm_model.prediction(scaler)
    # Score the predictions and keep the result for the cross-fold summary
    print('Evaluation:')
    metrics = lstm_model.evaluation(pred, scaler)
    metrics_cv.append(metrics)

In [12]:
# Join the metrics collected for each fold side by side (column-wise)
metrics_cv_all = pd.concat(metrics_cv, axis='columns')

In [13]:
# Transpose the column-wise concatenation of the fold metrics and move the
# resulting index into a regular column.
# Built directly from `metrics_cv` instead of from `metrics_cv_all` itself, so
# re-running this cell yields the same table rather than transposing it back.
metrics_cv_all = pd.concat(metrics_cv, axis=1).T.reset_index()

In [None]:
# Generate a plot with index
# NOTE: as written, this loop only rebuilds each fold's frame; nothing is drawn here
for fold, idx in enumerate(tscv.split(df)):
    print(f'Fold {fold}:')
    # Fold rows = the split's train indices followed by its test indices
    # (incremental for time series)
    idxs = np.concatenate(idx)
    df_fold = df.iloc[idxs]

In [4]:
from matplotlib.patches import Patch

# Styling shared by the CV-visualisation cells
cmap_cv = plt.cm.Set2      # colours of the segments drawn by plot_cv_indices
cmap_data = plt.cm.Paired  # not referenced by any active code in the visible cells
n_splits = 5               # number of CV rows to draw

# Seed NumPy's global random state
np.random.seed(1338)

In [7]:
def plot_cv_indices(cv, df, ax, n_splits, lw=10, train_frac=0.7, val_frac=0.2,
                    xlim=None, cmap=None):
    """Draw one horizontal bar per CV split, coloured by train/validation/test.

    For every ``(train_idx, test_idx)`` pair yielded by ``cv.split(df)`` the
    fold is taken to be the train indices followed by the test indices. The
    fold's positions ``0 .. n-1`` are coded 0 (training), 1 (validation) or
    2 (test), with cut points ``int(n * train_frac)`` and
    ``int(n * (train_frac + val_frac))``, and drawn as row ``i`` of the plot.

    Parameters
    ----------
    cv : object
        Cross-validation splitter exposing ``split(df)``.
    df : object
        Data passed to ``cv.split``; it is only used to generate the splits.
    ax : matplotlib Axes (or compatible object)
        Target axes; needs ``scatter``, ``set`` and ``set_title``.
    n_splits : int
        Number of rows used for the y ticks and the y limits.
    lw : float, default 10
        Line width of the bar markers.
    train_frac, val_frac : float, default 0.7 and 0.2
        Fractions of each fold shown as training and validation; the rest is
        shown as test.
    xlim : sequence of two numbers, optional
        X-axis limits. Defaults to ``[0, length of the longest fold]`` so the
        plot fits the data instead of a fixed sample count.
    cmap : colormap, optional
        Colormap for the three segments. Defaults to the notebook-level
        ``cmap_cv``.

    Returns
    -------
    The ``ax`` that was passed in.

    Raises
    ------
    ValueError
        If a fraction is negative or the two fractions sum to more than 1.
    """
    if train_frac < 0 or val_frac < 0 or train_frac + val_frac > 1:
        raise ValueError(
            "train_frac and val_frac must be non-negative and sum to at most 1"
        )
    if cmap is None:
        cmap = cmap_cv  # notebook-level default, resolved at call time

    longest_fold = 0
    for row, (train_idx, test_idx) in enumerate(cv.split(df)):
        # Only the fold length matters for the drawing, so no frame is sliced
        n = len(train_idx) + len(test_idx)
        longest_fold = max(longest_fold, n)

        train_end = int(n * train_frac)
        val_end = int(n * (train_frac + val_frac))

        # Segment codes: everything starts as test (2), then the leading
        # training (0) and validation (1) stretches are filled in
        codes = np.full(n, 2.0)
        codes[:train_end] = 0
        codes[train_end:val_end] = 1

        # vmin/vmax are kept from the original so the colours do not change;
        # note that code 2 lies above vmax
        ax.scatter(
            range(n),
            [row + 0.5] * n,
            c=codes,
            marker="_",
            lw=lw,
            cmap=cmap,
            vmin=-0.2,
            vmax=1.2,
        )

    if xlim is None:
        # Fit the axis to the data; fall back to 1 if no split was produced
        xlim = [0, max(longest_fold, 1)]

    # Formatting: one labelled row per split, first split at the top
    ax.set(
        yticks=np.arange(n_splits) + 0.5,
        yticklabels=list(range(n_splits)),
        xlabel="Sample index",
        ylabel="CV iteration",
        ylim=[n_splits + 0.2, -0.2],
        xlim=xlim,
    )
    ax.set_title(type(cv).__name__, fontsize=15)
    return ax

In [8]:
%matplotlib tk
fig, ax = plt.subplots()
plot_cv_indices(tscv, df, ax, n_splits)

ValueError: 'c' argument has 3 elements, which is inconsistent with 'x' and 'y' with size 2873.

In [2]:
# Read the saved cross-validation metrics from the LSTM_CV9_4 log directory;
# the first column of the file becomes the index.
# NOTE: this rebinds `df`, which earlier cells use for the VM trace.
df = pd.read_csv(
    './logs/LSTM_CV9_4/metrics_cv.txt',
    sep=',',
    index_col=0,
    engine='python',
)

In [3]:
%matplotlib tk
fig = plt.figure(dpi=200)
df[['MAE','RMSE','MAPE']].T.plot.bar(rot=0)
plt.xlabel('Metric')
plt.title(f'Cross-validation')

Text(0.5, 1.0, 'Cross-validation')