In [1]:
import os
import random
import pandas as pd
import numpy as np
from joblib import dump, load
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import RootMeanSquaredError, MeanAbsoluteError

from preprocessing import TimeSeriesPreprocessor
from SCINet import SCINet, StackedSCINet

In [2]:
def make_stackedSciNet(input_shape, output_shape):
    """Build a functional Keras model that wraps a single ``StackedSCINet`` layer.

    Args:
        input_shape: shape of the training inputs, indexed as
            ``(n_samples, look_back, n_features)``; entries 1 and 2 define the
            model input, and the last entry is passed as ``features``.
        output_shape: accepted for signature symmetry with the caller; it is
            not read by this function.

    Returns:
        The un-compiled ``tf.keras.Model``.

    Note:
        ``horizon``, ``K``, ``L``, ``h`` and ``kernel_size`` are taken from the
        notebook's module-level hyperparameters, so the config cell must have
        been executed first.
    """
    look_back, n_features = input_shape[1], input_shape[2]
    window = tf.keras.Input(shape=(look_back, n_features), name='inputs')

    stacked_layer = StackedSCINet(
        horizon,
        features=input_shape[-1],
        stacks=K,
        levels=L,
        h=h,
        kernel_size=kernel_size,
    )
    model = tf.keras.Model(inputs=window, outputs=stacked_layer(window))

    # Side effects: print the layer table and try to render an architecture diagram.
    model.summary()
    tf.keras.utils.plot_model(model, to_file='modelDiagram.png', show_shapes=True)

    return model


def make_sciNet(input_shape, output_shape):
    """Build a functional Keras model that wraps a single (non-stacked) ``SCINet`` layer.

    Args:
        input_shape: shape of the training inputs, indexed as
            ``(n_samples, look_back, n_features)``; entries 1 and 2 define the
            model input, and the last entry is passed as ``features``.
        output_shape: accepted for signature symmetry with the caller; it is
            not read by this function.

    Returns:
        The un-compiled ``tf.keras.Model``.

    Note:
        ``horizon``, ``L``, ``h`` and ``kernel_size`` are taken from the
        notebook's module-level hyperparameters, so the config cell must have
        been executed first.
    """
    look_back, n_features = input_shape[1], input_shape[2]
    window = tf.keras.Input(shape=(look_back, n_features), name='inputs')

    scinet_layer = SCINet(
        horizon,
        features=input_shape[-1],
        levels=L,
        h=h,
        kernel_size=kernel_size,
    )
    model = tf.keras.Model(inputs=window, outputs=scinet_layer(window))

    # Side effects: print the layer table and try to render an architecture diagram.
    model.summary()
    tf.keras.utils.plot_model(model, to_file='modelDiagram.png', show_shapes=True)

    return model

In [3]:
# Dataset location, plus the column names for the time index and the target series.
data_filepath = 'datasets/ETDataset-main/ETT-small/ETTh1.csv'
index_col = 'date'
y_col = 'OT'

# ---- Hyperparameters ----
# Order of differencing handed to the preprocessor (0 = none).
degree_of_differencing = 0

# T (look-back window length) and tau (forecast horizon), both in time steps.
look_back_window = 48
horizon = 24

# Optimisation settings.
batch_size = 16
learning_rate = 3e-3

# SCINet architecture settings, forwarded to the SCINet / StackedSCINet layers:
#   h           -> `h` argument (presumably the hidden-size multiplier -- confirm in SCINet.py)
#   kernel_size -> convolution kernel size
#   L           -> `levels`
#   K           -> `stacks`
h = 4
kernel_size = 5
L = 3
K = 2

# Not referenced by any cell shown here; presumably L1 / L2 regularisation strengths.
l1 = 0
l2 = 0

# Stride between consecutive windows produced by the preprocessor.
# Alternative (non-overlapping windows): look_back_window + horizon
split_strides = 1

In [4]:

# Load the CSV (indexed by `index_col`) and cast every column to float32.
data = pd.read_csv(data_filepath, index_col=index_col).astype('float32')

# Split by row position, in file order, into 60 % train / 20 % validation / 20 % test.
n_rows = len(data)
train_end = int(0.6 * n_rows)
val_end = int(0.8 * n_rows)

train_data = data.iloc[:train_end]
val_data = data.iloc[train_end:val_end]
test_data = data.iloc[val_end:]

In [5]:
# Fit the preprocessor on the training split and build the (untrained) model.
# The preprocessor is fitted on `train_data` only and then re-used, via `transform`,
# for the validation split, so validation data does not go through `fit_transform`.
preprocessor = TimeSeriesPreprocessor(look_back_window, horizon, split_strides, degree_of_differencing,
                                      relative_diff=False, scaling='standard')
X_train, y_train = preprocessor.fit_transform(train_data)
X_val, y_val = preprocessor.transform(val_data)
# With the settings above this prints X(10381, 48, 7), y(10381, 24, 7):
# (samples, look_back_window, features) and (samples, horizon, features).
print(f'Input shape: X{X_train.shape}, y{y_train.shape}')

# Builds the model and, as a side effect, prints its summary and attempts to plot it.
model = make_stackedSciNet(X_train.shape, y_train.shape)

Input shape: X(10381, 48, 7), y(10381, 24, 7)
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 inputs (InputLayer)         [(None, 48, 7)]           0         
                                                                 
 stacked_sci_net (StackedSC  (None, 24, 7)             224952    
 INet)                                                           
                                                                 
Total params: 224952 (878.72 KB)
Trainable params: 224952 (878.72 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [6]:
from tqdm import tqdm

# Custom training loop for the stacked SCINet model.
#
# Each stack is trained with its own MSE loss against the target (intermediate
# supervision): the loss of stack k is differentiated only with respect to stack k's
# own trainable weights.
#
# Produces, at notebook level:
#   history       - object whose `.history` dict holds per-epoch lists for
#                   'loss', 'mean_absolute_error', 'val_loss', 'val_mean_absolute_error'
#   best_val_mae  - best validation MAE seen (Python float)

loss_fn = tf.keras.losses.MeanSquaredError()

# Hyperparameters
epochs = 150
patience = 10  # epochs without a validation-MAE improvement before stopping

# Early-stopping state
best_val_mae = float('inf')
counter = 0

# Create optimizer
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate)

# Training batches. The shuffle buffer covers the whole training set so every epoch is a
# uniform shuffle; a smaller buffer only mixes samples that are close together in time.
dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
dataset = dataset.shuffle(buffer_size=len(X_train), seed=4321).batch(batch_size)

# Validation batches. Averaged metrics do not depend on sample order, so no shuffle.
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(batch_size)

# Per-epoch running metrics
train_mae_metric = tf.keras.metrics.MeanAbsoluteError()
val_mae_metric = tf.keras.metrics.MeanAbsoluteError()
train_loss_metric = tf.keras.metrics.Mean()
val_loss_metric = tf.keras.metrics.Mean()
epoch_metrics = (train_mae_metric, val_mae_metric, train_loss_metric, val_loss_metric)

# Keras-style training history, filled by hand because `model.fit` is not used.
history = tf.keras.callbacks.History()
history.history = {
    'loss': [],
    'mean_absolute_error': [],
    'val_loss': [],
    'val_mean_absolute_error': [],
}

# The StackedSCINet layer: `.scinets` holds the individual stacks and `.outputs` the
# per-stack forecasts of the most recent forward pass.
# NOTE(review): assumes `.outputs` is refreshed on every call of the layer -- confirm in SCINet.py.
stack_layer = model.layers[1]

for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")

    # Start every epoch from clean metric state
    for metric in epoch_metrics:
        metric.reset_state()

    # Progress bar over the batches of this epoch
    prog_bar = tqdm(dataset, unit="step", leave=False)

    for x_batch, y_batch in prog_bar:
        # Persistent tape: one gradient call per stack is issued below.
        with tf.GradientTape(persistent=True) as tape:
            # Forward pass; also populates `stack_layer.outputs`
            y = model(x_batch, training=True)

            # One MSE loss per stack (works for any number of stacks K)
            stack_losses = [
                loss_fn(y_batch, stack_output)
                for _, stack_output in zip(stack_layer.scinets, stack_layer.outputs)
            ]

        # Gradient of each stack's loss w.r.t. that stack's own weights only
        grads_and_vars = []
        for scinet, stack_loss in zip(stack_layer.scinets, stack_losses):
            weights = scinet.trainable_weights
            grads_and_vars.extend(zip(tape.gradient(stack_loss, weights), weights))

        # Release the resources held by the persistent tape
        del tape

        # A single optimizer step per batch: calling `apply_gradients` once per stack
        # would advance Adam's shared step counter several times per batch and skew
        # its bias correction.
        optimizer.apply_gradients(grads_and_vars)

        # Book-keeping
        total_loss = float(tf.add_n(stack_losses))
        train_loss_metric.update_state(total_loss)
        train_mae_metric.update_state(y_batch, y)
        prog_bar.set_description(f"Training loss: {total_loss:.4f}")

    # Validation loop (inference mode, no gradient tracking)
    for x_val_batch, y_val_batch in val_dataset:
        val_predictions = model(x_val_batch, training=False)
        val_mae_metric.update_state(y_val_batch, val_predictions)
        # Same definition as the training loss (sum over stacks) so the curves are comparable
        val_loss_metric.update_state(tf.add_n([
            loss_fn(y_val_batch, stack_output)
            for _, stack_output in zip(stack_layer.scinets, stack_layer.outputs)
        ]))

    # Collect the epoch results as plain Python floats
    train_mae = float(train_mae_metric.result())
    val_mae = float(val_mae_metric.result())
    history.history['loss'].append(float(train_loss_metric.result()))
    history.history['mean_absolute_error'].append(train_mae)
    history.history['val_loss'].append(float(val_loss_metric.result()))
    history.history['val_mean_absolute_error'].append(val_mae)

    # Show metrics
    print(f"Training MAE: {train_mae}")
    print(f"Validation MAE: {val_mae}")

    # Early stopping check
    if val_mae < best_val_mae:
        best_val_mae = val_mae
        counter = 0
    else:
        counter += 1

    if counter >= patience:
        print("Early stopping triggered")
        break


Epoch 1/10


                                                                          

Training MAE: 0.6425275802612305
Validation MAE: 0.4695684313774109
Epoch 2/10


                                                                          

Training MAE: 0.40406882762908936
Validation MAE: 0.4374469220638275
Epoch 3/10


                                                                          

Training MAE: 0.3823499083518982
Validation MAE: 0.42486223578453064
Epoch 4/10


                                                                          

Training MAE: 0.37281474471092224
Validation MAE: 0.4271564781665802
Epoch 5/10


                                                                          

Training MAE: 0.36914053559303284
Validation MAE: 0.4143417775630951
Epoch 6/10


                                                                          

Training MAE: 0.36939698457717896
Validation MAE: 0.4305734634399414
Epoch 7/10


                                                                          

Training MAE: 0.36861634254455566
Validation MAE: 0.43555325269699097
Epoch 8/10


                                                                          

Training MAE: 0.37048736214637756
Validation MAE: 0.4153284430503845
Epoch 9/10


                                                                          

Training MAE: 0.3729897141456604
Validation MAE: 0.42487865686416626
Epoch 10/10


                                                                          

Training MAE: 0.3719906210899353
Validation MAE: 0.4315313696861267


In [None]:

# Save the trained model and its artefacts, then score it on the test split.
#
# Requires from earlier cells: `model`, `preprocessor`, `test_data`, `batch_size` and
# `history` (an object whose `.history` dict maps 'loss', 'val_loss',
# 'mean_absolute_error' and 'val_mean_absolute_error' to per-epoch lists).

# Generate new id and create save directory.
# Runs are stored under saved-models/regressor/<id>/, so that is where used ids must be
# looked up. Numeric entries directly under saved-models/ are also treated as taken so
# ids stay unique in the shared scores.csv.
models_root = 'saved-models/'
runs_root = models_root + 'regressor/'
os.makedirs(runs_root, exist_ok=True)  # also makes the listings below safe on a fresh checkout
existing_ids = {
    int(name)
    for folder in (models_root, runs_root)
    for name in os.listdir(folder)
    if name.isnumeric()
}
free_ids = sorted(set(range(0, 1000)) - existing_ids)
if not free_ids:
    raise RuntimeError('All run ids 000-999 are already in use under saved-models/')
run_id = random.choice(free_ids)
save_directory = f'saved-models/regressor/{run_id:03d}/'
os.makedirs(os.path.dirname(save_directory), exist_ok=True)

# Save model, preprocessor and training history
model.save(save_directory)
with open(save_directory + 'preprocessor', 'wb') as f:
    dump(preprocessor, f, compress=3)
pd.DataFrame(history.history).to_csv(save_directory + 'train_history.csv')

# Plot accuracy
plt.plot(history.history['mean_absolute_error'])
plt.plot(history.history['val_mean_absolute_error'])
plt.title('model accuracy')
plt.ylabel('mean absolute error')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.savefig(save_directory + 'accuracy.png')
plt.clf()

# Plot loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.savefig(save_directory + 'loss.png')

# Evaluate
# To score a previously saved run instead, reload it first:
# run_id = 186
# model = load_model(f'saved-models/regressor/{run_id:03d}/')
# with open(f'saved-models/regressor/{run_id:03d}/preprocessor', 'rb') as f:
#     preprocessor = load(f)
X_test, y_test = preprocessor.transform(test_data)

# The model is trained with a custom loop and never compiled, so `model.evaluate`
# cannot be used (it needs a compiled loss). `predict` works without compiling; the
# metrics are computed directly, in the preprocessor's transformed space.
y_pred = model.predict(X_test, batch_size=batch_size)
errors = np.asarray(y_pred, dtype='float64') - np.asarray(y_test, dtype='float64')
metric_names = ['loss', 'mean_absolute_error']  # loss = mean squared error
scores = [float(np.mean(np.square(errors))), float(np.mean(np.abs(errors)))]

# Save evaluation results
score_columns = ['id'] + metric_names + ['time']
row = [run_id] + scores + [pd.Timestamp.now(tz='Australia/Melbourne')]
new_scores = pd.DataFrame([row], columns=score_columns)
try:
    previous_scores = pd.read_csv('saved-models/scores.csv')
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No usable score table yet: start a new one
    df_scores = new_scores
else:
    # Append by column name so earlier rows survive even if the column set differs
    df_scores = pd.concat([previous_scores, new_scores], ignore_index=True)
df_scores.to_csv('saved-models/scores.csv', index=False)

# # Predict
# # y_test is only used to calculate loss, how to get rid of it?
# y_pred = model.predict({'inputs': X_test, 'targets': y_test})
# y_pred = preprocessor.scaler.inverse_transform(y_pred.reshape(-1, y_test.shape[-1]))
# y_test = preprocessor.scaler.inverse_transform(y_test.reshape(-1, y_test.shape[-1]))
# comparison = np.hstack([y_pred, y_test])
# df = pd.DataFrame(comparison, columns=['Predicted', 'Actual'])
# df.to_csv(f'saved-models/regressor/{run_id:03d}/comparison.csv')