In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf

In [None]:
# Read the raw files shipped in the local 'assets' directory.
# The two .npz archives are later indexed by array name; the training
# targets are read from CSV into a DataFrame.
assets_dir = 'assets'
X_train = np.load(os.path.join(assets_dir, 'X_train_surge_new.npz'))
Y_train = pd.read_csv(os.path.join(assets_dir, 'Y_train_surge.csv'))
X_test = np.load(os.path.join(assets_dir, 'X_test_surge_new.npz'))

In [None]:
# Pull the individual arrays out of the training archive.
# The shape comments are the author's notes for this dataset; nothing here enforces them.
x_surge1 = X_train['surge1_input']  # (5599, 10)
x_surge2 = X_train['surge2_input']  # (5599, 10)

# Keep every 4th entry along axis 1 of 'slp': (5599, 40, 41, 41) -> (5599, 10, 41, 41)
x_slp = X_train['slp'][:, ::4, :, :]

# Collect the ten surge1_* and ten surge2_* target columns as (n_samples, 10) arrays.
surge1_cols = [f'surge1_t{i}' for i in range(10)]
surge2_cols = [f'surge2_t{i}' for i in range(10)]
y_surge1 = Y_train[surge1_cols].to_numpy()  # (5599, 10)
y_surge2 = Y_train[surge2_cols].to_numpy()  # (5599, 10)

print(x_surge1.shape, y_surge1.shape)

In [None]:
# Build one flat feature row per sample: [surge1 | surge2 | flattened slp].
n_samples = x_slp.shape[0]
x_slp_flat = x_slp.reshape(n_samples, -1)
print(x_surge1.shape, x_surge2.shape, x_slp_flat.shape)

input_data = np.hstack((x_surge1, x_surge2, x_slp_flat))
print(input_data.shape)

# Matching target row per sample: [surge1 targets | surge2 targets].
output_data = np.hstack((y_surge1, y_surge2))

# create time series data
def create_time_series_data(data, time_steps=10, targets=None):
  """Slice ``data`` into overlapping windows paired with the "next row" label.

  Window ``i`` is ``data[i : i + time_steps]`` and its label is row
  ``i + time_steps`` of ``targets`` (or of ``data`` itself when ``targets``
  is omitted, which is the original behaviour).

  Args:
    data: array-like with samples along axis 0.
    time_steps: number of consecutive rows per window; must be >= 1.
    targets: optional array-like with the same length as ``data`` to draw
      labels from. Use it when the labels are not the features themselves.

  Returns:
    ``(X, Y)`` where ``X`` has shape ``(n, time_steps, *data.shape[1:])`` and
    ``Y`` has shape ``(n, *targets.shape[1:])`` with
    ``n = max(len(data) - time_steps, 0)``.

  Raises:
    ValueError: if ``time_steps`` < 1 or ``targets`` differs in length from
      ``data``.
  """
  data = np.asarray(data)
  targets = data if targets is None else np.asarray(targets)

  if time_steps < 1:
    raise ValueError(f"time_steps must be >= 1, got {time_steps}")
  if len(targets) != len(data):
    raise ValueError(
      f"targets has {len(targets)} rows but data has {len(data)}")

  n_windows = max(len(data) - time_steps, 0)
  if n_windows == 0:
    # Not enough rows for a single window: return empty arrays that still
    # carry the trailing dimensions instead of collapsing to shape (0,).
    X = np.empty((0, time_steps) + data.shape[1:], dtype=data.dtype)
    Y = np.empty((0,) + targets.shape[1:], dtype=targets.dtype)
    return X, Y

  # Each row is copied into up to `time_steps` windows, so X is roughly
  # time_steps times larger than `data` in memory.
  X = np.stack([data[start: start + time_steps] for start in range(n_windows)])
  # Labels are a contiguous slice; copy so the result does not alias the input.
  Y = targets[time_steps: time_steps + n_windows].copy()
  return X, Y

# Build the supervised windows. Features come from input_data; labels must come
# from output_data (the 20 surge targets) so they match the model's Dense(20)
# head and the 20 surge column names used when scoring. Taking the labels from
# input_data produced 16830-wide targets and left output_data unused.
time_steps = 10
X = create_time_series_data(input_data, time_steps=time_steps)[0]
# Window i covers rows i .. i+time_steps-1 and is paired with row i+time_steps,
# the same offset create_time_series_data uses for its own labels.
# NOTE(review): confirm that this "next row" pairing is the intended target.
Y = output_data[time_steps: time_steps + len(X)]
assert len(X) == len(Y), "features and labels must stay aligned"
print(X.shape, Y.shape)

# Hold out everything after the first 5000 windows for validation.
# NOTE: this rebinds X_train / Y_train / X_test, which until now held the loaded
# archives and CSV; re-run the loading cell before re-running the extraction cell.
n_train = 5000
X_train, X_test = X[:n_train], X[n_train:]
Y_train, Y_test = Y[:n_train], Y[n_train:]

del X, Y, input_data, output_data, x_surge1, x_surge2, x_slp_flat, x_slp, y_surge1, y_surge2


In [None]:
# create model
# Two stacked LSTMs read each 10-step window, then a linear layer produces the
# 20 surge values (surge1_t0..t9, surge2_t0..t9) for that window.
# 16830 features per step = 10 + 10 + 10*41*41, per the shape notes in the
# extraction cell.
model = tf.keras.Sequential([
  # The first LSTM emits the full sequence so the second LSTM has a time axis to read.
  tf.keras.layers.LSTM(128, input_shape=(10, 16830), return_sequences=True),
  # The last LSTM returns only its final output, giving (batch, 128). With
  # return_sequences=True the network would output (batch, 10, 20), which
  # matches neither one-row-per-window targets nor the 2-D prediction table
  # built for scoring.
  tf.keras.layers.LSTM(128),
  tf.keras.layers.Dense(20)
])

# MSE is the training loss; MAE is reported alongside it.
model.compile(loss='mse', optimizer='adam', metrics=['mae'])


In [None]:
# fit model
# The object returned by fit() exposes per-epoch values in its .history dict;
# 'val_loss' is computed on the held-out (X_test, Y_test) split.
history = model.fit(X_train, Y_train, epochs=100, validation_data=(X_test, Y_test))

# plot loss
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='test')
ax.set_xlabel('epoch')
ax.set_ylabel('MSE loss')
ax.set_title('Training vs. held-out loss')
ax.legend()
plt.show()


In [None]:
# Predict on the held-out windows and report what evaluate() returns for them
# (the compiled loss followed by the compiled metrics).
Y_pred = model.predict(X_test)

score = model.evaluate(X_test, Y_test, verbose=0)
print('Test loss:', score)

# Overlay the first output column of the targets and of the predictions.
fig, ax = plt.subplots()
ax.plot(Y_test[:, 0], label='actual')
ax.plot(Y_pred[:, 0], label='predicted')
ax.legend()
plt.show()

In [None]:
import surge_prediction_metric as surge

# Wrap predictions and targets in DataFrames that share the 20 surge column names.
surge_columns = [f'surge1_t{i}' for i in range(10)] + [f'surge2_t{i}' for i in range(10)]
Y_pred_df = pd.DataFrame(Y_pred, columns=surge_columns)
Y_true_df = pd.DataFrame(Y_test, columns=surge_columns)

# Score the first 10 rows and the remaining rows separately.
# NOTE(review): these slices select ROWS (samples), not the surge1/surge2
# column groups; confirm that is what was intended.
head_rows = slice(None, 10)
tail_rows = slice(10, None)
s1 = surge.surge_prediction_metric(Y_true_df.iloc[head_rows], Y_pred_df.iloc[head_rows])
s2 = surge.surge_prediction_metric(Y_true_df.iloc[tail_rows], Y_pred_df.iloc[tail_rows])
print(s1, s2)