In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow import keras

from sail.models.keras import OSELM

#### Load Data


In [2]:
df = pd.read_csv(
    "../../datasets/nyc_taxi.csv",
    header=0,
    names=["time", "data", "timeofday", "dayofweek"],
)
meanSeq = np.mean(df["data"])
stdSeq = np.std(df["data"])
df["data"] = (df["data"] - meanSeq) / stdSeq
df.isnull().sum()

time         0
data         0
timeofday    0
dayofweek    0
dtype: int64

#### Get Lag features


In [3]:
def getTimeEmbeddedMatrix(sequence, numLags=100, predictionStep=1):
    print("generate time embedded matrix")
    inDim = numLags
    X = np.zeros(shape=(len(sequence), inDim))
    T = np.zeros(shape=(len(sequence), 1))
    for i in range(numLags - 1, len(sequence) - predictionStep):
        X[i, :] = np.array(sequence["data"][(i - numLags + 1) : (i + 1)])
        T[i, :] = sequence["data"][i + predictionStep]
    print("input shape: ", X.shape)
    print("target shape: ", T.shape)
    return X, T


numLags = 100
predictionStep = 5
Xh, Th = getTimeEmbeddedMatrix(df, numLags, predictionStep)

split_ratio = int(10 * 1.2 * 25)
X = Xh[split_ratio:]
y = Th[split_ratio:]

generate time embedded matrix
input shape:  (17520, 100)
target shape:  (17520, 1)


#### Model Definition


In [4]:
model = OSELM(
    loss="mae",
    optimizer=keras.optimizers.Adam,
    metrics=["accuracy"],
    epochs=1,
    verbose=0,
    num_hidden_nodes=25,
    hidden_layer_activation=tf.nn.sigmoid,
    num_output_nodes=1,
    forgetting_factor=0.5,
)

#### Start Training


In [5]:
y_preds = []
y_true = []
batch_size = 1

for i in range(numLags, 800):
    if i != numLags:
        preds = model.predict(X[[i], :])
        if preds is not None:
            y_preds.append(preds[0][0])
            y_true.append(y[i - 1][0])
        print(
            "{:5}th timeStep -  target: {:8.4f}   |    prediction: {:8.4f} ".format(
                i, y_true[-1], y_preds[-1]
            )
        )
    else:
        model.partial_fit(np.array(X[[i], :]), np.array(y[[i], :]), verbose=0)

Cause: mangled names are not yet supported
Cause: mangled names are not yet supported


2023-12-12 15:56:55.520844: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


  101th timeStep -  target:   0.3540   |    prediction:   0.3526 
  102th timeStep -  target:   0.2173   |    prediction:   0.3382 
  103th timeStep -  target:   0.4038   |    prediction:   0.3495 
  104th timeStep -  target:   0.4585   |    prediction:   0.3443 
  105th timeStep -  target:   0.4472   |    prediction:   0.3684 
  106th timeStep -  target:   0.4466   |    prediction:   0.3739 
  107th timeStep -  target:   0.5003   |    prediction:   0.3645 
  108th timeStep -  target:   0.5001   |    prediction:   0.3478 
  109th timeStep -  target:   0.5082   |    prediction:   0.3172 
  110th timeStep -  target:   0.4639   |    prediction:   0.3207 
  111th timeStep -  target:   0.3397   |    prediction:   0.3135 
  112th timeStep -  target:   0.0146   |    prediction:   0.3035 
  113th timeStep -  target:   0.0417   |    prediction:   0.3180 
  114th timeStep -  target:   0.2972   |    prediction:   0.3287 
  115th timeStep -  target:   0.6576   |    prediction:   0.3250 
  116th ti

In [6]:
# Reconstruct original value
predictions = np.array(preds)
target = np.array(y_true)
predictions = predictions * stdSeq + meanSeq
target = target * stdSeq + meanSeq


def computeSquareDeviation(predictions, truth):
    squareDeviation = np.square(predictions - truth)
    return squareDeviation


# Calculate NRMSE from skip_eval to the end
skip_eval = 100
squareDeviation = computeSquareDeviation(predictions, target)
squareDeviation[:skip_eval] = None
nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(predictions)
print("NRMSE {}".format(nrmse))

NRMSE nan


  nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(predictions)


In [9]:
import plotly.express as px

df = pd.DataFrame({"y_true": y_true, "y_preds": y_preds}).head(500)
fig = px.line(df, y=["y_true", "y_preds"], title="")
fig.show()