In [None]:
import pandas as pd
import numpy as np

from keras._tf_keras.keras.layers import Dense, LSTM, Input, Bidirectional, GRU, Dropout
from keras._tf_keras.keras.models import Model, Sequential, load_model
from keras._tf_keras.keras.optimizers import Adam, RMSprop, Adagrad
from keras._tf_keras.keras.callbacks import EarlyStopping, LearningRateScheduler

from tqdm import tqdm

from data.city.load_cities import CITY

from sklearn.model_selection import train_test_split

import plotly.express as px

In [None]:
# Fractions of the windowed samples assigned to each split
# (consumed by the train_test_split calls further down).
TRAIN_SIZE, VALIDATION_SIZE, TEST_SIZE = 0.7, 0.2, 0.1

# Work on a private copy of the per-hour table, indexed by its 'date' column.
# Note that set_index moves 'date' out of the columns and into the index.
df_dataset = CITY.df_hours.copy().set_index('date')

In [None]:
# Station (a column of df_dataset) whose series is modelled. Exactly one line is
# active; the others are alternatives, with the original author's trailing notes.
# target_station = '00001-poids-de-lhuile'  # Base
target_station = '00076-republique-pl-olivier'  # Good
# target_station = '00218-amouroux-ste-augustine'  # Terrible
# target_station = '00256-place-de-la-rontonde'  # Normal
data_station = df_dataset[target_station]

# Window sizes, in rows of the dataset.
# NOTE(review): rows are presumably hourly (source table is `df_hours`), which
# would make these 7 days of forecast and 30 days of history — confirm.
prediction_length = 24 * 7
context_length = 24 * 30

In [None]:
# Multivariate variant: the target station plus calendar features.
#
# `df_dataset` is indexed by 'date' (set_index removed it from the columns), so the
# calendar features are read from the index; selecting a 'date' column would raise
# a KeyError. The result is kept in its own frame so that `data_station` remains
# the univariate series used by the windowing cell that follows.
timestamps = df_dataset.index  # NOTE(review): assumed to be a DatetimeIndex — confirm
data_station_features = df_dataset[[target_station]].assign(
    hour=timestamps.hour,
    day_of_week=timestamps.dayofweek,
    is_weekend=(timestamps.dayofweek >= 5).astype('int8'),  # dayofweek 5 and 6 -> 1
    day_of_month=timestamps.day,
)

# Convert to NumPy once and slice the arrays, rather than calling .iloc per window.
feature_values = data_station_features.to_numpy()
target_values = data_station_features[target_station].to_numpy()

# Number of complete (context, horizon) pairs. The "+ 1" keeps the final window,
# whose target slice ends exactly on the last row.
n_windows = len(data_station_features) - context_length - prediction_length + 1

# X: (n_windows, context_length, n_columns) — every column over the context span.
# y: (n_windows, prediction_length)         — target station only, over the horizon.
# The univariate cell that follows rebuilds X and y from `data_station`.
X = np.array([
    feature_values[start:start + context_length]
    for start in range(n_windows)
])
y = np.array([
    target_values[start + context_length:start + context_length + prediction_length]
    for start in range(n_windows)
])

# Multi-target idea (not active): build the frame from every station column of
# df_dataset and use all columns except the calendar features
# ('hour', 'day_of_week', 'is_weekend', 'day_of_month') as y.

In [None]:
# Univariate sliding windows over the target station's series.
series_values = data_station.to_numpy()
# The reshapes below assume a single series; a 2-D frame would be silently scrambled.
assert series_values.ndim == 1, "data_station must be a single series (1-D)"

# Number of complete (context, horizon) pairs. The "+ 1" keeps the final window,
# whose target slice ends exactly on the last row.
n_windows = len(series_values) - context_length - prediction_length + 1

# X: (n_windows, 1, context_length)    — history fed to the model.
# y: (n_windows, 1, prediction_length) — values that immediately follow each history.
X = np.array([
    series_values[start:start + context_length]
    for start in range(n_windows)
]).reshape((-1, 1, context_length))
y = np.array([
    series_values[start + context_length:start + context_length + prediction_length]
    for start in range(n_windows)
]).reshape((-1, 1, prediction_length))

In [None]:
# Ordered (shuffle=False) three-way split done in two passes:
#   1. carve off everything that is not training data,
#   2. divide that remainder between validation and test.
holdout_fraction = 1 - TRAIN_SIZE
test_share_of_holdout = TEST_SIZE / (TEST_SIZE + VALIDATION_SIZE)

# After the first pass X_test / y_test temporarily hold validation + test together.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=holdout_fraction, shuffle=False
)
X_valid, X_test, y_valid, y_test = train_test_split(
    X_test, y_test, test_size=test_share_of_holdout, shuffle=False
)

print('X_train :', X_train.shape, '| X_valid :', X_valid.shape, '| X_test :', X_test.shape)
print('y_train :', y_train.shape, '| y_valid :', y_valid.shape, '| y_test :', y_test.shape)

In [None]:
# Three identical (bidirectional LSTM -> dropout) stages. Every LSTM returns the
# full sequence, so the time axis of the input is preserved up to the Dense head.
# NOTE(review): recurrent_activation='selu' replaces the Keras default gate
# activation ('sigmoid') with an unbounded one — confirm this is intended.
recurrent_blocks = [
    layer
    for _stage in range(3)
    for layer in (
        Bidirectional(LSTM(units=256, return_sequences=True, activation='selu', recurrent_activation='selu')),
        Dropout(rate=0.35),
    )
]

# The head emits `prediction_length` values per time step.
# NOTE(review): the sigmoid output is bounded to (0, 1), so the targets are
# presumably scaled into that range upstream — confirm.
model: Model = Sequential(
    [
        Input(shape=X_train.shape[1:]),
        *recurrent_blocks,
        Dense(units=prediction_length, activation='sigmoid'),
    ],
    name='bike_station_model_LSTM',
)

In [None]:
# Train on mean squared error and report mean absolute error alongside it.
# Adam is constructed without arguments, i.e. with the library's default settings.
optimizer = Adam()
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

# Print the layer-by-layer overview of the compiled network.
model.summary()

In [None]:
# Early stopping on the validation loss: monitoring begins after the first
# 4 epochs, training halts after 16 epochs without improvement, and the best
# weights seen are restored when it stops.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=16,
    verbose=1,
    start_from_epoch=4,
    restore_best_weights=True,
)

# 256 is an upper bound on epochs; early stopping may end training sooner.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_valid, y_valid),
    epochs=256,
    batch_size=64,
    verbose=1,
    callbacks=[early_stopping],
)

In [None]:
# Optional: uncomment to persist the trained model to disk.
# model.save("my_model.keras")

In [None]:
# Plot one test window: the context followed by the true horizon ('reality'),
# overlaid with the model's forecast for that horizon ('prediction').
test_index = 0  # which test window to visualise

context_steps = X_test.shape[2]
horizon_steps = y_test.shape[2]

# Run inference on the selected window only, not on the whole test set.
# Indexing with a list keeps the batch axis and also works for negative indices.
predict = model.predict(X_test[[test_index]])[0, 0].flatten()
reality = y_test[test_index, 0].flatten()

length_df = context_steps + horizon_steps
df = pd.DataFrame(
    {
        # Observed values across the context and the forecast horizon.
        'reality': np.concatenate([X_test[test_index, 0].flatten(), reality]),
        # No forecast exists over the context span, so it is padded with NaN.
        'prediction': np.concatenate([np.full(context_steps, np.nan), predict]),
    },
    # Synthetic hourly axis starting 2024-01-01; these are NOT the real timestamps
    # of the window, they only give the plot an evenly spaced time axis.
    index=pd.date_range('1/1/2024', periods=length_df, freq='1h'),
)
px.line(df, title=f'{target_station} - test window {test_index}: reality vs prediction')