In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential, Model
from keras.layers import LSTM, Dense, Dropout, Bidirectional, BatchNormalization, Input 
from attention import Attention
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit

In [None]:
def read_data(file_location):
    """Load a CSV into a DataFrame.

    Args:
        file_location: Whatever ``pd.read_csv`` accepts as its first argument.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(file_location)

In [None]:
def preprocess_data(df, aggregation='H', ws=24, number_of_predicted_days=2, n_splits=4):
    """Turn raw timestamped readings into an aggregated series plus walk-forward splits.

    Steps: drop rows whose ``source_ts`` repeats, index the frame by the parsed
    timestamps, forward-fill onto a 1-second grid, drop the final row, sum into
    ``aggregation`` bins, then derive ``n_splits`` train/test windows.

    Args:
        df: DataFrame with a ``source_ts`` column that ``pd.to_datetime`` can
            parse. The caller's frame is not modified.
        aggregation: Frequency alias passed to ``DataFrame.resample``.
        ws: Multiplied by ``number_of_predicted_days`` to get the test-window
            length in aggregated rows.
        number_of_predicted_days: See ``ws``; the defaults give 48 rows.
        n_splits: Number of folds requested from ``TimeSeriesSplit``.

    Returns:
        ``(df_resampled, splits, n_splits)``. ``splits`` holds one
        ``(train_start, train_end, test_start, test_end)`` tuple per fold;
        all four are inclusive row positions into ``df_resampled``.
    """
    # Non-inplace operations so the frame passed in by the caller stays intact.
    frame = df.drop_duplicates(subset=['source_ts'])
    timestamps = pd.DatetimeIndex(pd.to_datetime(frame['source_ts']).values)
    frame = frame.set_index(timestamps).drop('source_ts', axis=1)

    # Forward-fill reindexing requires a monotonic index, so order by time first.
    frame = frame.sort_index()
    # Lowercase 's' is the current spelling of the one-second alias.
    frame = frame.asfreq(freq='s', method='ffill')

    # NOTE(review): a date-only label slice is inclusive of that whole day, so
    # this keeps everything and only the very last row is removed. Confirm
    # whether dropping the trailing partial day was the actual intent.
    last_day = frame.index[-1].strftime('%Y-%m-%d')
    frame = frame.loc[:last_day].iloc[:-1, :]

    df_resampled = frame.resample(aggregation).sum()

    test_size = ws * number_of_predicted_days
    total_len = len(df_resampled)

    splits = []
    for _, test_index in TimeSeriesSplit(n_splits=n_splits).split(df_resampled):
        test_start = int(test_index[0])
        # Clamp so a window never points past the last available row.
        test_end = min(test_start + test_size, total_len) - 1
        # Training always spans everything before the test window.
        splits.append((0, test_start - 1, test_start, test_end))

    return df_resampled, splits, n_splits

In [None]:
def create_model(shape):
    """Assemble the stacked bidirectional-LSTM network (left uncompiled).

    Layout: five ``Bidirectional(LSTM(32, return_sequences=True))`` layers with
    a ``Dropout(0.1)`` between consecutive ones, followed by ``Attention(8)``
    and a single-unit ``Dense`` output.

    Args:
        shape: Number of time steps per input window; the first layer is
            declared with ``input_shape=(shape, 1)``.

    Returns:
        The assembled ``Sequential`` model.
    """
    network = Sequential()

    network.add(Bidirectional(LSTM(units=32, return_sequences=True), input_shape=(shape, 1)))
    # Four further recurrent layers, each one preceded by dropout.
    for _ in range(4):
        network.add(Dropout(0.1))
        network.add(Bidirectional(LSTM(32, return_sequences=True)))

    network.add(Attention(8))
    network.add(Dense(units=1))

    return network


In [None]:
def train_model(model, x_train, y_train, epochs=30, batch_size=32):
    """Compile ``model`` with Adam and MSE loss, then fit it.

    Args:
        model: Object exposing ``compile`` and ``fit``.
        x_train: Training inputs passed to ``fit``.
        y_train: Training targets passed to ``fit``.
        epochs: Number of epochs passed to ``fit``.
        batch_size: Batch size passed to ``fit``.

    Returns:
        The same ``model`` object, after fitting.
    """
    compile_options = {'optimizer': 'adam', 'loss': 'mean_squared_error'}
    model.compile(**compile_options)

    fit_options = {'epochs': epochs, 'batch_size': batch_size}
    model.fit(x_train, y_train, **fit_options)

    return model

In [None]:
def make_predictions(model, batch_one, prediction_in_future_time):
    """Forecast recursively, feeding each prediction back into the input window.

    The window length is taken from ``batch_one`` itself, so the function does
    not depend on any notebook-level variable being defined.

    Args:
        model: Object whose ``predict`` takes a ``(1, window, 1)`` array; the
            first row of its result is used as the next value.
        batch_one: Seed window; reshaped to ``(1, window, 1)``.
        prediction_in_future_time: Number of steps to forecast.

    Returns:
        ``np.ndarray`` stacking the per-step predictions in order.
    """
    forecasts = []
    # -1 lets numpy infer the window length from the seed data.
    window = np.asarray(batch_one).reshape((1, -1, 1))

    for _ in range(prediction_in_future_time):
        next_step = model.predict(window)[0]
        forecasts.append(next_step)
        # Slide the window: drop the oldest step, append the newest prediction.
        window = np.append(window[:, 1:, :], [[next_step]], axis=1)

    return np.array(forecasts)


In [None]:
def evaluate_model(test_set, predictions):
    """Score predictions against the actual values.

    Args:
        test_set: Actual values.
        predictions: Predicted values, aligned with ``test_set``.

    Returns:
        ``(rmse, rsquare)``: the square root of ``mean_squared_error`` and the
        ``r2_score`` of the two inputs.
    """
    mse = mean_squared_error(test_set, predictions)
    rsquare = r2_score(test_set, predictions)
    return np.sqrt(mse), rsquare

In [None]:
def plot_results(test_set, predictions):
    """Plot actual and predicted values on one figure and display it.

    Side effect: sets matplotlib's global ``figure.figsize`` to ``(40, 3)``.

    Args:
        test_set: Actual values, drawn in green.
        predictions: Predicted values, drawn in orange.
    """
    plt.rcParams["figure.figsize"] = (40, 3)

    curves = (
        (test_set, 'green', 'Actual value'),
        (predictions, 'orange', 'Predicted value'),
    )
    for values, colour, caption in curves:
        plt.plot(values, color=colour, label=caption)

    plt.legend()
    plt.show()

In [None]:
def create_input_sequences(data, ws):
    """Cut a 2-D array into sliding input windows and next-step targets.

    For every row position ``i`` from ``ws`` to the end, the input is the first
    column of rows ``i - ws`` to ``i - 1`` (kept 2-D) and the target is the
    first-column value of row ``i``.

    Args:
        data: 2-D array; only column 0 is used.
        ws: Window length in rows.

    Returns:
        ``(x, y)`` as numpy arrays. Both are empty 1-D arrays when ``data`` has
        no more than ``ws`` rows.
    """
    target_rows = range(ws, len(data))
    windows = [data[row - ws:row, 0:1] for row in target_rows]
    targets = [data[row, 0] for row in target_rows]
    return np.array(windows), np.array(targets)

In [None]:
def main(file_location, ws=24, number_of_predicted_days=2):
    """Run the walk-forward train / forecast / score loop on one CSV file.

    For every split: fit a ``MinMaxScaler`` on the training slice, train a
    fresh model on sliding windows, forecast the test window recursively from
    the last training window, then record RMSE and R². Each fold is plotted,
    and the per-fold and average metrics are printed at the end.

    Args:
        file_location: CSV location handed to ``read_data``.
        ws: Length of the model's input window, in aggregated rows.
        number_of_predicted_days: Forwarded to ``preprocess_data`` together
            with ``ws``.

    Raises:
        ValueError: If a training slice has no more than ``ws`` rows, so no
            training window can be built.
    """
    df = read_data(file_location)

    df_resampled, splits, n_splits = preprocess_data(
        df, ws=ws, number_of_predicted_days=number_of_predicted_days
    )

    # The windows and the model output use a single feature, so scaling,
    # forecasting and scoring are all restricted to the first column.
    target = df_resampled.iloc[:, 0:1]

    rmse_values = []
    r_squared_values = []

    for split in range(n_splits):
        train_start, train_end, test_start, test_end = splits[split]
        training_set = target.iloc[train_start:train_end + 1].values
        test_set = target.iloc[test_start:test_end + 1].values

        if len(training_set) <= ws:
            raise ValueError(
                "Training slice has %d rows; more than ws=%d are required"
                % (len(training_set), ws)
            )

        sc = MinMaxScaler(feature_range=(0, 1))
        training_set_scaled = sc.fit_transform(training_set)
        x_train, y_train = create_input_sequences(training_set_scaled, ws)

        model = create_model(x_train.shape[1])
        model = train_model(model, x_train, y_train)

        # Seed the recursive forecast with the most recent training window.
        batch_one = training_set_scaled[-ws:]
        # Forecast exactly as many steps as there are rows to compare against.
        prediction_in_future_time = len(test_set)
        prediction = make_predictions(model, batch_one, prediction_in_future_time)
        prediction = sc.inverse_transform(prediction)

        rmse, rsquare = evaluate_model(test_set, prediction)
        rmse_values.append(rmse)
        r_squared_values.append(rsquare)
        plot_results(test_set, prediction)

    print("RMSE values= ",rmse_values)
    print("R2 values= ", r_squared_values)

    print("Average RMSE: ", np.mean(rmse_values))
    print("Average R2: ", np.mean(r_squared_values))

In [None]:
# Window size, and a forecast horizon of two windows.
ws = 24
prediction_in_future_time = 2 * ws

# Run the whole pipeline on Dataset1.
main("./testData/Dataset1.csv")