In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import LSTM, Input, Dense, LSTMCell, RNN, Bidirectional, Concatenate
from sklearn.preprocessing import MinMaxScaler
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit

In [None]:
# Function to read data from a file
def read_data(file_location):
    """Load a CSV file into a pandas DataFrame.

    Parameters
    ----------
    file_location : str, path-like or file-like
        Anything ``pandas.read_csv`` accepts as its first argument.

    Returns
    -------
    pandas.DataFrame
        The parsed file, using pandas' default parsing options.
    """
    return pd.read_csv(file_location)

In [None]:
# Function to preprocess and clean data
def preprocess_data(df, aggregation='H', ws=24, number_of_predicted_days=2):
    """Clean the raw readings, resample them and derive walk-forward splits.

    The input frame is never modified; all work happens on copies.

    Steps
    -----
    1. Drop rows containing missing values and rows with a repeated
       ``source_ts``.
    2. Index the frame by the parsed ``source_ts`` timestamps (sorted).
    3. Forward-fill onto a one-second grid and drop the final row.
    4. Sum the values into ``aggregation`` buckets.
    5. Build four train/test splits whose test window is
       ``ws * number_of_predicted_days`` resampled steps long.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data with a ``source_ts`` column parseable by ``pd.to_datetime``.
    aggregation : str or offset, default 'H'
        Resampling frequency. The legacy aliases 'H', 'T' and 'S' are mapped
        to 'h', 'min' and 's' so the default keeps working on pandas
        versions where the upper-case aliases are deprecated.
    ws : int, default 24
        Window size in resampled steps.
    number_of_predicted_days : int, default 2
        Number of windows that make up one test segment.

    Returns
    -------
    df_resampled : pandas.DataFrame
        The cleaned data summed per ``aggregation`` bucket.
    splits : list of tuple
        ``(train_start, train_end, test_start, test_end)`` positional row
        indices into ``df_resampled``; both ends are inclusive.
    n_splits : int
        Number of splits (always 4).

    Raises
    ------
    ValueError
        If no rows survive cleaning, if the test window is not positive, or
        if a test window would extend past the end of the resampled data.
    """
    # Upper-case single-letter aliases are deprecated in recent pandas; the
    # lower-case spellings are accepted by old and new versions alike.
    legacy_aliases = {'H': 'h', 'T': 'min', 'S': 's'}
    freq = legacy_aliases.get(aggregation, aggregation) if isinstance(aggregation, str) else aggregation

    # Non-inplace calls return new frames, so the caller's df stays untouched.
    cleaned = df.dropna().drop_duplicates(subset=['source_ts'])
    if cleaned.empty:
        raise ValueError("no usable rows left after dropping NaNs and duplicate timestamps")

    timestamps = pd.DatetimeIndex(pd.to_datetime(cleaned['source_ts']).values)
    # Sorting makes the forward fill well-defined even if the file is not
    # in chronological order.
    cleaned = cleaned.drop(columns='source_ts').set_index(timestamps).sort_index()

    cleaned = cleaned.asfreq(freq='s', method='ffill')

    # NOTE(review): slicing up to the last day's date string keeps every row
    # of that day, so the net effect here is only to drop the final row.
    # If the intent was to discard the incomplete last day, this needs
    # changing — confirm with the data owner. Behaviour kept as-is.
    last_day = cleaned.index[-1].strftime('%Y-%m-%d')
    cleaned = cleaned.loc[:last_day].iloc[:-1, :]

    df_resampled = cleaned.resample(freq).sum()

    n_splits = 4
    test_size = ws * number_of_predicted_days  # 48 with the defaults
    if test_size < 1:
        raise ValueError("ws * number_of_predicted_days must be at least 1")
    total_len = len(df_resampled)

    splits = []
    # Only the start of each fold's test segment is used: the test window
    # always spans test_size steps and training uses everything before it.
    for _, test_index in TimeSeriesSplit(n_splits=n_splits).split(df_resampled):
        test_start = int(test_index[0])
        test_end = test_start + test_size - 1
        if test_end >= total_len:
            raise ValueError(
                "not enough data: a test window of %d steps starting at row %d "
                "exceeds the %d resampled rows" % (test_size, test_start, total_len)
            )
        splits.append((0, test_start - 1, test_start, test_end))

    return df_resampled, splits, n_splits

In [None]:
def create_model(no_input_features=1, no_output_features=1, layers=[6]):
    """Build an (uncompiled) LSTM encoder-decoder Keras model.

    The encoder RNN reads a variable-length sequence of
    ``no_input_features`` features; everything it returns after its first
    output is passed to the decoder RNN as ``initial_state``. The decoder
    returns a full sequence, which a ``relu`` Dense layer projects to
    ``no_output_features`` values per step.

    Parameters
    ----------
    no_input_features : int
        Feature count of the encoder input.
    no_output_features : int
        Feature count of the decoder input and of the model output.
    layers : list of int
        Hidden size of each stacked ``LSTMCell``; the same sizes are used
        for the encoder and the decoder. The default list is only read,
        never modified.

    Returns
    -------
    keras.models.Model
        Model taking ``[encoder_inputs, decoder_inputs]``.
    """

    def build_cells():
        # One LSTMCell per requested hidden size.
        return [LSTMCell(units) for units in layers]

    # Encoder: only the returned states are kept.
    enc_in = Input(shape=(None, no_input_features))
    enc_rnn = RNN(build_cells(), return_state=True)
    enc_states = enc_rnn(enc_in)[1:]

    # Decoder: seeded with the encoder states, emits one output per step.
    dec_in = Input(shape=(None, no_output_features))
    dec_rnn = RNN(build_cells(), return_sequences=True, return_state=True)
    dec_sequence = dec_rnn(dec_in, initial_state=enc_states)[0]

    projection = Dense(no_output_features, activation='relu')
    return Model([enc_in, dec_in], projection(dec_sequence))
    

In [None]:
def create_input_sequences(data, ws):
    """Cut a series into (input window, following window) training pairs.

    For every position ``i`` the input is ``data[i-ws:i]`` and the target is
    the ``ws`` steps that follow it directly, ``data[i:i+ws]``. The target
    therefore starts at the step immediately after the input window, which
    matches how the forecast is consumed at prediction time. Only the first
    column of ``data`` is used.

    Parameters
    ----------
    data : array-like, shape (n_steps, n_columns)
        Series to window.
    ws : int
        Length of both the input and the target window (>= 1).

    Returns
    -------
    x_train, y_train : numpy.ndarray
        Arrays of shape ``(n_samples, ws, 1)``. When ``data`` is too short
        for a single pair, both have shape ``(0, ws, 1)``.

    Raises
    ------
    ValueError
        If ``ws`` is smaller than 1.
    """
    if ws < 1:
        raise ValueError("ws must be at least 1")

    data = np.asarray(data)
    # The last valid start is len(data) - ws, so that the target window
    # ends exactly on the final row.
    starts = range(ws, len(data) - ws + 1)

    x_train = [data[i - ws:i, 0:1] for i in starts]
    y_train = [data[i:i + ws, 0:1] for i in starts]

    if not x_train:
        # Keep the rank stable so callers can rely on .shape.
        empty = np.empty((0, ws, 1), dtype=data.dtype)
        return empty, empty.copy()

    return np.array(x_train), np.array(y_train)

In [None]:
def train_model(model, train_dataset, epochs=60, batch_size=32, ws=24):
    """Compile ``model`` and fit it on windows cut from ``train_dataset``.

    The model is compiled with a default ``Adam`` optimizer and mean squared
    error loss. Training pairs come from ``create_input_sequences``; the
    decoder is fed an all-zero array with the same shape as the targets.
    The fit holds out 10% of the samples for validation and does not
    shuffle.

    Parameters
    ----------
    model : keras model
        Model accepting ``[encoder_input, decoder_input]``.
    train_dataset : array-like, shape (n_steps, n_columns)
        Scaled training series.
    epochs : int, default 60
        Number of training epochs.
    batch_size : int, default 32
        Mini-batch size.
    ws : int, default 24
        Window length passed to ``create_input_sequences``.

    Returns
    -------
    The same ``model`` object, after fitting.
    """
    model.compile(Adam(), loss='mean_squared_error')

    encoder_input, decoder_target = create_input_sequences(train_dataset, ws)
    decoder_input = np.zeros(decoder_target.shape)

    model.fit(
        [encoder_input, decoder_input],
        decoder_target,
        batch_size=batch_size,
        epochs=epochs,
        validation_split=0.1,
        shuffle=False,
    )

    return model

In [None]:
def make_predictions(model, initial_batch, input_len=24, output_len=24, ws=24, prediction_in_future_time=48):
    """Forecast recursively, feeding each prediction back as encoder input.

    The encoder window starts as the last ``ws`` rows (first column) of
    ``initial_batch``. In each round the model predicts ``output_len``
    steps from the current window and an all-zero decoder input of shape
    ``(1, output_len, 1)``; the prediction is then appended to the window,
    which is trimmed back to its last ``ws`` rows. Rounds are repeated until
    at least ``prediction_in_future_time`` steps have been produced.

    Parameters
    ----------
    model : object with ``predict([encoder_input, decoder_input])``
        Trained encoder-decoder model.
    initial_batch : array-like, shape (n_steps, n_columns)
        Most recent (scaled) observations.
    input_len : int, default 24
        Unused; kept for backward compatibility. The encoder window length
        is taken from ``ws``.
    output_len : int, default 24
        Number of steps predicted per round.
    ws : int, default 24
        Encoder window length.
    prediction_in_future_time : int, default 48
        Total number of steps to forecast.

    Returns
    -------
    numpy.ndarray
        Stacked per-round predictions, shape ``(n_rounds, output_len, 1)``
        for a single-feature model, where
        ``n_rounds = ceil(prediction_in_future_time / output_len)``.

    Raises
    ------
    ValueError
        If ``ws``, ``output_len`` or ``prediction_in_future_time`` is < 1.
    """
    if ws < 1 or output_len < 1 or prediction_in_future_time < 1:
        raise ValueError("ws, output_len and prediction_in_future_time must all be at least 1")

    # Shape (1, ws, 1): a batch holding one encoder window.
    window = np.asarray(initial_batch)[-ws:, 0:1][np.newaxis, ...]
    # Same zero decoder input as used for training: (batch, steps, features).
    decoder_zeros = np.zeros((1, output_len, 1))
    # Ceiling division without importing math.
    n_rounds = -(-prediction_in_future_time // output_len)

    prediction_test = []
    for _ in range(n_rounds):
        prediction = model.predict([window, decoder_zeros])[0]
        prediction_test.append(prediction)
        # Slide the window forward over the freshly predicted steps.
        window = np.concatenate([window[0], prediction], axis=0)[-ws:][np.newaxis, ...]

    return np.array(prediction_test)

In [None]:
def plot_results(test_set, predictions):
    """Plot actual and predicted values on the same axes and show the figure.

    Sets the global default figure size to 40x3 inches before plotting.

    Parameters
    ----------
    test_set : array-like
        Observed values, drawn in green.
    predictions : array-like
        Forecast values, drawn in orange.
    """
    plt.rcParams["figure.figsize"] = (40,3)
    curves = (
        (test_set, 'green', 'Actual value'),
        (predictions, 'orange', 'Predicted value'),
    )
    for values, colour, caption in curves:
        plt.plot(values, color=colour, label=caption)
    plt.legend()
    plt.show()

In [None]:
def compute_prediction_direction_accuracy(test_set, predictions):
    """Percentage of steps whose predicted direction matches the actual one.

    For each consecutive pair of points the sign of the predicted change
    (up, down or flat) is compared with the sign of the actual change; a
    step counts as correct only when both signs agree.

    Parameters
    ----------
    test_set : array-like
        Observed values; flattened to 1-D. Must be at least as long as
        ``predictions``; extra trailing values are ignored.
    predictions : array-like
        Forecast values; flattened to 1-D.

    Returns
    -------
    float
        Share of correctly predicted directions, in percent (0-100).

    Raises
    ------
    ValueError
        If fewer than two predictions are given or ``test_set`` is shorter
        than ``predictions``.
    """
    predicted = np.asarray(predictions, dtype=float).ravel()
    actual = np.asarray(test_set, dtype=float).ravel()

    n_steps = predicted.size - 1
    if n_steps < 1:
        raise ValueError("at least two predictions are needed to compute a direction")
    if actual.size < predicted.size:
        raise ValueError("test_set must be at least as long as predictions")

    predicted_direction = np.sign(np.diff(predicted))
    actual_direction = np.sign(np.diff(actual[:predicted.size]))

    correct_directions = int(np.count_nonzero(predicted_direction == actual_direction))
    return (correct_directions / n_steps) * 100

In [None]:
# Function to compute the forecast interval coverage
def compute_forecast_interval_coverage(test_set, predictions, rmse):
    """Percentage of observations inside ``prediction +/- 1.96 * rmse``.

    Parameters
    ----------
    test_set : array-like
        Observed values; lists, 1-D arrays and column vectors are accepted
        and flattened to 1-D.
    predictions : array-like
        Forecast values, flattened the same way. If shorter than
        ``test_set``, the unmatched observations count as not covered.
    rmse : float
        Error scale used for the interval half-width (``1.96 * rmse``).

    Returns
    -------
    float
        Share of observations within their interval, in percent, relative
        to the number of observations in ``test_set``.

    Raises
    ------
    ValueError
        If ``test_set`` is empty.
    """
    actual = np.asarray(test_set, dtype=float).ravel()
    predicted = np.asarray(predictions, dtype=float).ravel()

    total_observations = actual.size
    if total_observations == 0:
        raise ValueError("test_set must contain at least one observation")

    # Compare only the positions present in both series.
    paired = min(actual.size, predicted.size)
    half_width = 1.96 * rmse
    lower = predicted[:paired] - half_width
    upper = predicted[:paired] + half_width
    within = (lower <= actual[:paired]) & (actual[:paired] <= upper)

    num_within_the_interval = int(np.count_nonzero(within))
    return num_within_the_interval / total_observations * 100

In [None]:
# Function to compute the forecast bias
def compute_forecast_bias(test_set, predictions):
    """Mean signed error of the forecast (predicted minus actual).

    Values are paired positionally; pairing stops at the shorter input.
    A positive result means the forecast is, on average, above the
    observed values.
    """
    running_total = 0
    pair_count = 0
    for predicted, actual in zip(predictions, test_set):
        running_total = running_total + (predicted - actual)
        pair_count += 1
    return running_total / pair_count

In [None]:
def main(file_location, ws=24):
    """Run the walk-forward train / forecast / evaluate loop and print scores.

    For every split returned by ``preprocess_data`` a fresh model is built
    and trained on the min-max scaled training rows, the test segment is
    plotted, a recursive forecast is produced from the last ``ws`` training
    rows, mapped back to the original scale and scored with
    ``evaluate_model``. Per-split and average RMSE / R2 are printed.

    Parameters
    ----------
    file_location : str
        Path of the CSV file handed to ``read_data``.
    ws : int, default 24
        Number of trailing training rows used to seed the forecast. Taken
        as an argument so the function does not depend on a module-level
        ``ws``. NOTE: ``preprocess_data`` and ``train_model`` are called
        with their own default window settings, so values other than 24
        only change the forecast seed.
    """
    df = read_data(file_location)

    df_resampled, splits, n_splits = preprocess_data(df)

    rmse_values = []
    r_squared_values = []

    for train_start, train_end, test_start, test_end in splits[:n_splits]:
        # Split bounds are inclusive positional row numbers.
        training_set = df_resampled.iloc[train_start:train_end + 1].values
        test_set = df_resampled.iloc[test_start:test_end + 1].values

        # Fit the scaler on the training rows only.
        sc = MinMaxScaler(feature_range=(0, 1))
        training_set_scaled = sc.fit_transform(training_set)

        model = create_model(layers=[64])
        model = train_model(model, train_dataset=training_set_scaled, epochs=60)

        plt.rcParams["figure.figsize"] = (40,3)
        plt.plot(test_set)
        plt.show()

        batch_init = training_set_scaled[-ws:]
        predictions = make_predictions(model, batch_init, ws=ws)

        # Flatten the per-round forecasts into one column, then undo scaling.
        predictions = np.asarray(predictions).reshape(-1, 1)
        predictions = sc.inverse_transform(predictions)

        rmse, rsquare = evaluate_model(test_set, predictions)
        rmse_values.append(rmse)
        r_squared_values.append(rsquare)

    print("RMSE values= ",rmse_values)
    print("R2 values= ", r_squared_values)

    print("Average RMSE: ", np.mean(rmse_values))
    print("Average R2: ", np.mean(r_squared_values))

In [None]:
# Function to evaluate the model
def evaluate_model(test_set, predictions):
    """Score a forecast against the observed values.

    Returns
    -------
    tuple
        ``(rmse, rsquare)``: the square root of ``mean_squared_error`` and
        the ``r2_score``, both computed with ``test_set`` as the true values
        and ``predictions`` as the estimates.
    """
    mse = mean_squared_error(test_set, predictions)
    return np.sqrt(mse), r2_score(test_set, predictions)

In [None]:
# Window length in resampled steps. It has to be defined before main() runs
# whenever main() reads `ws` from module scope instead of taking it as an
# argument.
ws = 24
# Twice the window length (48 steps). No function in this notebook reads this
# module-level name; make_predictions has a same-named parameter with its own
# default of 48.
prediction_in_future_time = ws * 2
# Run the whole pipeline on the sample dataset.
main("./testData/Dataset1.csv")