# In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score, roc_curve, auc
import math

RANDOM_SEED = 2021

# Function to split a sequence (one or more values per step) into sliding-window samples
def split_sequence(sequence, n_steps):
    """Cut *sequence* into overlapping windows paired with the element that follows.

    Each sample consists of ``n_steps`` consecutive elements and the target
    is the element immediately after them.

    Args:
        sequence: Indexable, sliceable sequence (list or array); slicing is
            done along its first axis.
        n_steps: Number of consecutive elements in every input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` stacks the windows and ``y``
        the matching next elements. Both are empty arrays when the sequence
        is too short to yield a single window.
    """
    last_index = len(sequence) - 1
    # A window is usable only while the index of its target element
    # (start + n_steps) still lies inside the sequence.
    starts = [
        start for start in range(len(sequence))
        if start + n_steps <= last_index
    ]
    windows = [sequence[start:start + n_steps] for start in starts]
    targets = [sequence[start + n_steps] for start in starts]
    return np.array(windows), np.array(targets)

if __name__ == '__main__':
    # Pipeline: load the CSV, train an LSTM to predict the next row from the
    # previous `n_steps` rows using rows labelled 0, then flag the windows
    # with the largest prediction error and score them against the labels.

    # Seed NumPy and TensorFlow so weight initialisation, dropout and batch
    # shuffling are repeatable between runs.
    np.random.seed(RANDOM_SEED)
    tf.random.set_seed(RANDOM_SEED)

    # Every column except the last is a model input; the last column holds
    # the label (rows with label 0 are the ones selected for training below).
    data = pd.read_csv('yahoo7.csv')
    X_data = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values

    # Choose the number of time steps
    n_steps = 3

    # Fit the scaler on the first 70% of rows only, so statistics of the
    # later rows do not leak into training; then scale the whole series.
    train_threshold = math.floor(0.7 * len(data))
    scaler = StandardScaler()
    scaler.fit(X_data[:train_threshold])
    X_data = scaler.transform(X_data)

    # Training windows come from label-0 rows of the first 70%; evaluation
    # windows cover the full series. `y_next` holds the row that follows each
    # evaluation window, i.e. what the model is asked to predict.
    X_train, y_train = split_sequence(
        X_data[:train_threshold][y[:train_threshold] == 0], n_steps)
    X_test, y_next = split_sequence(X_data, n_steps)
    if X_train.ndim != 3 or X_test.ndim != 3:
        raise ValueError(
            "Not enough rows to build windows of {} time steps".format(n_steps))

    nb_epoch = 50
    batch_size = 64
    input_dim = X_train.shape[2]  # number of features per time step
    print(input_dim)

    input_layer = tf.keras.layers.Input(shape=(n_steps, input_dim))
    lstm_layer = tf.keras.layers.LSTM(units=14, activation='tanh', return_sequences=True)(input_layer)
    lstm_layer = tf.keras.layers.Dropout(0.2)(lstm_layer)
    lstm_layer = tf.keras.layers.LSTM(units=8, activation='tanh')(lstm_layer)

    # Linear output with one unit per feature: the targets are standardised
    # values, which are not confined to tanh's (-1, 1) range.
    output_layer = tf.keras.layers.Dense(input_dim)(lstm_layer)

    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
    model.summary()

    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0.0001,
        patience=10,
        verbose=1,
        mode='min',
        restore_best_weights=True)
    # This is a regression task, so track mean absolute error rather than
    # classification accuracy.
    model.compile(metrics=['mae'], loss='mean_squared_error', optimizer='adam')

    # validation_split provides the `val_loss` that early stopping monitors;
    # without validation data the callback can never trigger.
    history = model.fit(X_train, y_train,
                        epochs=nb_epoch,
                        batch_size=batch_size,
                        shuffle=True,
                        validation_split=0.1,
                        verbose=1,
                        callbacks=[early_stop]
                        ).history

    # Anomaly score per window: mean squared error between the predicted and
    # the actual next row, averaged over features.
    test_predictions = model.predict(X_test)
    mse = np.mean(np.square(y_next - test_predictions), axis=1)

    # Only large errors indicate an anomaly, so flag the top 5% of scores.
    uper = np.percentile(mse, 95)
    y_pred = (mse >= uper).astype(int)
    print(y_pred)

    # Window i predicts row i + n_steps, so the labels start at n_steps.
    y_test = y[n_steps:]
    print(y_test)
    precision_auto = precision_score(y_test, y_pred)
    recall_auto = recall_score(y_test, y_pred)
    f1_score_auto = f1_score(y_test, y_pred)
    # Build the ROC curve from the continuous score; thresholded 0/1
    # predictions would collapse it to a single operating point.
    fpr8, tpr8, thresholds = roc_curve(y_test, mse)
    auc_roc_auto = auc(fpr8, tpr8)

    print("Precision: {:.4f}".format(precision_auto))
    print("Recall: {:.4f}".format(recall_auto))
    print("F1-score: {:.4f}".format(f1_score_auto))
    print("AUC-ROC: {:.4f}".format(auc_roc_auto))
