In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import keras

In [3]:
from keras.layers import Input, Dense
from keras.models import Model
import numpy as np
import matplotlib.pyplot as plt

In [4]:
# Read the two CSV exports (file names say "jan" and "may") and stack them
# row-wise into one frame.
monthly_paths = [
    'datasets/r129c12s02/dataset_jan_r129c12s02.csv',
    'datasets/r129c12s02/dataset_may_r129c12s02.csv',
]
d1, d2 = (pd.read_csv(csv_path) for csv_path in monthly_paths)
# The row index of each part is kept as read (no re-indexing on concat).
data = pd.concat([d1, d2], axis=0)

In [5]:
# Discard three columns; everything else (including `label`) is kept.
data = data.drop(columns=['jobs', 'Unnamed: 0', 'timestamp'])

In [7]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
split_options = {'test_size': 0.2, 'random_state': 42}
train, test = train_test_split(data, **split_options)

In [8]:
# Partition each split by label value: `*_0` holds label 0, `*_1` holds label 1.
train_0 = train.loc[train['label'] == 0]
train_1 = train.loc[train['label'] == 1]
test_0 = test.loc[test['label'] == 0]
test_1 = test.loc[test['label'] == 1]

In [9]:
from numpy import array
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.utils import plot_model


def DL_model(ishape = 120, full = True):
    """Build a dense autoencoder, optionally with a classifier head on the bottleneck.

    Architecture (all hidden layers use ReLU):
      * encoder: ishape -> 100 -> 80 -> 60 -> 40 -> 20 (bottleneck layer ``lattent``)
      * decoder: 20 -> 40 -> 60 -> 80 -> 100 -> ishape (sigmoid, ``autoencoder_output``)
      * classifier (only when ``full``): 20 -> 20 -> 2 (softmax, ``classifier_output``)

    Parameters
    ----------
    ishape : int
        Number of input features; also the width of the reconstruction output.
    full : bool
        If True, return the two-output model ``[autoencoder_output,
        classifier_output]``. If False, return the reconstruction-only model.

    Returns
    -------
    keras.models.Model
        An uncompiled model with freshly initialised weights. Each call builds
        a new, independent network (no weights are shared between calls).
    """
    # `shape` must be a tuple: `(ishape)` is just an int, which newer Keras
    # releases reject. `(ishape,)` is accepted by every version.
    visible = Input(shape=(ishape,), name='input')

    # Encoder stack; layer names are encoder_0 .. encoder_3.
    encoder = visible
    for idx, units in enumerate([100, 80, 60, 40]):
        encoder = Dense(units, activation='relu', name='encoder_{}'.format(idx))(encoder)

    # Bottleneck shared by the decoder and the classifier head.
    hidden = Dense(20, activation='relu', name='lattent')(encoder)

    # Reconstruction decoder; layer names are decoder_0 .. decoder_3.
    decoder = hidden
    for idx, units in enumerate([40, 60, 80, 100]):
        decoder = Dense(units, activation='relu', name='decoder_{}'.format(idx))(decoder)
    # Sigmoid output: reconstructions live in [0, 1], so inputs should be
    # scaled to that range before training.
    decoder = Dense(ishape, activation='sigmoid', name='autoencoder_output')(decoder)

    if not full:
        # Only build what is returned; the classifier head is not needed here.
        return Model(inputs=visible, outputs=[decoder])

    # Classifier head reading from the bottleneck.
    classifier = Dense(20, activation='relu', name="hidden")(hidden)
    classifier = Dense(2, activation='softmax', name="classifier_output")(classifier)

    return Model(inputs=visible, outputs=[decoder, classifier])

In [10]:
# Derive the input width from the data instead of hard-coding it: the models
# are fed every column of `train` except `label`.
n_features = train.shape[1] - 1
# Two independent networks: a reconstruction-only autoencoder and the
# two-headed (reconstruction + classifier) model.
decoder = DL_model(n_features, False)
full = DL_model(n_features, True)

In [11]:
# Compile the two-headed model.
# The classifier head is a 2-unit softmax and is trained on a 1-D vector of
# integer class ids (0/1). A binary cross-entropy would broadcast that single
# label against both softmax units and push the output toward [0.5, 0.5];
# sparse categorical cross-entropy is the loss that matches integer labels
# with a softmax over classes.
full.compile(
    optimizer=keras.optimizers.RMSprop(1e-3),
    loss={
        "autoencoder_output": keras.losses.MeanSquaredError(),
        "classifier_output": keras.losses.SparseCategoricalCrossentropy()
    },
    metrics={"autoencoder_output" : 'MeanSquaredError', "classifier_output": 'accuracy'}
)

In [12]:
# Compile the reconstruction-only model: RMSprop optimizer, mean squared error
# as both the training objective and the reported metric.
reconstruction_head = "autoencoder_output"
decoder.compile(
    optimizer=keras.optimizers.RMSprop(1e-3),
    loss={reconstruction_head: keras.losses.MeanSquaredError()},
    metrics={reconstruction_head: 'MeanSquaredError'},
)

In [13]:
from sklearn.preprocessing import MinMaxScaler

# Training Autoencoder on normal data

In [14]:
# Fit the autoencoder on label-0 training rows only. The scaler is fitted on
# these rows and reused later when scoring new data.
features_0 = train_0.drop(columns='label')
scaler_dec = MinMaxScaler()
X0_train = scaler_dec.fit_transform(features_0.values)
# Input and target are the same matrix: the network learns to reconstruct it.
history_auto = decoder.fit(
    x=X0_train,
    y={"autoencoder_output": X0_train},
    batch_size=10,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Training classifier on all data

In [15]:
# Train the two-headed model on all training rows (both labels).
y_train = train.label.values
X_train = train.drop('label', axis = 1).values
scaler_cla = MinMaxScaler()
# Fit the classifier's scaler on the matrix that is actually fed to the model.
# Scaling to [0, 1] also matches the sigmoid reconstruction output. This must
# act on X_train; X0_train (the autoencoder's scaled normal-only data) is left
# untouched.
X_train = scaler_cla.fit_transform(X_train)
# NOTE: this reuses the name `history_auto`, replacing the autoencoder's history.
history_auto = full.fit(
    X_train,
    {"autoencoder_output": X_train, "classifier_output": y_train},
    batch_size = 10,
    epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Combining the classifiers and evaluating the results

In [17]:
def predict_au(data, thr = 0.6):
    """Flag rows as anomalous from the autoencoder's reconstruction error.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature columns plus a ``label`` column (the label is dropped, not used).
    thr : float
        A row is flagged (1) when its smoothed, summed error is >= ``thr``.

    Returns
    -------
    list of int
        One 0/1 prediction per input row, in row order.

    Relies on the module-level ``scaler_dec`` (already fitted) and ``decoder``.
    """
    features = data.drop('label', axis = 1)
    # Apply the scaler fitted on the training data. Refitting here
    # (fit_transform) would rescale by the evaluation set's own min/max and
    # overwrite the fitted scaler. Scale once and reuse the result.
    scaled = scaler_dec.transform(features.values)
    error_v = scaled - decoder.predict(scaled)
    # Exponentially weighted mean down the rows, with span = number of rows.
    # NOTE(review): the error is signed, so positive and negative deviations
    # can cancel in the per-row sum below -- confirm this is intended.
    error_v = pd.DataFrame(error_v, columns = list(features.columns)).ewm(span=features.shape[0]).mean()
    error_v["sum"] = error_v.sum(axis=1)
    return (error_v["sum"] >= thr).astype(int).tolist()

In [18]:
def predict_cla(data):
    """Predict a 0/1 label per row with the classifier head of ``full``.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature columns plus a ``label`` column (the label is dropped, not used).

    Returns
    -------
    list of int
        One 0/1 prediction per input row, in row order.

    Relies on the module-level ``scaler_cla`` (already fitted) and ``full``.
    """
    features = data.drop('label', axis = 1).values
    # Apply the already-fitted scaler; refitting on the data being scored
    # would change the scaling and overwrite the fitted state.
    scaled = scaler_cla.transform(features)
    # `full` has two outputs; index 1 is the classifier head.
    class_probs = full.predict(scaled)[1]
    # Predict 0 only when class 0 is strictly more probable; ties go to 1.
    return [0 if row[0] > row[1] else 1 for row in class_probs]

In [19]:
def predict_combined(data, thr = 0.95):
    """Combine both detectors: a row is 1 if either prediction is 1.

    ``thr`` is forwarded to ``predict_au``; ``predict_cla`` takes no threshold.
    Returns a list with one entry per autoencoder prediction.
    """
    anomaly_flags = predict_au(data, thr)
    class_flags = predict_cla(data)
    merged = []
    for idx, flag in enumerate(anomaly_flags):
        # Element-wise maximum of two 0/1 flags acts as a logical OR.
        merged.append(max(flag, class_flags[idx]))
    return merged

In [20]:
from sklearn.metrics import classification_report

In [21]:
# Score the combined predictions against the held-out labels.
y_true = list(test.label)
y_combined = predict_combined(test)
print(classification_report(y_true, y_combined))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2813
           1       0.00      0.00      0.00         2

    accuracy                           1.00      2815
   macro avg       0.50      0.50      0.50      2815
weighted avg       1.00      1.00      1.00      2815



In [22]:
# Comparison to a random forest baseline trained on the same (unscaled) features.
from sklearn.ensemble import RandomForestClassifier
# Fixed seed so the baseline report is reproducible across runs
# (same seed value as the train/test split).
RF = RandomForestClassifier(n_estimators=10, random_state=42)
RF = RF.fit(train.drop('label', axis = 1).values, train.label)
pred = RF.predict(test.drop('label', axis = 1).values)
print(classification_report(list(test.label), pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2813
           1       0.00      0.00      0.00         2

    accuracy                           1.00      2815
   macro avg       0.50      0.50      0.50      2815
weighted avg       1.00      1.00      1.00      2815

