## CUSTOMIZATION
---

In [0]:
import tensorflow as tf
# Ask TensorFlow for the GPU device name to confirm the runtime has a GPU attached.
tf.test.gpu_device_name()
# To enable the GPU: Runtime -> Change runtime type -> Hardware accelerator: GPU
# Expected output: '/device:GPU:0'

'/device:GPU:0'

In [0]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
# Mount Google Drive at /content/gdrive.

In [0]:
# Copy the dataset archive from Google Drive into the Colab working directory,
# unpack it, remove the archive and list the result.
! cp /content/gdrive/'My Drive'/Mouse/dataset.zip . # note the trailing dot (destination is the current directory)
! unzip -q dataset
! rm dataset.zip
! ls

dataset  gdrive  sample_data


### import
---

In [0]:
from tensorflow.keras.models import Sequential, Model, save_model, load_model
from tensorflow.keras.layers import (Dense, BatchNormalization, Dropout,
                                     ReLU, ELU, Softmax)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import glob
import os

### global
---


In [0]:
# Index of the current experiment; it prefixes every artifact name below and
# selects the sub-folder on Google Drive.
N_EXPERIMENT = 1

# Artifact names derived from the experiment index.
MODEL_NAME = f'{N_EXPERIMENT}_mouse_model'
WEIGHT_NAME = f'{N_EXPERIMENT}_mouse_weight'
LOG_NAME = f'{N_EXPERIMENT}_training_log.csv'

# Folder on the mounted Drive that receives this experiment's checkpoints.
GDRIVE_PATH = f'./gdrive/My Drive/Mouse/{N_EXPERIMENT}'

# Only the first piece is an f-string: the {epoch}, {loss} and {val_loss}
# fields of the second piece stay literal placeholders in the resulting path,
# which is later passed as `filepath` to ModelCheckpoint.
CHECKPOINT_PATH = (
    f'{GDRIVE_PATH}/{WEIGHT_NAME}'
    '_{epoch:03d}_loss-{loss:.3f}_valloss-{val_loss:.3f}.h5'
)

# Initial input shape; reassigned from the loaded data in the data-loading cells.
N_FEATURES = (79,)
# NOTE(review): the visible model.fit call hard-codes batch_size=64 and does
# not read this constant -- confirm which value is intended.
BATCH_SIZE = 8

### load data
---

In [0]:
def load_data(dataset: str,
              username: str,
              session: str,
              mode: str,
              legal: bool = True) -> np.ndarray:
    """Load the feature rows of one session file for a user, or for everyone else.

    Files are read from ``./dataset/{dataset}/{mode}_features/{user}/{session}``
    as header-less, comma-separated CSV.

    Args:
        dataset: Name of the dataset folder under ``./dataset``.
        username: Folder name of the user of interest (e.g. ``'user07'``).
        session: File name of the feature file inside each user folder.
        mode: Prefix of the ``{mode}_features`` folder (the notebook passes
            ``'train'`` and ``'test'``).
        legal: If True, return only the rows of ``username``. If False, return
            the rows of every ``user*`` folder except ``username``, stacked
            vertically in sorted folder order.

    Returns:
        2-D array with one row per CSV line. ``None`` is returned when
        ``legal`` is False and there is no other ``user*`` folder.
    """
    features_root = f"./dataset/{dataset}/{mode}_features"

    if legal:
        path = f"{features_root}/{username}/{session}"
        return pd.read_csv(path, sep=',', header=None).values

    # glob returns entries in filesystem-dependent order; sorting makes the
    # row order of the stacked matrix reproducible between runs and machines.
    chunks = []
    for user_dir in sorted(glob.glob(f"{features_root}/user*")):
        if os.path.basename(user_dir) == username:
            continue
        session_path = os.path.join(user_dir, session)
        chunks.append(pd.read_csv(session_path, sep=',', header=None).values)

    # Stack once at the end instead of re-copying the growing matrix per user.
    return np.vstack(chunks) if chunks else None

In [0]:
# Dataset folder under ./dataset that load_data() reads from.
DATASET = 'BALABIT'
# User whose rows are loaded as the "legal" class; load_data(..., legal=False)
# returns the rows of every other user* folder.
USERNAME = 'user07'
# Name of the feature file read from each user's folder.
SESSION = 'session_all_quantile_4'

In [0]:
# Training matrix: the rows of USERNAME come first, followed by the rows of
# all other users.
X_legal = load_data(DATASET, USERNAME, SESSION, mode='train')
X_illegal = load_data(DATASET, USERNAME, SESSION, mode='train', legal=False)
X = np.concatenate([X_legal, X_illegal], axis=0)
# Input shape for the model: a 1-tuple holding the number of feature columns.
N_FEATURES = X.shape[1:]
X.shape

(47310, 79)

In [0]:
# Labels for the fully connected NN: one-hot rows, column 0 = legal user,
# column 1 = illegal users.  X stacks the legal rows first, so rows
# [0, n_legal) are legal and the remainder is illegal.
# The split point is the ROW count (shape[0]); shape[1] is the number of
# feature columns and would leave most rows labelled [0, 0].
n_legal = X_legal.shape[0]
y = np.zeros((X.shape[0], 2))
y[:n_legal, 0] = 1
y[n_legal:, 1] = 1

In [0]:
# for autoencoder
# y = X.copy()

---

In [0]:
# Test matrix: the rows of USERNAME come first, followed by all other users.
X_test_legal = load_data(DATASET, USERNAME, SESSION, 'test')
X_test_illegal = load_data(DATASET, USERNAME, SESSION, 'test', legal=False)
X_test = np.vstack((X_test_legal, X_test_illegal))
N_FEATURES = (X_test.shape[1], )

# One-hot labels: column 0 = legal user, column 1 = illegal users.
# The split point is the ROW count (shape[0]); shape[1] is the number of
# feature columns and would leave most rows labelled [0, 0].
n_test_legal = X_test_legal.shape[0]
y_test = np.zeros((X_test.shape[0], 2))
y_test[:n_test_legal, 0] = 1
y_test[n_test_legal:, 1] = 1

X_test.shape

(44807, 79)

### model
---

In [0]:
def get_model(input_shape):  # "FullyConnected"
    """Build a dense classifier with dropout after every hidden layer.

    Hidden Dense layers of 64, 32, 16 and 8 units use ReLU and are each
    followed by Dropout(rate=0.4); the output layer has 2 softmax units.

    Note: two later cells of this notebook define `get_model` again, so on a
    top-to-bottom run the last definition is the one in effect.

    Args:
        input_shape: Shape tuple passed as `input_shape` to the first layer.

    Returns:
        The (uncompiled) Sequential model.
    """
    layers = [
        Dense(units=64, input_shape=input_shape, activation=ReLU()),
        Dropout(rate=0.4),
    ]
    for width in (32, 16, 8):
        layers.append(Dense(units=width, activation=ReLU()))
        layers.append(Dropout(rate=0.4))
    layers.append(Dense(units=2, activation=Softmax()))

    return Sequential(layers)

In [0]:
def get_model(input_shape):
    """Build a small classifier: one hidden layer, dropout, softmax output.

    The hidden Dense layer has 32 ReLU units and is followed by
    Dropout(rate=0.5); the output layer has 2 softmax units.

    Note: an earlier and a later cell of this notebook also define
    `get_model`; on a top-to-bottom run the last definition is the one in
    effect.

    Args:
        input_shape: Shape tuple passed as `input_shape` to the first layer.

    Returns:
        The (uncompiled) Sequential model.
    """
    return Sequential([
        Dense(units=32, input_shape=input_shape, activation=ReLU()),
        Dropout(rate=0.5),
        Dense(units=2, activation=Softmax()),
    ])

In [0]:
def get_model(input_shape):
    """Build a deep batch-normalised classifier with ELU activations.

    The network starts with BatchNormalization on the input, then repeats
    (Dense, Dense, BatchNormalization) for widths 64, 32, 16, 8 and 4, and
    ends with a 2-unit softmax layer.

    Note: two earlier cells of this notebook also define `get_model`; this
    last definition shadows them on a top-to-bottom run.

    Args:
        input_shape: Shape tuple passed as `input_shape` to the first layer.

    Returns:
        The (uncompiled) Sequential model.
    """
    layers = [BatchNormalization(input_shape=input_shape)]
    for width in (64, 32, 16, 8, 4):
        # Two equally wide ELU layers followed by a normalisation step.
        layers.append(Dense(units=width, activation=ELU()))
        layers.append(Dense(units=width, activation=ELU()))
        layers.append(BatchNormalization())
    layers.append(Dense(units=2, activation=Softmax()))

    return Sequential(layers)

In [0]:
# Build the network for the current input shape and print its layer summary.
model = get_model(N_FEATURES)
model.summary()

### Custom metrics
---

In [0]:
def FAR(y_true, y_pred):
    """False-acceptance share of a batch.

    Both arguments are reduced to class indices with argmax over axis 1.
    The result is the number of samples whose true class is 1 but whose
    predicted class is 0, divided by the total number of samples in the batch.

    Args:
        y_true: Tensor of true one-hot labels, classes along axis 1.
        y_pred: Tensor of predicted class scores, classes along axis 1.

    Returns:
        Scalar float32 tensor.
    """
    true_cls = K.argmax(y_true, axis=1)
    pred_cls = K.argmax(y_pred, axis=1)
    false_accept = (K.cast(K.equal(true_cls, 1), 'float32')
                    * K.cast(K.equal(pred_cls, 0), 'float32'))
    # The mean of the 0/1 indicator equals count / batch size.  Working in
    # float32 avoids the overflow of an int8 sum above 127 hits, and K.mean
    # does not need the static batch dimension (which is None in graph mode).
    return K.mean(false_accept)

def FRR(y_true, y_pred):
    """False-rejection share of a batch.

    Both arguments are reduced to class indices with argmax over axis 1.
    The result is the number of samples whose true class is 0 but whose
    predicted class is 1, divided by the total number of samples in the batch.

    Args:
        y_true: Tensor of true one-hot labels, classes along axis 1.
        y_pred: Tensor of predicted class scores, classes along axis 1.

    Returns:
        Scalar float32 tensor.
    """
    true_cls = K.argmax(y_true, axis=1)
    pred_cls = K.argmax(y_pred, axis=1)
    false_reject = (K.cast(K.equal(true_cls, 0), 'float32')
                    * K.cast(K.equal(pred_cls, 1), 'float32'))
    # The mean of the 0/1 indicator equals count / batch size.  Working in
    # float32 avoids the overflow of an int8 sum above 127 hits, and K.mean
    # does not need the static batch dimension (which is None in graph mode).
    return K.mean(false_reject)

### Compile
---

In [0]:
# `learning_rate` is the current name of the Adam step-size argument; `lr` is
# a deprecated alias that newer Keras releases reject.
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['mse'])

### fit
---

In [0]:
# CALLBACKS
# Weight checkpoints go to CHECKPOINT_PATH; only improvements of the
# monitored validation loss (lower is better) are saved.
checkpointer = ModelCheckpoint(
    filepath=CHECKPOINT_PATH,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)
# Training log written as CSV to LOG_NAME.
logger = CSVLogger(filename=LOG_NAME)

In [0]:
# Train for 50 epochs on (X, y); (X_test, y_test) is passed as validation data.
# The logger/checkpointer callbacks are currently disabled (kept commented out).
model.fit(
    x=X,
    y=y,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    shuffle=True,
    verbose=1,
    # callbacks=[logger, checkpointer],
)

In [0]:
# Evaluate on the TRAINING data.  X was stacked legal-first
# (np.vstack((X_legal, X_illegal))), so the split point must come from
# X_legal, not from the test arrays.
y_pred = model.predict(X)
pred_cls = np.argmax(y_pred, axis=1)
true_cls = np.argmax(y, axis=1)
legal_rows = slice(0, X_legal.shape[0])
illegal_rows = slice(X_legal.shape[0], None)

acc = np.mean(pred_cls == true_cls)
# Errors on illegal rows are false acceptances, errors on legal rows are false
# rejections -- the same convention as the FAR()/FRR() metric functions.
# Lower-case names keep those metric functions from being overwritten.
far = np.mean(pred_cls[illegal_rows] != true_cls[illegal_rows])
frr = np.mean(pred_cls[legal_rows] != true_cls[legal_rows])
print(f"accuracy = {acc}, FAR = {far}, FRR = {frr}")

In [0]:
# Evaluate on the test data.  X_test was stacked legal-first
# (np.vstack((X_test_legal, X_test_illegal))).
y_pred = model.predict(X_test)
pred_cls = np.argmax(y_pred, axis=1)
true_cls = np.argmax(y_test, axis=1)
legal_rows = slice(0, X_test_legal.shape[0])
illegal_rows = slice(X_test_legal.shape[0], None)

acc = np.mean(pred_cls == true_cls)
# Errors on illegal rows are false acceptances, errors on legal rows are false
# rejections -- the same convention as the FAR()/FRR() metric functions.
# Lower-case names keep those metric functions from being overwritten.
far = np.mean(pred_cls[illegal_rows] != true_cls[illegal_rows])
frr = np.mean(pred_cls[legal_rows] != true_cls[legal_rows])
print(f"accuracy = {acc}, FAR = {far}, FRR = {frr}")

In [0]:
# Second test set built around user09: that user's rows first, then the rows
# of all other users.
# NOTE(review): the session file here is 'session_all', while the training
# data was loaded with SESSION -- confirm the features are comparable.
X_test_legal = load_data('BALABIT', 'user09', 'session_all', mode='test')
X_test_illegal = load_data('BALABIT', 'user09', 'session_all', mode='test', legal=False)
X_test = np.concatenate([X_test_legal, X_test_illegal], axis=0)
N_FEATURES = X_test.shape[1:]

# Every row is labelled as class 1 (one-hot [0, 1]).
# NOTE(review): load_data(..., legal=False) skips only user09, so the rows of
# USERNAME (if present in this split) are presumably labelled class 1 here as
# well -- confirm this is intended.
y_test = np.zeros((len(X_test), 2))
y_test[:, 1] = 1

X_test.shape

(134401, 79)

In [0]:
# Evaluate on the user09-based test set.
y_pred = model.predict(X_test)
pred_cls = np.argmax(y_pred, axis=1)
true_cls = np.argmax(y_test, axis=1)
first_rows = slice(0, X_test_legal.shape[0])
remaining_rows = slice(X_test_legal.shape[0], None)

acc = np.mean(pred_cls == true_cls)
# Lower-case names keep the FAR()/FRR() metric functions from being overwritten.
# NOTE(review): every row of y_test is labelled class 1 in the preceding cell,
# so a mismatch in either slice is a class-0 prediction on a class-1 row; the
# value printed as FRR is therefore not a rejection rate -- confirm the naming.
far = np.mean(pred_cls[first_rows] != true_cls[first_rows])
frr = np.mean(pred_cls[remaining_rows] != true_cls[remaining_rows])
print(f"accuracy = {acc}, FAR = {far}, FRR = {frr}")