## CUSTOMIZATION
---

In [0]:
import tensorflow as tf
tf.test.gpu_device_name()
# to attach a GPU: Runtime -> Change runtime type -> Hardware accelerator: GPU
# expected output: '/device:GPU:0'

'/device:GPU:0'

In [0]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
# mount Google Drive under /content/gdrive

In [0]:
# копируем и разархивируем файлы в colab
! cp /content/gdrive/'My Drive'/Mouse/dataset.zip . # тут точка!
! unzip -q dataset
! rm dataset.zip
! ls

dataset  gdrive  sample_data


### import
---

In [0]:
from tensorflow.keras.models import Sequential, Model, save_model, load_model
from tensorflow.keras.layers import (Dense, BatchNormalization, Dropout,
                                     Input, ReLU, ELU, Softmax)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC
from tensorflow.keras.losses import mean_squared_error
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import glob
import os

### global
---

In [0]:
# Experiment number; every artefact name below is derived from it.
N_EXPERIMENT = 1

# Initial values; N_FEATURES is reassigned from the loaded data further down.
N_FEATURES = (79,)
BATCH_SIZE = 64

# Artefact names.
MODEL_NAME = f'{N_EXPERIMENT}_mouse_model'
WEIGHT_NAME = f'{N_EXPERIMENT}_mouse_weight'
LOG_NAME = f'{N_EXPERIMENT}_training_log.csv'

# Output directory on the mounted Google Drive.
GDRIVE_PATH = f'./gdrive/My Drive/Mouse/{N_EXPERIMENT}'

# The second part is deliberately NOT an f-string: its {epoch}/{loss}/{val_loss}
# placeholders must stay in the string so they can be filled in later.
CHECKPOINT_PATH = (
    f'{GDRIVE_PATH}/{WEIGHT_NAME}'
    + '_{epoch:03d}_loss-{loss:.3f}_valloss-{val_loss:.3f}.h5'
)

### load data
---

In [0]:
from sklearn.preprocessing import StandardScaler

In [0]:
def load_data(dataset: str,
              username: str,
              session: str,
              mode: str):
    """Load and standardize one session file from every user directory of a split.

    Every directory matching ``./dataset/{dataset}/{mode}_features/user*`` is
    visited (in sorted order), the file named ``session`` inside it is read as
    a header-less comma-separated table, and all rows are stacked into one
    matrix. The columns are then standardized with a ``StandardScaler`` that
    is fitted on exactly the data loaded by this call.

    NOTE(review): because a fresh scaler is fitted on every call, two splits
    loaded through this function are standardized with different statistics.

    Args:
        dataset: Name of the dataset directory under ``./dataset``.
        username: Currently unused -- the files of every ``user*`` directory
            are loaded. NOTE(review): confirm whether loading should be
            restricted to this user.
        session: File name to read inside each user directory.
        mode: Split to read, either ``'train'`` or ``'test'``.

    Returns:
        A tuple ``(X, X.copy())``: the standardized matrix and an independent
        copy of it.

    Raises:
        ValueError: If ``mode`` is neither ``'train'`` nor ``'test'``.
        FileNotFoundError: If no user directory matches the pattern (or, from
            ``pd.read_csv``, if a session file is missing).
    """
    # Raise instead of calling exit(): in a notebook exit() takes down the
    # kernel rather than reporting a usable error.
    if mode not in ('train', 'test'):
        raise ValueError(f"Invalid parameter <mode>: {mode!r}; "
                         f"available: 'train', 'test'")

    pattern = f"./dataset/{dataset}/{mode}_features/user*"
    # sorted() makes the row order independent of the file-system listing order.
    user_dirs = sorted(glob.glob(pattern))
    if not user_dirs:
        raise FileNotFoundError(f"No user directories match {pattern!r}")

    # Read every file first and stack once, instead of re-copying the growing
    # matrix on each iteration.
    chunks = [
        pd.read_csv(os.path.join(user_dir, session), sep=',', header=None).values
        for user_dir in user_dirs
    ]
    X = np.vstack(chunks)

    X = StandardScaler().fit_transform(X)

    return X, X.copy()

In [0]:
# Data selection: dataset directory, user name and session file name.
DATASET, USERNAME, SESSION = 'BALABIT', 'user07', 'session_all'

In [0]:
# Load both splits; each call returns the feature matrix and a copy of it.
X, y = load_data(dataset=DATASET, username=USERNAME, session=SESSION, mode='train')
X_test, y_test = load_data(dataset=DATASET, username=USERNAME, session=SESSION, mode='test')

# Replace the hard-coded feature count with the width of the loaded training matrix.
N_FEATURES = (X.shape[1],)

print(f"train: {X.shape}",
      f"test : {X_test.shape}",
      sep="\n")

### model
---

In [0]:
def get_model(input_shape):
    """Build a dense encoder-decoder with hidden widths 64-32-16-32-64.

    NOTE: a later cell of this notebook defines ``get_model`` again; when the
    notebook is run top to bottom that later definition replaces this one.

    Args:
        input_shape: Shape tuple of one sample; ``input_shape[0]`` is also
            used as the width of the output layer.

    Returns:
        An uncompiled Keras ``Model`` named ``"ED"`` whose output has the same
        width as its input.
    """
    i = Input(shape=input_shape)

    # Encoder (64, 32), bottleneck (16), decoder (32, 64).
    # Variants tried earlier and left disabled: a second Dense layer of the
    # same width after each stage, and Dropout(rate=0.1) after the 64/32 stages.
    hidden = i
    for units in (64, 32, 16, 32, 64):
        # Use the string identifier rather than an ELU() layer instance:
        # `activation` expects an activation function, not a layer.
        hidden = Dense(units=units, activation='elu')(hidden)

    # Linear output layer reconstructing the input features.
    o = Dense(units=input_shape[0])(hidden)

    encoder_decoder = Model(inputs=i, outputs=o, name="ED")
    return encoder_decoder

In [0]:
def get_model(input_shape):
    """Build a small dense encoder-decoder with hidden widths 64-32-64.

    NOTE: this redefines ``get_model`` from the previous cell; when the
    notebook is run top to bottom, this is the definition that is used.

    Args:
        input_shape: Shape tuple of one sample; ``input_shape[0]`` is also
            used as the width of the output layer.

    Returns:
        An uncompiled Keras ``Model`` named ``"ED"`` whose output has the same
        width as its input.
    """
    i = Input(shape=input_shape)
    # Use the string identifier rather than an ELU() layer instance:
    # `activation` expects an activation function, not a layer.
    d = Dense(units=64, activation='elu')(i)   # encoder
    m = Dense(units=32, activation='elu')(d)   # bottleneck
    u = Dense(units=64, activation='elu')(m)   # decoder
    o = Dense(units=input_shape[0])(u)         # linear reconstruction

    encoder_decoder = Model(inputs=i, outputs=o, name="ED")
    return encoder_decoder

### Compile
---


In [0]:
# Instantiate the network for the current feature width and print its layer table.
model = get_model(input_shape=N_FEATURES)
model.summary()

In [0]:
# Mean-squared reconstruction error optimized with Adam.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
model.compile(optimizer=Adam(learning_rate=0.001),
              loss=mean_squared_error)

### fit
---

In [0]:
# CALLBACKS
checkpointer = ModelCheckpoint(filepath=CHECKPOINT_PATH, monitor='val_loss',
                               verbose=1, save_best_only=True,
                               save_weights_only=True, mode='min')
logger = CSVLogger(LOG_NAME)

In [0]:
# Train against the copy of the inputs, validating on the test split.
# NOTE(review): batch_size is hard-coded to 16 here although BATCH_SIZE = 64
# is defined in the globals cell -- confirm which value is intended.
model.fit(
    x=X,
    y=y,
    batch_size=16,
    epochs=10,
    verbose=1,
    # callbacks=[logger, checkpointer],
    validation_data=(X_test, y_test),
)
...

In [0]:
# Load the session of the selected user only and standardize it.
# The original cell referenced lowercase `dataset`, `mode`, `username` and
# `session`, which are not defined anywhere at notebook level (NameError on a
# fresh kernel); the notebook-level constants are used instead.
# NOTE(review): the 'test' split is an assumption -- confirm the intended split.
valid_path = f"./dataset/{DATASET}/test_features/{USERNAME}/{SESSION}"
X_valid = pd.read_csv(valid_path, sep=',', header=None).values
# NOTE(review): a fresh scaler is fitted on this data alone, so it is not
# scaled with the statistics of the training data.
X_valid = StandardScaler().fit_transform(X_valid)

In [0]:
# Run the trained network on the validation matrix; the result is the cell output.
model.predict(x=X_valid)