In [1]:
from TaPR_pkg import etapr

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.layers import *
from tqdm import tqdm
import optuna
import sklearn.preprocessing as pp
from scipy.special import softmax
import os
# Root folder holding the train/, validation/ and test/ CSVs plus sample_submission.csv.
data_dir = './data/'
# Folder where model weight files are stored; created on first run.
os.makedirs('./models', exist_ok=True)
model_dir = './models/'

# Folder where the final submission CSV is written; created on first run.
os.makedirs('./submissions', exist_ok=True)
submit_dir = './submissions/'

import tensorflow as tf
from tensorflow.keras.layers import * 
from tensorflow_addons.optimizers import Lookahead, AdamW
from tensorflow.keras.optimizers import Nadam
from tensorflow.nn import gelu

from sklearn.model_selection import train_test_split
from glob import glob

# Fit the scaler on the training data first

In [2]:
# Load the six training CSVs without their timestamp column. The individual
# frames keep their train1..train6 names because write_train_tfrecs uses them.
train1, train2, train3, train4, train5, train6 = [
    pd.read_csv(f"{data_dir}train/train{file_no}.csv").drop(columns='timestamp')
    for file_no in range(1, 7)
]

# Fit one MinMaxScaler on all training rows stacked together, then drop the
# stacked copy so it does not stay in memory.
all_train = pd.concat((train1, train2, train3, train4, train5, train6), axis=0)
scaler = pp.MinMaxScaler()
print(all_train.shape)
scaler.fit(all_train)
del all_train

submission = pd.read_csv(data_dir + 'sample_submission.csv')
validation = pd.read_csv(data_dir + 'validation/validation.csv').drop(columns='timestamp')

(1004402, 86)


# Placeholders for predictions

In [3]:
# Per-model anomaly scores are collected column by column in these two frames (see run_model).
# val_pred_df starts with the last column of the validation CSV
# (read elsewhere in this notebook as 'attack', i.e. the ground-truth label).
val_pred_df = pd.DataFrame(pd.read_csv(data_dir + 'validation/validation.csv').iloc[:, -1])
# test_pred_df starts empty, sharing the row index of the sample submission.
test_pred_df = pd.DataFrame(data=None, index=submission.index)

# Helper functions

In [4]:
def to_timeseries_1d(x_array, y_array=None, timestep=1, predstep=1, skipstep=1, verbose=0):
    """Cut a sequence into sliding input windows and, optionally, look-ahead targets.

    Args:
        x_array: Sequence sliced along its first axis; must expose ``shape[0]``.
        y_array: Optional sequence providing the targets. When given, each
            target is the ``predstep`` rows that directly follow the window.
        timestep: Number of rows in every input window.
        predstep: Number of rows that must remain after a window (and the
            length of each target slice).
        skipstep: Distance between the starts of consecutive windows.
        verbose: When non-zero, print the next start offset whenever it is a
            multiple of this value.

    Returns:
        ``np.array(windows)`` when ``y_array`` is None, otherwise the tuple
        ``(np.array(windows), np.array(targets))``.
    """
    has_targets = y_array is not None
    windows = []
    targets = []
    total_rows = x_array.shape[0]

    start = 0
    while True:
        stop = start + timestep
        # A window is only emitted when predstep rows are still available after it.
        if stop + predstep > total_rows:
            break

        windows.append(x_array[start:stop])
        if has_targets:
            targets.append(y_array[stop:stop + predstep])

        start += skipstep
        if verbose != 0 and start % verbose == 0:
            print(start)

    if has_targets:
        return np.array(windows), np.array(targets)
    return np.array(windows)

In [5]:
def df_gradient(df):
    """Return a float32 copy of ``df`` extended with per-column gradients.

    For every column ``c`` a new column ``c + '_grad'`` is appended holding
    ``np.gradient`` of the frame taken along the row axis. The input frame is
    not modified.
    """
    grad_columns = [name + '_grad' for name in list(df)]

    augmented = df.copy()
    # np.gradient returns an array shaped like df, one gradient column per input column.
    augmented[grad_columns] = np.gradient(df, axis=0)
    return augmented.astype('float32')

In [6]:
def plot_results(error, threshold, show_val_att=False, num_data=5000):
    """Plot an error series against a threshold, one figure per chunk.

    Args:
        error: Series of scores; must expose ``shape[0]``.
        threshold: Level drawn as a horizontal line; the y-axis is limited to
            ``[0, threshold * 5]``.
        show_val_att: When true, also draw the matching slice of
            ``val_pred_df['attack']`` (a notebook-level global).
        num_data: Number of points shown in each figure.
    """
    start = 0
    while start < error.shape[0]:
        stop = start + num_data

        figure, panel = plt.subplots(nrows=1, ncols=1)
        figure.set_size_inches(18, 5)
        panel.plot(np.array(error)[start:stop])
        panel.plot(np.array([threshold] * num_data))
        if show_val_att:
            panel.plot(np.array(val_pred_df['attack'])[start:stop])
        plt.ylim(0, threshold * 5)
        plt.show()

        start = stop

In [7]:
def get_score(true, pred, verbose=True):
    """Evaluate predictions with ``etapr.evaluate_haicon`` and return the F1 value.

    Args:
        true: Ground-truth anomaly labels.
        pred: Predicted anomaly labels.
        verbose: When true, print F1 / TaP / TaR and the number of detected anomalies.

    Returns:
        The ``'f1'`` entry of the evaluation result as ``np.float64``.
    """
    result = etapr.evaluate_haicon(anomalies=true, predictions=pred)
    if verbose:
        print(f"F1: {result['f1']:.3f} (TaP: {result['TaP']:.3f}, TaR: {result['TaR']:.3f})")
        print(f"Detected anomalies: {len(result['Detected_Anomalies'])}")
    return np.float64(result['f1'])

In [8]:
def ma_smoothing(array, ma=5):
    """Trailing moving average of width ``ma`` with a zeroed warm-up.

    Args:
        array: 1-D sequence of scores.
        ma: Window width. Values ``<= 1`` return ``array`` itself, untouched.

    Returns:
        For ``ma > 1``, an array of the same length whose first ``ma - 1``
        entries are 0 and whose remaining entries are the mean of the current
        value and the ``ma - 1`` values before it.
    """
    if ma <= 1:
        return array

    # One zero-padded copy of the series per lag 0..ma-1 (lag k shifts it right by k).
    lagged = [
        np.concatenate((np.zeros(lag), np.roll(array, lag)[lag:]))
        for lag in range(ma)
    ]
    averaged = np.stack(lagged, axis=1).mean(axis=1)

    # The first ma-1 averages mix in padding zeros, so they are replaced by plain zeros.
    warmup = np.zeros(ma - 1)
    return np.concatenate((warmup, averaged[ma - 1:]))

In [9]:
def get_anomaly_score(true, pred, dynamic=0):
    """Mean absolute error over the last axis, optionally smoothed.

    Args:
        true: Array of target values.
        pred: Array of predicted values, broadcast-compatible with ``true``.
        dynamic: Moving-average width passed to ``ma_smoothing``; 0 disables smoothing.

    Returns:
        The per-sample mean absolute error, smoothed when ``dynamic`` is non-zero.
    """
    mae = np.abs(true - pred).mean(axis=-1)
    if dynamic != 0:
        return np.array(ma_smoothing(mae, dynamic))
    return mae

In [10]:
def get_predictions(anomaly_score, threshold, padding=False):
    """Turn anomaly scores into a list of 0/1 flags.

    Args:
        anomaly_score: Iterable of scores.
        threshold: A score ``>= threshold`` is flagged 1, anything else 0.
        padding: When true, prepend ``TIMESTEP`` zeros (``TIMESTEP`` is a
            notebook-level global).

    Returns:
        A Python list of flags.
    """
    flags = [1 if value >= threshold else 0 for value in anomaly_score]
    if padding:
        flags = list(np.zeros(TIMESTEP).astype(int)) + flags
    return flags

In [11]:
# TFRecord Functions
def _bytes_feature(value):
    """Wrap a byte string in a ``tf.train.Feature`` holding a one-element BytesList.

    If ``value`` has the same type as ``tf.constant(0)`` (an eager tensor), it
    is converted with ``.numpy()`` first.
    """
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy() # BytesList won't unpack a string from an EagerTensor.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
    """Wrap a single float / double in a ``tf.train.Feature`` holding a one-element FloatList."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _int64_feature(value):
    """Wrap a single bool / enum / int / uint in a ``tf.train.Feature`` holding a one-element Int64List."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
 
def serialize_array(array):
    """Serialize ``array`` to a string tensor via ``tf.io.serialize_tensor``."""
    return tf.io.serialize_tensor(array)

def single_example(x, y):
    """Encode one (window, target) pair as a serialized ``tf.train.Example``.

    The example stores the two leading dimensions of ``x`` under ``'timestep'``
    and ``'features'`` and the serialized tensors under ``'x_array'`` and
    ``'y_array'``.

    Returns:
        The serialized example bytes.
    """
    n_steps, n_features = x.shape[0], x.shape[1]
    payload = tf.train.Features(feature={
        'timestep': _int64_feature(n_steps),
        'features': _int64_feature(n_features),
        'x_array': _bytes_feature(serialize_array(x)),
        'y_array': _bytes_feature(serialize_array(y)),
    })
    return tf.train.Example(features=payload).SerializeToString()

def parse_and_decode(record):
    """Decode one serialized example written by ``single_example``.

    Returns:
        ``(x, y)`` where ``x`` is a float32 tensor reshaped to the stored
        ``(timestep, features)`` and ``y`` is a float32 tensor reshaped to
        the fixed shape ``(1, 86)``.
    """
    scalar_int = tf.io.FixedLenFeature([], tf.int64)
    scalar_bytes = tf.io.FixedLenFeature([], tf.string)
    schema = {
        'timestep': scalar_int,
        'features': scalar_int,
        'x_array': scalar_bytes,
        'y_array': scalar_bytes,
    }
    fields = tf.io.parse_single_example(record, schema)

    x = tf.reshape(
        tf.io.parse_tensor(fields['x_array'], out_type='float32'),
        (fields['timestep'], fields['features']),
    )
    # The target shape is hard-coded: one prediction step of 86 values.
    y = tf.reshape(tf.io.parse_tensor(fields['y_array'], out_type='float32'), (1, 86))
    return (x, y)


def _write_tfrecord_file(filename, x_windows, y_windows):
    """Serialize paired (x, y) windows, in order, into one TFRecord file."""
    with tf.io.TFRecordWriter(filename) as writer:
        for j in tqdm(range(x_windows.shape[0])):
            writer.write(single_example(x_windows[j], y_windows[j]))


def write_train_tfrecs(suffix, test_size=0.1, shuffle=False):
    """Preprocess train1..train6 and write train/valid TFRecord files.

    Each training frame is scaled with the fitted ``scaler``, smoothed with
    an exponentially weighted mean (``alpha=0.9``), extended with gradient
    columns, windowed with ``TIMESTEP`` input rows and one target row (the
    first 86 columns), and split with ``train_test_split``. The two parts go to
    ``./tfrecs/train{TIMESTEP}-{i}-{suffix}.tfrecs`` and
    ``./tfrecs/valid{TIMESTEP}-{i}-{suffix}.tfrecs``.

    Relies on the notebook-level globals ``train1``..``train6``, ``scaler``
    and ``TIMESTEP``.

    Args:
        suffix: Tag appended to the output file names.
        test_size: Fraction of windows written to the valid file.
        shuffle: Whether ``train_test_split`` shuffles windows before splitting.
    """
    os.makedirs('./tfrecs', exist_ok=True)

    for i, df in enumerate([train1, train2, train3, train4, train5, train6]):
        df = pd.DataFrame(scaler.transform(df), index=df.index, columns=df.columns).ewm(alpha=0.9).mean().astype('float32')
        df = df_gradient(df)

        x_data, y_data = to_timeseries_1d(df, df.iloc[:, :86], timestep=TIMESTEP, predstep=1, skipstep=1)
        x_train, x_valid, y_train, y_valid = train_test_split(x_data, y_data, test_size=test_size, shuffle=shuffle, random_state=18)
        # The unsplit windows are no longer needed; drop the references early.
        del x_data, y_data

        print(x_train.shape, y_train.shape, x_valid.shape, y_valid.shape)

        train_filename = f"./tfrecs/train{TIMESTEP}-{i}-{suffix}.tfrecs"
        valid_filename = f"./tfrecs/valid{TIMESTEP}-{i}-{suffix}.tfrecs"

        _write_tfrecord_file(train_filename, x_train, y_train)
        # Release the training windows before the validation file is written.
        del x_train, y_train

        _write_tfrecord_file(valid_filename, x_valid, y_valid)
        del x_valid, y_valid

In [12]:
def get_valid_dataset():
    """Load the validation CSV and window it like the training data.

    The feature columns are scaled with the fitted ``scaler``, smoothed with
    an exponentially weighted mean (``alpha=0.9``), extended with gradient
    columns and cut into ``TIMESTEP``-row windows. Relies on the
    notebook-level globals ``data_dir``, ``scaler`` and ``TIMESTEP``.

    Returns:
        ``(X, Y, attack)``: the input windows, the next-row targets (first 86
        columns) and the squeezed attack labels aligned with those targets.
    """
    raw = pd.read_csv(data_dir + 'validation/validation.csv').drop(columns='timestamp')
    VALID_ATTACK = raw['attack']
    features = raw.drop(columns='attack')

    scaled = pd.DataFrame(scaler.transform(features), index=features.index, columns=features.columns)
    smoothed = scaled.ewm(alpha=0.9).mean().astype('float32')
    enriched = df_gradient(smoothed)

    X, Y = to_timeseries_1d(enriched, enriched.iloc[:, :86], timestep=TIMESTEP)
    # Only the target half of the windowed labels is used.
    windowed_labels = to_timeseries_1d(VALID_ATTACK, VALID_ATTACK, timestep=TIMESTEP)[1]
    attack = np.squeeze(windowed_labels)

    return X, Y, attack


def get_test_dataset():
    """Load test1..test3 and window each file like the training data.

    Every file is scaled with the fitted ``scaler``, smoothed with an
    exponentially weighted mean (``alpha=0.9``), extended with gradient
    columns and cut into ``TIMESTEP``-row windows. Relies on the
    notebook-level globals ``data_dir``, ``scaler`` and ``TIMESTEP``.

    Returns:
        ``((Xt1, Xt2, Xt3), (Yt1, Yt2, Yt3))``: per-file input windows and
        per-file next-row targets (first 86 columns).
    """
    inputs = []
    targets = []
    for name in ('test1', 'test2', 'test3'):
        frame = pd.read_csv(data_dir + f'test/{name}.csv').drop(columns='timestamp')
        frame = pd.DataFrame(scaler.transform(frame), index=frame.index, columns=frame.columns)
        frame = df_gradient(frame.ewm(alpha=0.9).mean().astype('float32'))

        x_windows, y_windows = to_timeseries_1d(frame, frame.iloc[:, :86], timestep=TIMESTEP)
        inputs.append(x_windows)
        targets.append(y_windows)

    return tuple(inputs), tuple(targets)

def get_train_dataset(suffix, test_size=0.1, shuffle=True, write=True):
    """Build the training and validation ``tf.data`` pipelines from TFRecords.

    Args:
        suffix: Tag used in the TFRecord file names.
        test_size: Validation fraction, strictly between 0 and 1; only used
            when the records are (re)written.
        shuffle: Passed on to ``write_train_tfrecs``.
        write: When true, regenerate the TFRecord files first; otherwise the
            files already on disk are used.

    Returns:
        ``(train_dataset, valid_dataset)``. The training pipeline is shuffled
        (buffer 4096) and batched by ``BATCH_SIZE``; the validation pipeline is
        batched by ``BATCH_SIZE * 2`` and cached. ``TIMESTEP``, ``BATCH_SIZE``
        and ``AUTO`` are notebook-level globals.
    """
    assert 0 < test_size < 1

    if write:
        write_train_tfrecs(suffix, test_size, shuffle)

    train_files = glob(f"./tfrecs/train{TIMESTEP}-*-{suffix}.tfrecs")
    valid_files = glob(f"./tfrecs/valid{TIMESTEP}-*-{suffix}.tfrecs")

    print(train_files)
    print(valid_files)

    train_dataset = (
        tf.data.TFRecordDataset(train_files, num_parallel_reads=AUTO)
        .map(parse_and_decode, num_parallel_calls=AUTO)
        .shuffle(4096)
        .batch(BATCH_SIZE, num_parallel_calls=AUTO)
        .prefetch(buffer_size=AUTO)
    )

    valid_dataset = (
        tf.data.TFRecordDataset(valid_files)
        .map(parse_and_decode, num_parallel_calls=AUTO)
        .batch(BATCH_SIZE*2, num_parallel_calls=AUTO)
        .cache()
        .prefetch(buffer_size=AUTO)
    )

    return train_dataset, valid_dataset

In [13]:
# Modeling Functions
def mixer_block(w, token_mixing, channel_mixing, dropout=0.5, activation=tf.nn.gelu):
    """Apply one mixer block: a token-mixing MLP, then a channel-mixing MLP.

    Args:
        w: Rank-3 Keras tensor; axis 1 is treated as patches/tokens and the
            last axis as the embedding.
        token_mixing: Hidden width of the MLP applied across axis 1.
        channel_mixing: Hidden width of the MLP applied across the last axis.
        dropout: Dropout rate used inside both MLPs.
        activation: Activation of the hidden Dense layer in both MLPs.

    Returns:
        A tensor with the same shape as ``w``.
    """
    x = w
    num_patches = x.shape[1]
    embedding_dim = x.shape[-1]
    
    # MLP over the token axis; its output width restores num_patches.
    mlp1 = tf.keras.Sequential([
        Dense(token_mixing, activation=activation), 
        Dropout(dropout),
        Dense(num_patches), 
        
    ])
    # MLP over the embedding axis; its output width restores embedding_dim.
    mlp2 = tf.keras.Sequential([
        Dense(channel_mixing, activation=activation), 
        Dropout(dropout),
        Dense(embedding_dim)
    ])
    x1 = LayerNormalization()(x)
    # Swap the two non-batch axes so Dense (which acts on the last axis) mixes tokens.
    x1 = Permute((2, 1))(x1)
    x1 = mlp1(x1)
    # Swap back to the original axis order before the residual connection.
    x1 = Permute((2, 1))(x1)
    x_res = Add()([x1, x])
    x2 = LayerNormalization()(x_res)
    x2 = mlp2(x2)
    x = Add()([x_res, x2])
    return x

In [14]:
def build_mixer(opt=None, num_blocks=3, token_mixing=64, channel_mixing=256, embed=128, dropout=0):
    """Build the mixer forecasting model.

    The model takes inputs of shape ``(TIMESTEP, 86*2)`` (``TIMESTEP`` is a
    notebook-level global), projects them to ``embed`` channels, applies
    ``num_blocks`` mixer blocks, flattens the result and emits a
    ``(1, 86)`` linear output per sample.

    Args:
        opt: Optional optimizer. When given, the model is compiled with MAE
            loss and an MAE metric; otherwise it is returned uncompiled.
        num_blocks: Number of mixer blocks.
        token_mixing: Hidden width of each block's token-mixing MLP.
        channel_mixing: Hidden width of each block's channel-mixing MLP.
        embed: Width of the initial Dense projection.
        dropout: Dropout rate passed to every mixer block.

    Returns:
        A ``tf.keras.models.Model``.
    """
    # NOTE(review): this local is never used; mixer_block is called without an
    # activation argument and therefore uses its own default.
    activation = tf.nn.gelu

    
    model_in = Input(shape=(TIMESTEP, 86*2))
    x = Dense(embed)(model_in)
    
    for _ in range(num_blocks):
        x = mixer_block(x, token_mixing, channel_mixing, dropout=dropout)
    
    x = Flatten()(x)
    # Re-insert a length-1 step axis so the output shape is (batch, 1, 86).
    x = tf.expand_dims(x, axis=1)
    model_out = Dense(86, activation='linear', name='Output', kernel_initializer='glorot_normal', dtype='float32')(x)
    
    model = tf.keras.models.Model(model_in, model_out)
    if opt is not None:
        model.compile(loss='mae', optimizer=opt, metrics='mae')
    return model

In [15]:
class SAMModel(tf.keras.Model):
    """Keras model wrapper that trains with Sharpness-Aware Minimization (SAM).

    Adapted from
    https://github.com/sayakpaul/Sharpness-Aware-Minimization-TensorFlow

    Each training step takes the gradient at the current weights, moves the
    weights by ``rho * gradient / ||gradient||``, takes the gradient again at
    that perturbed point, restores the weights and applies the second gradient
    with the compiled optimizer.
    """

    def __init__(self, build_fn, rho=0.05, params=None):
        """
        p, q = 2 for optimal results as suggested in the paper
        (Section 2)

        Args:
            build_fn: Callable returning the wrapped Keras model.
            rho: Size of the weight perturbation.
            params: Keyword arguments forwarded to ``build_fn``; ``None``
                means no arguments.
        """
        super(SAMModel, self).__init__()
        # A None default avoids sharing one mutable dict between instances.
        self.model = build_fn(**(params or {}))
        self.rho = rho

    def call(self, inputs, training=None):
        """Forward ``inputs`` through the wrapped model."""
        return self.model(inputs, training=training)

    def train_step(self, data):
        """Run one SAM update on a ``(x, y)`` batch and return the metric values."""
        (x, y) = data
        trainable_params = self.model.trainable_variables

        # Pass 1: gradient at the current weights. training=True is passed
        # explicitly so training-only layers such as Dropout are active.
        with tf.GradientTape() as tape:
            predictions = self.model(x, training=True)
            loss = self.compiled_loss(y, predictions)
        gradients = tape.gradient(loss, trainable_params)
        grad_norm = self._grad_norm(gradients)
        scale = self.rho / (grad_norm + 1e-12)

        # Move the weights to the perturbed point BEFORE the second forward
        # pass; otherwise the second gradient is taken at the same weights as
        # the first and the SAM step has no effect.
        e_ws = []
        for (grad, param) in zip(gradients, trainable_params):
            e_w = grad * scale
            param.assign_add(e_w)
            e_ws.append(e_w)

        # Pass 2: gradient at the perturbed weights.
        with tf.GradientTape() as tape:
            predictions = self.model(x, training=True)
            loss = self.compiled_loss(y, predictions)
        sam_gradients = tape.gradient(loss, trainable_params)

        # Restore the original weights, then apply the SAM gradient to them.
        for (param, e_w) in zip(trainable_params, e_ws):
            param.assign_sub(e_w)

        self.optimizer.apply_gradients(
            zip(sam_gradients, trainable_params))

        self.compiled_metrics.update_state(y, predictions)
        return {m.name: m.result() for m in self.metrics}

    def test_step(self, data):
        """Evaluate one ``(x, y)`` batch in inference mode and return the metric values."""
        (x, y) = data
        predictions = self.model(x, training=False)
        # Called for its side effect: it updates the tracked loss metric.
        self.compiled_loss(y, predictions)
        self.compiled_metrics.update_state(y, predictions)
        return {m.name: m.result() for m in self.metrics}

    def _grad_norm(self, gradients):
        """Return the global L2 norm over all non-None gradients."""
        norm = tf.norm(
            tf.stack([
                tf.norm(grad) for grad in gradients if grad is not None
            ])
        )
        return norm

In [16]:
def run_model(model_cfg, model_name, build_fn, training, do_SAM, summary=False, model_cnt=0, learning_rate=0.0003):
    """Build one model, optionally train it, and record its anomaly scores.

    The model weights are loaded from ``model_name`` after the optional
    training run. The model then scores the validation and test windows; the
    scores are stored as column ``model_{model_cnt}`` in ``val_pred_df`` and
    ``test_pred_df``, and both frames are written to CSV.

    Relies on notebook-level globals: ``model_name_list``, ``train_ds``,
    ``val_ds``, ``Xvalid``, ``Yvalid``, ``Xtests``, ``Ytests``, ``TIMESTEP``,
    ``BATCH_SIZE``, ``val_pred_df`` and ``test_pred_df``.

    Args:
        model_cfg: Keyword arguments for ``build_fn``.
        model_name: Path of the weight file (checkpoint target and load source).
        build_fn: Callable that builds the Keras model.
        training: When true, fit the model before loading the weights.
        do_SAM: When true, wrap the model in ``SAMModel``.
        summary: When true, print the summary of a freshly built model.
        model_cnt: Index used in the score column name.
        learning_rate: Learning rate of the AdamW optimizer.

    Returns:
        ``model_cnt + 1``.
    """
    model_name_list.append(model_name)

    monitor_kwargs = dict(mode='min', monitor='val_mae', verbose=True)
    callbacks = [
        tf.keras.callbacks.EarlyStopping(restore_best_weights=True, patience=9, **monitor_kwargs),
        tf.keras.callbacks.ReduceLROnPlateau(factor=0.3, patience=4, min_delta=0, **monitor_kwargs),
        tf.keras.callbacks.ModelCheckpoint(model_name, save_best_only=True, save_weights_only=True,
                                           **monitor_kwargs),
        tf.keras.callbacks.TensorBoard(log_dir=f'./logs/{model_name}', histogram_freq=1),
    ]

    if summary:
        build_fn(**model_cfg).summary()

    optimizer = AdamW(weight_decay=0.00003, learning_rate=learning_rate)

    model = SAMModel(build_fn, rho=0.03, params=model_cfg) if do_SAM else build_fn(**model_cfg)
    model.compile(loss='mae', optimizer=optimizer, metrics='mae')

    if training:
        model.fit(train_ds, epochs=200, callbacks=callbacks, validation_data=val_ds)

    if do_SAM:
        # Mark the subclassed wrapper as built so weights can be loaded without a prior call.
        model.built = True

    model.load_weights(model_name)

    column = f"model_{model_cnt}"
    # The first TIMESTEP rows have no prediction, so their score is padded with zeros.
    head_padding = list(np.zeros(TIMESTEP))

    val_score = get_anomaly_score(Yvalid, model.predict(Xvalid, batch_size=BATCH_SIZE*2), dynamic=0)
    val_pred_df[column] = head_padding + list(val_score.ravel())

    test_scores = []
    for Xtest, Ytest in tqdm(zip(Xtests, Ytests)):
        test_score = get_anomaly_score(Ytest, model.predict(Xtest, batch_size=BATCH_SIZE*2), dynamic=0)
        test_scores.extend(list(np.zeros(TIMESTEP)) + list(test_score.ravel()))

    test_pred_df[column] = test_scores

    val_pred_df.to_csv('val_pred_df.csv', index=False)
    test_pred_df.to_csv('test_pred_df.csv', index=False)

    return model_cnt + 1

# Process

In [17]:
# Index of the next model column: the number of score columns already in test_pred_df (0 on a fresh run).
MODEL_CNT = test_pred_df.shape[1]
print(MODEL_CNT)
# Training batch size; validation and prediction use twice this value.
BATCH_SIZE = 512
# Lets tf.data choose parallelism and prefetch sizes.
AUTO = tf.data.experimental.AUTOTUNE
# TensorBoard logs are written below ./logs (see run_model).
os.makedirs('./logs', exist_ok=True)
# Weight-file paths of every model passed to run_model, in call order.
model_name_list = []

0


In [18]:
# write=True    : (re)write the TFRecord files before building the datasets.
# write=False   : only load the TFRecord files that already exist on disk.
# training=True : fit each model before its weights are loaded;
#                 training=False only loads the weight files and predicts.

training = False
write = True

# Timestep : 19

In [19]:
# Window length read (as a global) by the dataset helpers, build_mixer and run_model.
TIMESTEP = 19

# Rebuild every dataset for this window length; the model cells below read these globals.
train_ds, val_ds = get_train_dataset(suffix='', test_size=0.1, shuffle=True, write=write)
Xvalid, Yvalid, attack = get_valid_dataset()
Xtests, Ytests = get_test_dataset()

In [20]:
# Mixer variant: token_mixing=64, channel_mixing=128 at the current TIMESTEP.
model_cfg = {'num_blocks': 3, 'embed': 256, 'token_mixing': 64, 'channel_mixing': 128, 'dropout': 0.5}
model_name = (
    f"{model_dir}model{TIMESTEP}-Mixer("
    f"{model_cfg['num_blocks']},{model_cfg['embed']},"
    f"{model_cfg['token_mixing']},{model_cfg['channel_mixing']}).h5"
)
MODEL_CNT = run_model(
    model_cfg, model_name,
    build_fn=build_mixer, training=training, do_SAM=True, summary=False, model_cnt=MODEL_CNT,
)
print(f"Trained Models : {MODEL_CNT}")

3it [00:08,  2.82s/it]


Trained Models : 1


In [21]:
# Mixer variant: token_mixing=128, channel_mixing=128 at the current TIMESTEP.
model_cfg = {'num_blocks': 3, 'embed': 256, 'token_mixing': 128, 'channel_mixing': 128, 'dropout': 0.5}
model_name = (
    f"{model_dir}model{TIMESTEP}-Mixer("
    f"{model_cfg['num_blocks']},{model_cfg['embed']},"
    f"{model_cfg['token_mixing']},{model_cfg['channel_mixing']}).h5"
)
MODEL_CNT = run_model(
    model_cfg, model_name,
    build_fn=build_mixer, training=training, do_SAM=True, summary=False, model_cnt=MODEL_CNT,
)
print(f"Trained Models : {MODEL_CNT}")

3it [00:10,  3.63s/it]


Trained Models : 2


In [22]:
# Mixer variant: token_mixing=256, channel_mixing=128 at the current TIMESTEP.
model_cfg = {'num_blocks': 3, 'embed': 256, 'token_mixing': 256, 'channel_mixing': 128, 'dropout': 0.5}
model_name = (
    f"{model_dir}model{TIMESTEP}-Mixer("
    f"{model_cfg['num_blocks']},{model_cfg['embed']},"
    f"{model_cfg['token_mixing']},{model_cfg['channel_mixing']}).h5"
)
MODEL_CNT = run_model(
    model_cfg, model_name,
    build_fn=build_mixer, training=training, do_SAM=True, summary=False, model_cnt=MODEL_CNT,
)
print(f"Trained Models : {MODEL_CNT}")

3it [00:15,  5.23s/it]


Trained Models : 3


In [23]:
# Mixer variant: token_mixing=64, channel_mixing=512 at the current TIMESTEP.
model_cfg = {'num_blocks': 3, 'embed': 256, 'token_mixing': 64, 'channel_mixing': 512, 'dropout': 0.5}
model_name = (
    f"{model_dir}model{TIMESTEP}-Mixer("
    f"{model_cfg['num_blocks']},{model_cfg['embed']},"
    f"{model_cfg['token_mixing']},{model_cfg['channel_mixing']}).h5"
)
MODEL_CNT = run_model(
    model_cfg, model_name,
    build_fn=build_mixer, training=training, do_SAM=True, summary=False, model_cnt=MODEL_CNT,
)
print(f"Trained Models : {MODEL_CNT}")

3it [00:10,  3.49s/it]


Trained Models : 4


In [24]:
# Mixer variant: token_mixing=128, channel_mixing=512 at the current TIMESTEP.
model_cfg = {'num_blocks': 3, 'embed': 256, 'token_mixing': 128, 'channel_mixing': 512, 'dropout': 0.5}
model_name = (
    f"{model_dir}model{TIMESTEP}-Mixer("
    f"{model_cfg['num_blocks']},{model_cfg['embed']},"
    f"{model_cfg['token_mixing']},{model_cfg['channel_mixing']}).h5"
)
MODEL_CNT = run_model(
    model_cfg, model_name,
    build_fn=build_mixer, training=training, do_SAM=True, summary=False, model_cnt=MODEL_CNT,
)
print(f"Trained Models : {MODEL_CNT}")

3it [00:12,  4.20s/it]


Trained Models : 5


In [25]:
# Mixer variant: token_mixing=256, channel_mixing=512 at the current TIMESTEP.
model_cfg = {'num_blocks': 3, 'embed': 256, 'token_mixing': 256, 'channel_mixing': 512, 'dropout': 0.5}
model_name = (
    f"{model_dir}model{TIMESTEP}-Mixer("
    f"{model_cfg['num_blocks']},{model_cfg['embed']},"
    f"{model_cfg['token_mixing']},{model_cfg['channel_mixing']}).h5"
)
MODEL_CNT = run_model(
    model_cfg, model_name,
    build_fn=build_mixer, training=training, do_SAM=True, summary=False, model_cnt=MODEL_CNT,
)
print(f"Trained Models : {MODEL_CNT}")

3it [00:17,  5.82s/it]


Trained Models : 6


# Timestep : 29

In [26]:
# Window length read (as a global) by the dataset helpers, build_mixer and run_model.
TIMESTEP = 29

# Rebuild every dataset for this window length; the model cells below read these globals.
train_ds, val_ds = get_train_dataset(suffix='', test_size=0.1, shuffle=True, write=write)
Xvalid, Yvalid, attack = get_valid_dataset()
Xtests, Ytests = get_test_dataset()

In [27]:
# Mixer variant: token_mixing=64, channel_mixing=128 at the current TIMESTEP.
model_cfg = {'num_blocks': 3, 'embed': 256, 'token_mixing': 64, 'channel_mixing': 128, 'dropout': 0.5}
model_name = (
    f"{model_dir}model{TIMESTEP}-Mixer("
    f"{model_cfg['num_blocks']},{model_cfg['embed']},"
    f"{model_cfg['token_mixing']},{model_cfg['channel_mixing']}).h5"
)
MODEL_CNT = run_model(
    model_cfg, model_name,
    build_fn=build_mixer, training=training, do_SAM=True, summary=False, model_cnt=MODEL_CNT,
)
print(f"Trained Models : {MODEL_CNT}")

3it [00:10,  3.56s/it]


Trained Models : 7


In [28]:
# Mixer variant: token_mixing=128, channel_mixing=128 at the current TIMESTEP.
model_cfg = {'num_blocks': 3, 'embed': 256, 'token_mixing': 128, 'channel_mixing': 128, 'dropout': 0.5}
model_name = (
    f"{model_dir}model{TIMESTEP}-Mixer("
    f"{model_cfg['num_blocks']},{model_cfg['embed']},"
    f"{model_cfg['token_mixing']},{model_cfg['channel_mixing']}).h5"
)
MODEL_CNT = run_model(
    model_cfg, model_name,
    build_fn=build_mixer, training=training, do_SAM=True, summary=False, model_cnt=MODEL_CNT,
)
print(f"Trained Models : {MODEL_CNT}")

3it [00:13,  4.36s/it]


Trained Models : 8


In [29]:
# Mixer variant: token_mixing=256, channel_mixing=128 at the current TIMESTEP.
model_cfg = {'num_blocks': 3, 'embed': 256, 'token_mixing': 256, 'channel_mixing': 128, 'dropout': 0.5}
model_name = (
    f"{model_dir}model{TIMESTEP}-Mixer("
    f"{model_cfg['num_blocks']},{model_cfg['embed']},"
    f"{model_cfg['token_mixing']},{model_cfg['channel_mixing']}).h5"
)
MODEL_CNT = run_model(
    model_cfg, model_name,
    build_fn=build_mixer, training=training, do_SAM=True, summary=False, model_cnt=MODEL_CNT,
)
print(f"Trained Models : {MODEL_CNT}")

3it [00:17,  5.99s/it]


Trained Models : 9


In [30]:
# Mixer variant: token_mixing=64, channel_mixing=512 at the current TIMESTEP.
model_cfg = {'num_blocks': 3, 'embed': 256, 'token_mixing': 64, 'channel_mixing': 512, 'dropout': 0.5}
model_name = (
    f"{model_dir}model{TIMESTEP}-Mixer("
    f"{model_cfg['num_blocks']},{model_cfg['embed']},"
    f"{model_cfg['token_mixing']},{model_cfg['channel_mixing']}).h5"
)
MODEL_CNT = run_model(
    model_cfg, model_name,
    build_fn=build_mixer, training=training, do_SAM=True, summary=False, model_cnt=MODEL_CNT,
)
print(f"Trained Models : {MODEL_CNT}")

3it [00:13,  4.49s/it]


Trained Models : 10


In [31]:
# Mixer variant: token_mixing=128, channel_mixing=512 at the current TIMESTEP.
model_cfg = {'num_blocks': 3, 'embed': 256, 'token_mixing': 128, 'channel_mixing': 512, 'dropout': 0.5}
model_name = (
    f"{model_dir}model{TIMESTEP}-Mixer("
    f"{model_cfg['num_blocks']},{model_cfg['embed']},"
    f"{model_cfg['token_mixing']},{model_cfg['channel_mixing']}).h5"
)
MODEL_CNT = run_model(
    model_cfg, model_name,
    build_fn=build_mixer, training=training, do_SAM=True, summary=False, model_cnt=MODEL_CNT,
)
print(f"Trained Models : {MODEL_CNT}")

3it [00:15,  5.33s/it]


Trained Models : 11


In [32]:
# Mixer variant: token_mixing=256, channel_mixing=512 at the current TIMESTEP.
model_cfg = {
             'num_blocks' : 3, 
             'embed' : 256,
             'token_mixing' : 256, 
             'channel_mixing' : 512,
             'dropout' : 0.5, 
            }

model_name = "{}model{}-Mixer({},{},{},{}).h5".format(model_dir, TIMESTEP, 
                                                                model_cfg['num_blocks'], 
                                                                model_cfg['embed'], 
                                                                model_cfg['token_mixing'], 
                                                                model_cfg['channel_mixing']
                                                               )

# Follow the notebook-wide `training` flag like every other model cell. A
# hard-coded False would leave this model untrained whenever training=True is
# set, and it would then load whatever weight file is (or is not) on disk.
MODEL_CNT = run_model(model_cfg, model_name, build_fn=build_mixer, training=training, 
                      do_SAM=True, summary=False, model_cnt=MODEL_CNT)

print(f"Trained Models : {MODEL_CNT}")

3it [00:20,  6.95s/it]


Trained Models : 12


# Timestep : 39

In [33]:
# Window length read (as a global) by the dataset helpers, build_mixer and run_model.
TIMESTEP = 39

# Rebuild every dataset for this window length; the model cells below read these globals.
train_ds, val_ds = get_train_dataset(suffix='', test_size=0.1, shuffle=True, write=write)
Xvalid, Yvalid, attack = get_valid_dataset()
Xtests, Ytests = get_test_dataset()

In [34]:
# Mixer variant: token_mixing=64, channel_mixing=128 at the current TIMESTEP.
model_cfg = {'num_blocks': 3, 'embed': 256, 'token_mixing': 64, 'channel_mixing': 128, 'dropout': 0.5}
model_name = (
    f"{model_dir}model{TIMESTEP}-Mixer("
    f"{model_cfg['num_blocks']},{model_cfg['embed']},"
    f"{model_cfg['token_mixing']},{model_cfg['channel_mixing']}).h5"
)
MODEL_CNT = run_model(
    model_cfg, model_name,
    build_fn=build_mixer, training=training, do_SAM=True, summary=False, model_cnt=MODEL_CNT,
)
print(f"Trained Models : {MODEL_CNT}")

3it [00:13,  4.39s/it]


Trained Models : 13


In [35]:
# Mixer variant: token_mixing=128, channel_mixing=128 at the current TIMESTEP.
model_cfg = {'num_blocks': 3, 'embed': 256, 'token_mixing': 128, 'channel_mixing': 128, 'dropout': 0.5}
model_name = (
    f"{model_dir}model{TIMESTEP}-Mixer("
    f"{model_cfg['num_blocks']},{model_cfg['embed']},"
    f"{model_cfg['token_mixing']},{model_cfg['channel_mixing']}).h5"
)
MODEL_CNT = run_model(
    model_cfg, model_name,
    build_fn=build_mixer, training=training, do_SAM=True, summary=False, model_cnt=MODEL_CNT,
)
print(f"Trained Models : {MODEL_CNT}")

3it [00:15,  5.18s/it]


Trained Models : 14


In [36]:
# Mixer variant: token_mixing=256, channel_mixing=128 at the current TIMESTEP.
model_cfg = {'num_blocks': 3, 'embed': 256, 'token_mixing': 256, 'channel_mixing': 128, 'dropout': 0.5}
model_name = (
    f"{model_dir}model{TIMESTEP}-Mixer("
    f"{model_cfg['num_blocks']},{model_cfg['embed']},"
    f"{model_cfg['token_mixing']},{model_cfg['channel_mixing']}).h5"
)
MODEL_CNT = run_model(
    model_cfg, model_name,
    build_fn=build_mixer, training=training, do_SAM=True, summary=False, model_cnt=MODEL_CNT,
)
print(f"Trained Models : {MODEL_CNT}")

3it [00:20,  6.88s/it]


Trained Models : 15


In [37]:
# Mixer variant: token_mixing=64, channel_mixing=512 at the current TIMESTEP.
model_cfg = {'num_blocks': 3, 'embed': 256, 'token_mixing': 64, 'channel_mixing': 512, 'dropout': 0.5}
model_name = (
    f"{model_dir}model{TIMESTEP}-Mixer("
    f"{model_cfg['num_blocks']},{model_cfg['embed']},"
    f"{model_cfg['token_mixing']},{model_cfg['channel_mixing']}).h5"
)
MODEL_CNT = run_model(
    model_cfg, model_name,
    build_fn=build_mixer, training=training, do_SAM=True, summary=False, model_cnt=MODEL_CNT,
)
print(f"Trained Models : {MODEL_CNT}")

3it [00:16,  5.60s/it]


Trained Models : 16


# Ensemble Predictions

In [38]:
# Reload the per-model anomaly scores that run_model saved to disk.
val_pred_df = pd.read_csv('val_pred_df.csv')
test_pred_df = pd.read_csv('test_pred_df.csv')
print(f"Features : {test_pred_df.shape[1]}")

# Keep the ground-truth labels, then drop any model columns excluded from the ensemble.
ATTACK = val_pred_df['attack']
drop_cols = []
val_pred_df = val_pred_df.drop(columns=drop_cols)
test_pred_df = test_pred_df.drop(columns=drop_cols)

display(val_pred_df.head())
display(test_pred_df.head())

# Optuna logs at INFO level only when verbose is switched on.
verbose = False
optuna.logging.set_verbosity(optuna.logging.INFO if verbose else optuna.logging.WARNING)

Features : 16


Unnamed: 0,attack,model_0,model_1,model_2,model_3,model_4,model_5,model_6,model_7,model_8,model_9,model_10,model_11,model_12,model_13,model_14,model_15
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,model_0,model_1,model_2,model_3,model_4,model_5,model_6,model_7,model_8,model_9,model_10,model_11,model_12,model_13,model_14,model_15
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [39]:
# Per-model ensemble parameters, one entry per model_* column in column order.
# NOTE(review): presumably the result of an Optuna search that is not part of
# this notebook — confirm where these values came from.
# Moving-average window applied to each model's anomaly score.
mas = [3, 17, 17, 3, 30, 28, 7, 29, 6, 10, 15, 20, 27, 11, 27, 30]
# Score threshold at or above which a model flags a row as an attack.
thresholds = [0.0038036438369408645, 0.0016718090333392643, 0.009060589258278696, 0.001876418496633655, 
0.0010295969251902545, 0.0016506555965589535, 0.006195710137649554, 0.003144810692142613, 0.00435580584791175, 
0.0025270679496686003, 0.0030780698595510966, 0.0032127646102156227, 0.0021219862581206626,
 0.0034109746173487123, 0.003273907662555046, 0.0013015271849548958]
# Weight of each model's 0/1 vote in the weighted sum.
weights = [2.76714374e-04, 8.40115903e-03, 1.13912089e-02, 2.24516584e-04,
 5.98973424e-04, 3.64605453e-03, 2.26548160e-05, 6.26152324e-03,
 4.77642688e-01, 3.64163688e-01, 9.42076776e-04, 3.15108554e-05,
 2.15617205e-03, 3.88060831e-05, 1.24078144e-01, 1.24109337e-04]

# Collect one 0/1 vote vector per model for the validation and the test rows.
val_result, test_result = [], []

for ma, threshold, col in zip(mas, thresholds, test_pred_df.columns):
    # Smooth the model's score with its own window, then apply its own threshold.
    val_smoothed = ma_smoothing(val_pred_df[col], ma)
    test_smoothed = ma_smoothing(test_pred_df[col], ma)
    val_result.append(get_predictions(val_smoothed, threshold))
    test_result.append(get_predictions(test_smoothed, threshold))

# Rows are samples, columns are models.
val_result = np.stack(val_result, axis=1)
test_result = np.stack(test_result, axis=1)

# Weighted vote: the rounded weighted sum of the model flags is the final label.
val_final = np.round((val_result * weights).sum(axis=1)).astype('int')
test_final = np.round((test_result * weights).sum(axis=1)).astype('int')

submission['attack'] = test_final

In [40]:
# Score the ensemble on the validation labels, then show how many test rows were flagged.
get_score(ATTACK, val_final, verbose=True)
submission['attack'].value_counts()

F1: 0.552 (TaP: 0.540, TaR: 0.565)
Detected anomalies: 5


0    267974
1      6826
Name: attack, dtype: int64

In [41]:
# Write the final submission file without the index column.
submission.to_csv(submit_dir + 'FinalSubmission1.csv', index=False)