In [108]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from keras.optimizers import Adam
import tensorflow.keras.backend as K
from keras.callbacks import ModelCheckpoint
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import EarlyStopping
from keras.callbacks import CSVLogger
from livelossplot import PlotLossesKeras

tf.config.run_functions_eagerly(True)

In [114]:
DATASET = "traces_out/malware_web_mixed_cleaned.csv"
MODEL_CHECKPOINT_PATH = "models/malware_web_mixed_cleaned.keras"
CSV_LOGGER_PATH = "logs/malware_web_mixed_cleaned.csv"
MIN_LR = 1e-6
START_LR = 1e-3
BATCH_SIZE=64
EPOCHS=200

In [110]:
X = pd.read_csv(DATASET, header=None)
Y = X.iloc[:, -2:]
X = X.iloc[:, :-2]
print(f"shape of X: {X.shape}")
print(f"shape of Y: {Y.shape}")

shape of X: (351977, 43)
shape of Y: (351977, 2)


In [111]:
#
# Build Model (Simple LSTM)
#

model = tf.keras.Sequential([
    layers.Input(shape=(43,), batch_size=1),
    layers.Embedding(256, 32),
    layers.LSTM(16),
    layers.Dense(2, activation='sigmoid')])

model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=START_LR),
    metrics=['accuracy'])

model.summary()

In [112]:
# plot callback
plot_losses = PlotLossesKeras()
# save model to file at each epoch callback
checkpoint = ModelCheckpoint(filepath=MODEL_CHECKPOINT_PATH, 
                            monitor='val_loss',
                            verbose=1, 
                            save_best_only=True,
                            mode='min')
# learning rate adjustment callback
reduce_lr = ReduceLROnPlateau(monitor='val_loss', min_lr=MIN_LR)
# early stopping callback
early_stop = EarlyStopping(patience=20)
# csv callback
csv_logger = CSVLogger(CSV_LOGGER_PATH)

In [115]:
model.fit(X, Y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=[checkpoint, reduce_lr, early_stop, csv_logger, plot_losses],
    verbose=1)

Epoch 1/200




[1m 359/5500[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m21:59[0m 257ms/step - accuracy: 0.6585 - loss: 0.6420

KeyboardInterrupt: 