# Simple Model using Max Pooling

## Imports

In [1]:
import tensorflow as tf 
import tensorflow_datasets as tfds
import sklearn.metrics as metrics
import numpy as np
import pandas as pd


## Loading Datasets

In [2]:
train_path = "..\\..\\Dataset\\preprocessed\\train"
val_path = "..\\..\\Dataset\\preprocessed\\val"
test_path = "..\\..\\Dataset\\preprocessed\\test"
train_ds = tf.data.Dataset.load(train_path)
val_ds = tf.data.Dataset.load(val_path)
test_ds = tf.data.Dataset.load(test_path)

NotFoundError: NewRandomAccessFile failed to Create/Open: ..\..\Dataset\raw\dataset_spec.pb : Das System kann die angegebene Datei nicht finden.
; No such file or directory

## Creating the model

In [3]:
input_shape = (224,224,3)

model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3,3), activation="relu", input_shape=input_shape, padding="valid"),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(64, (3,3), activation="relu", padding="same"),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(64, (3,3), activation="relu", padding="same"),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(25, activation="softmax")
])

In [4]:
model.compile(
    optimizer="SGD",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["sparse_categorical_accuracy"]
)

## Training the model

In [5]:
batch_size=32
num_epochs=20
train_ds = train_ds.batch(batch_size)
val_ds = val_ds.batch(batch_size)
test_ds = test_ds.batch(batch_size)

In [6]:
early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3)

In [7]:
model.fit(train_ds, epochs=num_epochs, validation_data=val_ds, callbacks=[early_stopping])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1f7bf159e80>

In [8]:
model.save("..\\..\\Models\\SimpleModels\\MaxPooling.keras")

In [9]:
test_ds_image = test_ds.map(lambda x,y :x)
test_ds_label = test_ds.map(lambda x,y: y)

In [10]:
y_pred = np.argmax(model.predict(test_ds_image, batch_size=32), axis=-1)



In [11]:
y_pred

array([ 2,  9,  3, ...,  0, 11, 24], dtype=int64)

In [12]:
y_true = np.array([])
for image_batch, labels_batch in test_ds:
    #print(labels_batch.numpy())
    y_true = np.append(y_true, [labels_batch.numpy()])

In [13]:
print(y_true)

[ 2.  9.  3. ...  0. 11.  5.]


In [18]:
target_names = [
    "Adialer.C",
    "Agent.FYI",
    "Allaple.A",
    "Allaple.L",
    "Alueron.genU",
    "Autorun.K",
    "C2LOP.gen!g",
    "C2LOP.P",
    "Dialplatform.B",
    "Dontovo.A",
    "Fakerean",
    "Instantaccess",
    "Lolyda.AA1",
    "Lolyda.AA2",
    "Lolyda.AA3",
    "Lolyda.AT",
    "Malex.gen!J",
    "Obfuscator.AD",
    "Rbot!gen",
    "Skintrim.N",
    "Swizzor,gen!E",
    "Swizzor.gen!I",
    "VB.AT",
    "Wintrim.BX",
    "Yuner.A"
]
report = metrics.classification_report(y_true, y_pred, target_names=target_names, output_dict=True)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [19]:
print(report)

{'Adialer.C': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 14.0}, 'Agent.FYI': {'precision': 0.9473684210526315, 'recall': 1.0, 'f1-score': 0.972972972972973, 'support': 18.0}, 'Allaple.A': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 436.0}, 'Allaple.L': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 239.0}, 'Alueron.genU': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 30.0}, 'Autorun.K': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 14.0}, 'C2LOP.gen!g': {'precision': 0.8823529411764706, 'recall': 0.6818181818181818, 'f1-score': 0.7692307692307693, 'support': 22.0}, 'C2LOP.P': {'precision': 0.8787878787878788, 'recall': 0.9666666666666667, 'f1-score': 0.9206349206349207, 'support': 30.0}, 'Dialplatform.B': {'precision': 1.0, 'recall': 0.9629629629629629, 'f1-score': 0.9811320754716981, 'support': 27.0}, 'Dontovo.A': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 25.0}, 'Fakerean': {'precis

In [21]:
pd_report = pd.DataFrame(report).transpose()

In [23]:
pd_report.to_csv("..\\..\\Results\\SimpleModel\\MaxPooling.csv")

In [24]:
pd_report

Unnamed: 0,precision,recall,f1-score,support
Adialer.C,1.0,1.0,1.0,14.0
Agent.FYI,0.947368,1.0,0.972973,18.0
Allaple.A,1.0,1.0,1.0,436.0
Allaple.L,1.0,1.0,1.0,239.0
Alueron.genU,1.0,1.0,1.0,30.0
Autorun.K,0.0,0.0,0.0,14.0
C2LOP.gen!g,0.882353,0.681818,0.769231,22.0
C2LOP.P,0.878788,0.966667,0.920635,30.0
Dialplatform.B,1.0,0.962963,0.981132,27.0
Dontovo.A,1.0,1.0,1.0,25.0
