In [1]:
import sys
import os
from math import log
import numpy as np
import scipy as sp
from PIL import Image
import matplotlib.pyplot as plt
import joblib
import pandas as pd
from sklearn.model_selection import train_test_split
from art.estimators.classification import KerasClassifier
import keras
from tensorflow.keras.layers import Input
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, InputLayer, Reshape
from tensorflow.keras.layers import BatchNormalization
# from keras.layers.normalization import BatchNormalization
import tensorflow as tf
from art.attacks import ExtractionAttack
from art.attacks.extraction import CopycatCNN, KnockoffNets
from art.defences.postprocessor import ReverseSigmoid
from keras.preprocessing.image import ImageDataGenerator
from art.attacks.evasion import FastGradientMethod
from art.attacks.evasion import BasicIterativeMethod
from art.defences.trainer import AdversarialTrainer
from art.estimators.classification import TensorFlowV2Classifier

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Root directory of the image dataset (one sub-folder per class).
# The MALIMG_DATASET_DIR environment variable overrides the location so the
# notebook is not tied to a single machine; when it is unset, the original
# hard-coded local path is used unchanged.
path_root = os.environ.get(
    "MALIMG_DATASET_DIR",
    "D:\\UVG\\9noSemestre\\SecurityDataScience\\Proyecto3---Security-DS\\malimg_dataset\\malimg_paper_dataset_imgs\\",
)

In [3]:
# Build a directory iterator over every image under path_root, resized to 64x64.
# batch_size (10000) is larger than the number of images found, so a single
# batch holds the entire dataset.
# seed pins the iterator's shuffle order so that re-running the notebook yields
# the images (and therefore any later split) in the same order.
batches = ImageDataGenerator().flow_from_directory(
    directory=path_root,
    target_size=(64, 64),
    batch_size=10000,
    seed=42,
)

Found 9339 images belonging to 25 classes.


In [4]:
# Pull one batch (images and labels) from the iterator. Because batch_size
# exceeds the number of images found, this single batch is the whole dataset.
imgs, labels = next(batches)

In [5]:
# The images are RGB at 64x64: the array is (n_images, 64, 64, 3).
imgs.shape

(9339, 64, 64, 3)

In [6]:
# Hold out 45% of the samples for testing. Dividing by 255 rescales pixel
# values (presumably 0-255 — TODO confirm) into the [0, 1] range.
# random_state pins which row indices land in each split; the split is only
# fully reproducible if imgs/labels also arrive in a deterministic order.
X_train, X_test0, y_train, y_test0 = train_test_split(
    imgs / 255.,
    labels,
    test_size=0.45,
    random_state=42,
)

In [7]:
# Sanity check: size of the training split (the 55% not held out for testing).
X_train.shape

(5136, 64, 64, 3)

In [8]:
# Number of output classes; matches the 25 class folders found in the dataset.
num_classes = 25


def malware_model_robust(input_shape=(64, 64, 3), n_classes=None):
    """Build and compile the CNN used to classify the malware images.

    Architecture, in order: Conv2D(30, 3x3, relu) -> MaxPooling2D(2x2)
    -> Conv2D(15, 3x3, relu) -> MaxPooling2D(2x2) -> Dropout(0.25) -> Flatten
    -> Dense(128, relu) -> Dropout(0.5) -> Dense(50, relu)
    -> Dense(n_classes, softmax).

    Args:
        input_shape: Shape of a single input sample, passed to the first
            Conv2D layer. Defaults to (64, 64, 3), the size the notebook
            loads its images at.
        n_classes: Width of the final softmax layer. When None (the default),
            the module-level ``num_classes`` is read at call time, so calling
            the function with no arguments behaves exactly as before.

    Returns:
        A Sequential model compiled with categorical cross-entropy loss, the
        Adam optimizer and the accuracy metric.
    """
    if n_classes is None:
        n_classes = num_classes

    model = Sequential()

    # Feature extractor: two conv/pool stages followed by light dropout.
    model.add(Conv2D(30, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(15, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Classifier head: flatten, two dense layers, softmax over the classes.
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(50, activation='relu'))
    model.add(Dense(n_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [9]:
# Instantiate the compiled CNN returned by malware_model_robust().
Malware_model = malware_model_robust()

In [10]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
Malware_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 62, 62, 30)        840       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 31, 31, 30)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 29, 29, 15)        4065      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 14, 14, 15)       0         
 2D)                                                             
                                                                 
 dropout (Dropout)           (None, 14, 14, 15)        0         
                                                                 
 flatten (Flatten)           (None, 2940)              0

In [11]:
# Label array shape: one row per training sample, one column per class.
y_train.shape

(5136, 25)

In [12]:
# Collapse each label row to the index of its largest entry, i.e. an integer class id.
# NOTE(review): y_train_new is not referenced by any later cell visible here — confirm it is still needed.
y_train_new = np.argmax(y_train, axis=1)
y_train_new

array([ 2,  2,  2, ...,  8,  4, 19], dtype=int64)

In [14]:
# Train for 10 epochs on the training split.
# NOTE(review): the held-out split (X_test0, y_test0) is also passed as validation data,
# so it is not a strictly unseen set for this model — confirm this is intended.
Malware_model.fit(X_train, y_train, validation_data=(X_test0, y_test0), epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1ba01e5b0d0>

In [15]:
# Save the trained model to an HDF5 file in the current working directory.
Malware_model.save('malware-listo-model-robust.h5')

In [16]:
# Turn off TensorFlow eager execution if it is currently enabled.
# NOTE(review): presumably required by the ART KerasClassifier built below — confirm.
# This is a global, order-sensitive switch: it runs after the fit/save cells above.
if tf.executing_eagerly():
    tf.compat.v1.disable_eager_execution()

In [17]:
# Rebuild a model purely from a saved HDF5 file.
# NOTE(review): this loads 'malware-listo-model.h5', while the save cell above writes
# 'malware-listo-model-robust.h5'. If the freshly trained model is meant, the filenames
# disagree; if a previously trained baseline is meant, that file must already exist on
# disk, since nothing visible in this notebook creates it — confirm which is intended.
malware_model = keras.models.load_model('malware-listo-model.h5')

In [18]:
# Wrap the loaded Keras model in an ART classifier so ART attacks can query it.
# clip_values=(0, 1) declares the valid input range; use_logits=False declares that
# the model outputs probabilities rather than logits.
classifier_original = KerasClassifier(malware_model, clip_values=(0, 1), use_logits=False)

In [19]:
# Baseline on clean data: take the highest-scoring class per test sample and
# count how many match the arg-max of the corresponding label row.
x_test_pred = classifier_original.predict(X_test0).argmax(axis=1)
nb_correct_pred = np.sum(np.argmax(y_test0, axis=1) == x_test_pred)

# Report totals; one print call with a newline separator emits the same three lines.
print(
    "Original test data ({} images):".format(len(X_test0)),
    "Correctly classified: {}".format(nb_correct_pred),
    "Incorrectly classified: {}".format(len(X_test0) - nb_correct_pred),
    sep="\n",
)

  updates=self.state_updates,


Original test data (4203 images):
Correctly classified: 4050
Incorrectly classified: 153


In [20]:
# Craft adversarial versions of every test sample with the Fast Gradient Method.
# eps=0.5 is the perturbation size; with inputs clipped to [0, 1] that is half
# of the valid value range, i.e. a very strong attack.
attacker = FastGradientMethod(classifier_original, eps=0.5)
x_test_adv = attacker.generate(X_test0)

In [21]:
# Same evaluation as for the clean data, but on the adversarial samples:
# predicted class per perturbed sample versus the true class of its source sample.
x_test_adv_pred = classifier_original.predict(x_test_adv).argmax(axis=1)
nb_correct_adv_pred = np.sum(np.argmax(y_test0, axis=1) == x_test_adv_pred)

# Report totals; one print call with a newline separator emits the same three lines.
print(
    "Adversarial test data ({} images):".format(len(X_test0)),
    "Correctly classified: {}".format(nb_correct_adv_pred),
    "Incorrectly classified: {}".format(len(X_test0) - nb_correct_adv_pred),
    sep="\n",
)

Adversarial test data (4203 images):
Correctly classified: 90
Incorrectly classified: 4113
