# Notebook setup

## Imports

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import os

import tensorflow as tf
import numpy as np
import json
import random
import zlib
from tqdm import tqdm
from scipy.stats import norm, iqr
import matplotlib.pyplot as plt

## Data retrieval

In [2]:
# Mount Google Drive under /content/drive so the dataset archives and saved
# models referenced by later cells are reachable. Requires the Colab runtime.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
# Extract every dataset archive from Drive into the single folder /content/data
# (unzip flags: -o overwrite existing files without prompting, -q quiet).
!unzip -oq '/content/drive/MyDrive/datasets/dataset-malimg-clean.zip' -d '/content/data/'
!unzip -oq '/content/drive/MyDrive/datasets/dataset-malimg-poisoned.zip' -d '/content/data/'
!unzip -oq '/content/drive/MyDrive/datasets/dataset-goodware.zip' -d '/content/data/'
!unzip -oq '/content/drive/MyDrive/datasets/dataset-sorel-clean.zip' -d '/content/data/'
!unzip -oq '/content/drive/MyDrive/datasets/dataset-sorel-poisoned.zip' -d '/content/data/'
!unzip -oq '/content/drive/MyDrive/datasets/dataset-kisa-clean.zip' -d '/content/data'
!unzip -oq '/content/drive/MyDrive/datasets/dataset-kisa-poisoned.zip' -d '/content/data'

In [4]:
# Copy the JSON split descriptors into /content. Later cells open them by bare
# file name, which presumably relies on /content being the working directory
# (the Colab default) -- TODO confirm.
!cp '/content/drive/MyDrive/datasets/dataset-malimg-couples.json' '/content/dataset-malimg-couples.json'
!cp '/content/drive/MyDrive/datasets/dataset-goodware.json' '/content/dataset-goodware.json'
!cp '/content/drive/MyDrive/datasets/dataset-sorel-couples.json' '/content/dataset-sorel-couples.json'
!cp '/content/drive/MyDrive/datasets/dataset-kisa-couples.json' '/content/dataset-kisa-couples.json'

## TF/Keras imports

In [5]:
from tensorflow.keras.models import load_model
from tensorflow.keras.losses import *
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.metrics import BinaryAccuracy
from tensorflow.keras import Model
from tensorflow.keras.regularizers import *
from keras.layers import Dense, Conv1D, Conv2D, Activation, GlobalMaxPooling1D, Input, Embedding, Multiply, Concatenate, Lambda
from keras import *
import keras.backend as K
import pickle
import math

# Utils

In [6]:
def sample_filenames(sampling_amount, directory='/content/data'):
  """Return `sampling_amount` distinct entry names drawn at random from `directory`.

  Args:
    sampling_amount: how many names to draw (without replacement).
    directory: folder to list; defaults to the notebook's extracted-data folder.

  Returns:
    A list of `sampling_amount` names (not full paths).

  Raises:
    ValueError: propagated from `random.sample` when `sampling_amount` is
      negative or larger than the number of entries in `directory`.
  """
  # os.listdir yields entries in arbitrary order; sorting first makes the draw
  # reproducible once the `random` module has been seeded.
  filenames_list = sorted(os.listdir(directory))
  return random.sample(filenames_list, sampling_amount)

In [7]:
def predict_fix(data, model):
  """Call `model` directly on `data` and return the result as a NumPy array.

  `data` is first packed with `np.array` and converted to a tensor; the model
  is invoked as a callable (not through `.predict`) and its output is
  converted back with `.numpy()`.
  """
  batch = tf.convert_to_tensor(np.array(data))
  return model(batch).numpy()

In [8]:
def get_sample(filename):
  """Load a zlib-compressed sample from `data_path` as a MalConv input vector.

  The decompressed content is truncated to `maxlen` bytes, copied to the front
  of a buffer pre-filled with `padding_char`, and returned as a float32 array
  of length `maxlen`.
  """
  # Read the stored bytes and inflate them
  with open(data_path + '/' + filename, 'rb') as handle:
    compressed = handle.read()
  raw = zlib.decompress(compressed)

  # Prepare the bytes for MalConv: padding everywhere, file content at the front
  content = np.frombuffer(raw[:maxlen], dtype=np.uint8)
  padded = padding_char * np.ones((maxlen,), dtype=np.uint16)
  padded[:len(content)] = content

  return padded.astype(np.float32)

In [9]:
def get_good_sample(filename):
  """Load an uncompressed sample from `data_path` as a MalConv input vector.

  Same preparation as for compressed samples, minus the zlib step: the content
  is truncated to `maxlen` bytes, placed at the front of a buffer pre-filled
  with `padding_char`, and returned as a float32 array of length `maxlen`.
  """
  # Read the stored bytes as they are
  with open(data_path + '/' + filename, 'rb') as handle:
    raw = handle.read()

  # Prepare the bytes for MalConv: padding everywhere, file content at the front
  content = np.frombuffer(raw[:maxlen], dtype=np.uint8)
  padded = padding_char * np.ones((maxlen,), dtype=np.uint16)
  padded[:len(content)] = content

  return padded.astype(np.float32)

# Model code

In [10]:
# Global hyper-parameters and locations of the saved base-model artefacts.
# NOTE(review): `reg` and `base_model_path` are not referenced by any cell
# visible in this notebook -- confirm before removing.
reg = 0
bs = 8  # batch size applied when the tf.data pipelines are batched
maxlen = 2**20 # 1MB

base_model_path = '/content/drive/MyDrive/PoliMi Thesis/Modelli/malconv.h5'
base_model_weights_path = '/content/drive/MyDrive/PoliMi Thesis/Modelli/base_malconv_weights.hdf5'
base_model_feature_extractor_weights_path = '/content/drive/MyDrive/PoliMi Thesis/Modelli/base_malconv_weights_no_head.hdf5'

In [11]:
# Define the MalConv structure
embedding_size = 8  # width of each embedded input symbol
input_dim = 257 # every byte plus a special padding symbol
padding_char = 256  # the extra symbol: one past the largest byte value
maxlen = 2**20  # input length; repeats the value already set in the config cell

def get_malconv_structure(keep_head=True):
  """Build an untrained MalConv network over inputs of length `maxlen`.

  Args:
    keep_head: when True the model ends with the Dense(128, relu) ->
      Dense(1, sigmoid) classifier; when False the model outputs the pooled
      128-filter feature vector instead.

  Returns:
    A Keras `Model` named 'Malconv'.
  """
  inp = Input( shape=(maxlen,))
  # Map each of the `input_dim` symbols to an `embedding_size`-dimensional vector
  emb = Embedding( input_dim, embedding_size )( inp )
  # Two parallel convolutions over the same embedding; kernel == stride == 500,
  # so the windows do not overlap
  filt = Conv1D( filters=128, kernel_size=500, strides=500, use_bias=True, activation='relu', padding='valid' )(emb)
  attn = Conv1D( filters=128, kernel_size=500, strides=500, use_bias=True, activation='sigmoid', padding='valid')(emb)
  # The sigmoid branch gates the relu branch element-wise
  gated = Multiply()([filt,attn])
  # Keep, for every filter, its maximum over all window positions
  feat = GlobalMaxPooling1D()( gated )
  if keep_head:
    dense = Dense(128, activation='relu')(feat)
    outp = Dense(1, activation='sigmoid')(dense)
  else:
    outp = feat

  basemodel = Model(inp, outp, name='Malconv')

  return basemodel

def get_classification_head():
  """Create the two classifier-head layers, not yet connected to any input.

  Returns a list: the 128-unit relu layer ('dense_1') followed by the
  single-unit sigmoid layer ('dense_2').
  """
  hidden = Dense(units=128, activation='relu', name='dense_1')
  scorer = Dense(units=1, activation='sigmoid', name='dense_2')

  return [hidden, scorer]

def get_base_malconv():
  """Return a full MalConv (head included) with the base weights loaded.

  The weights are read from `base_model_weights_path`.
  """
  malconv = get_malconv_structure()
  malconv.load_weights(base_model_weights_path)
  return malconv

# Dataset code

In [12]:
class MalConvDataset(tf.keras.utils.Sequence):
    """Per-sample dataset that turns stored binaries into MalConv inputs.

    Every item is a `(file_b, label)` pair, where `file_b` is a float32 vector
    of length `maxlen` (file bytes followed by `padding_char`). `len()` is the
    number of samples, not batches: batching is left to the caller.

    Args:
        data_path: directory holding the sample files.
        hash_list: sequence of dicts with keys 'hash' (file name) and 'label'.
            A shuffled private copy is kept; the caller's list is not touched.
        maxlen: number of input positions fed to the network.
        padding_char: value used to fill positions past the end of the file.
        representation: when True, the label becomes the float32 array loaded
            from `good_repr_path` (label 0) or `malw_repr_path` (any other
            label) instead of the int8 class label.
        good_repr_path: JSON file with the goodware target representation.
        malw_repr_path: JSON file with the malware target representation.

    Raises:
        ValueError: if `representation` is True and either path is missing.
    """

    def __init__(self, data_path, hash_list, maxlen=2**20, padding_char=256, representation=False, good_repr_path=None, malw_repr_path=None):
        # Harmless on older tf.keras; newer Keras expects the base initializer
        # to run in Sequence subclasses.
        super().__init__()

        self.maxlen = maxlen
        self.padding_char = padding_char

        self.representation_learning = representation

        self.good_repr_path = good_repr_path
        self.malw_repr_path = malw_repr_path

        if self.representation_learning:
            # Fail with a clear message rather than the TypeError open(None) gives
            if self.good_repr_path is None or self.malw_repr_path is None:
                raise ValueError('representation=True requires both good_repr_path and malw_repr_path')

            with open(self.good_repr_path, 'r') as f:
                self.good_repr = json.load(f)

            with open(self.malw_repr_path, 'r') as f:
                self.malw_repr = json.load(f)

        self.data_path = data_path

        # Shuffle a private copy: random.shuffle works in place, and the same
        # list objects are shared with other datasets and cells by the caller.
        self.hash_list = list(hash_list)
        random.shuffle(self.hash_list)

    def __len__(self):
        return len(self.hash_list)

    def __getitem__(self, index):
        entry = self.hash_list[index]
        filename = entry['hash']
        label = entry['label']
        file_path = os.path.join(self.data_path, filename)

        # Open the file and get the bytes
        with open(file_path, 'rb') as f:
            bytez = f.read()

        # If it's a malware, we have to decompress it (due to dataset security);
        # files whose name ends with 'patch' are stored compressed as well.
        if label == 1 or filename.endswith('patch'):
            bytez = zlib.decompress(bytez)

        if self.representation_learning:
            if label == 0:
                label = np.float32(self.good_repr)
            else:
                label = np.float32(self.malw_repr)
        else:
            label = np.int8(label)

        # Prepare the bytes for MalConv: padding everywhere, content at the front
        file_b = np.ones( (self.maxlen,), dtype=np.uint16 )*self.padding_char
        bytez = np.frombuffer( bytez[:self.maxlen], dtype=np.uint8 )
        file_b[:len(bytez)] = bytez
        file_b = np.float32(file_b)

        return file_b, label

In [13]:
# JSON files with the target representations used when MalConvDataset is built
# with representation=True.
good_repr_path = '/content/drive/MyDrive/datasets/mean_good_repr.json'
malw_repr_path = '/content/drive/MyDrive/datasets/mean_malw_repr.json'

# (input, label) shapes and dtypes handed to tf.data.Dataset.from_generator:
# the *_repr pair describes a 128-value float label, the *_class pair a scalar
# int8 label.
out_shape_repr = (2**20, 128)
out_shape_class = (2**20, ())

output_types_repr = (tf.float32, tf.float32)
output_types_class = (tf.float32, tf.int8)

In [14]:
# Build the train/valid/test entry lists by concatenating the three
# malware "couples" descriptors and a capped slice of the goodware descriptor,
# then shuffle each list in place.
# NOTE(review): the `random` module is not seeded anywhere in the visible
# notebook, so the resulting order differs between runs.
data_path = '/content/data'

# Extract info from json files
train_list = []
valid_list = []
test_list = []

for fname in ['dataset-malimg-couples.json', 'dataset-sorel-couples.json', 'dataset-kisa-couples.json']:
  with open(fname, 'r') as f:
    print(f'Loading {fname}')
    tmp = json.load(f)
    train_list.extend(tmp['train'])
    valid_list.extend(tmp['valid'])
    test_list.extend(tmp['test'])

# Goodware: keep at most 2400 / 600 / 300 entries for train / valid / test
with open('dataset-goodware.json', 'r') as f:
  tmp = json.load(f)
  train_list.extend(tmp['train'][:2400])
  valid_list.extend(tmp['valid'][:600])
  test_list.extend(tmp['test'][:300])
  
print(len(train_list), len(valid_list), len(test_list))

random.shuffle(train_list)
random.shuffle(valid_list)
random.shuffle(test_list)

Loading dataset-malimg-couples.json
Loading dataset-sorel-couples.json
Loading dataset-kisa-couples.json
19940 5610 2804


In [15]:
# Split the test entries into three subsets and wrap each one in a batched
# tf.data pipeline.

# 1) Poisoned samples: entries whose hash ends with 'patch'
poisoned_hash = [entry for entry in test_list if entry['hash'].endswith('patch')]
print(f"Poisoned samples found: {len(poisoned_hash)}")
dataset_poisoned = MalConvDataset(data_path=data_path, hash_list=poisoned_hash)

poisoned_data_generator = (
    tf.data.Dataset
    .from_generator(lambda: dataset_poisoned,
                    output_types=output_types_class,
                    output_shapes=out_shape_class)
    .batch(bs)
)

# 2) Malware clean samples: entries labelled 1
malware_hash = [entry for entry in test_list if entry['label'] == 1]
print(f"Clean malware samples found: {len(malware_hash)}")
dataset_malware = MalConvDataset(data_path=data_path, hash_list=malware_hash)

malware_data_generator = (
    tf.data.Dataset
    .from_generator(lambda: dataset_malware,
                    output_types=output_types_class,
                    output_shapes=out_shape_class)
    .batch(bs)
)

# 3) Goodware clean samples: entries labelled 0 that are not poisoned
goodware_hash = [entry for entry in test_list
                 if entry['label'] == 0 and not entry['hash'].endswith('patch')]
print(f"Goodware samples found: {len(goodware_hash)}")
dataset_goodware = MalConvDataset(data_path=data_path, hash_list=goodware_hash)

goodware_data_generator = (
    tf.data.Dataset
    .from_generator(lambda: dataset_goodware,
                    output_types=output_types_class,
                    output_shapes=out_shape_class)
    .batch(bs)
)

Poisoned samples found: 1252
Clean malware samples found: 1252
Goodware samples found: 300


In [16]:
# Pipeline over the whole test split (all subsets mixed), batched by 8 and
# repeated indefinitely.
classification_test_dataset = MalConvDataset(data_path=data_path, hash_list=test_list, representation=False)

classification_test_data_generator = tf.data.Dataset.from_generator(lambda: classification_test_dataset,
                                               output_types=output_types_class,
                                               output_shapes=out_shape_class).batch(8).repeat()

# Transfer-learning data comes from the KISA descriptor only: train + valid
# entries are used for fitting, the test entries are held out.
with open('dataset-kisa-couples.json', 'r') as f:
  tmp = json.load(f)
  kisa_list = tmp['train'] + tmp['valid']# + tmp['test']
  kisa_test = tmp['test']
# Drop the poisoned ('patch') entries from the fitting set
transfer_learning_hashes = [x for x in kisa_list if not x['hash'].endswith('patch')]
transfer_learning_dataset = MalConvDataset(data_path=data_path, hash_list=transfer_learning_hashes, representation=False)
# Repeated indefinitely: the consumer has to bound it (e.g. with steps_per_epoch)
transfer_learning_data_generator = tf.data.Dataset.from_generator(lambda: transfer_learning_dataset,
                                               output_types=output_types_class,
                                               output_shapes=out_shape_class).batch(8).repeat()

# Held-out KISA test entries, again without the poisoned ones; not repeated
transfer_learning_test_hashes = [x for x in kisa_test if not x['hash'].endswith('patch')]
transfer_learning_test = MalConvDataset(data_path=data_path, hash_list=transfer_learning_test_hashes, representation=False)
transfer_learning_test_generator = tf.data.Dataset.from_generator(lambda: transfer_learning_test,
                                               output_types=output_types_class,
                                               output_shapes=out_shape_class).batch(8)
print(len(transfer_learning_dataset))
print(len(transfer_learning_test))

2158
239


# Network pruning

In [17]:
# NOTE(review): 'INFO' lets informational messages through; if the goal is to
# hide messages, a stricter level such as 'ERROR' may have been intended -- confirm.
tf.get_logger().setLevel('INFO') # to avoid useless messages

In [18]:
# Evaluation set for the pruning experiments: all goodware test entries plus
# the first 300 entries of the malware list.
print(len(malware_hash), len(goodware_hash))
hashlist = goodware_hash + malware_hash[:300]
print(len(hashlist))

def test_model_accuracy(model):
  """Return the binary accuracy of `model` on the entries in `hashlist`.

  The model is (re)compiled with BinaryAccuracy as its only metric, a fresh
  MalConvDataset is built over `hashlist`, and the second value returned by
  `model.evaluate` is handed back.
  """
  model.compile(metrics=[BinaryAccuracy()])
  eval_sequence = MalConvDataset(data_path=data_path, hash_list=hashlist, representation=False)
  eval_batches = tf.data.Dataset.from_generator(
      lambda: eval_sequence,
      output_types=output_types_class,
      output_shapes=out_shape_class,
  ).batch(bs)
  results = model.evaluate(x=eval_batches)
  return results[1]

1252 300
600


In [19]:
# Base MalConv with its saved weights, compiled with accuracy as the only metric
# (no loss or optimizer: evaluation only).
base_model = get_base_malconv()
base_model.compile(metrics=[BinaryAccuracy()])

In [34]:
def get_w_pert():
  """Load a fresh copy of the saved weight-perturbation model from Drive."""
  return tf.keras.models.load_model('/content/drive/MyDrive/PoliMi Thesis/Modelli/w_perturb_final.hdf5')

# Reference instance kept in the kernel; compiled for evaluation only
w_pert = get_w_pert()
w_pert.compile(metrics=[BinaryAccuracy()])
w_pert.summary()

Model: "Malconv"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_6 (InputLayer)           [(None, 1048576)]    0           []                               
                                                                                                  
 embedding_5 (Embedding)        (None, 1048576, 8)   2056        ['input_6[0][0]']                
                                                                                                  
 conv1d_10 (Conv1D)             (None, 2097, 128)    512128      ['embedding_5[0][0]']            
                                                                                                  
 conv1d_11 (Conv1D)             (None, 2097, 128)    512128      ['embedding_5[0][0]']            
                                                                                            

In [21]:
def get_subnet_replacement():
  """Load a fresh copy of the saved subnet-replacement model from Drive."""
  return tf.keras.models.load_model('/content/drive/MyDrive/PoliMi Thesis/Modelli/subnet_replacement_final.hdf5')

# Reference instance kept in the kernel; compiled for evaluation only
subnet_replacement = get_subnet_replacement()
subnet_replacement.compile(metrics=[BinaryAccuracy()])
subnet_replacement.summary()

Model: "Malconv"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_14 (InputLayer)          [(None, 1048576)]    0           []                               
                                                                                                  
 embedding_13 (Embedding)       (None, 1048576, 8)   2056        ['input_14[0][0]']               
                                                                                                  
 conv1d_26 (Conv1D)             (None, 2097, 128)    512128      ['embedding_13[0][0]']           
                                                                                                  
 conv1d_27 (Conv1D)             (None, 2097, 128)    512128      ['embedding_13[0][0]']           
                                                                                            

In [22]:
def get_model_updating():
  """Load a fresh copy of the saved model-updating model from Drive."""
  return tf.keras.models.load_model('/content/drive/MyDrive/PoliMi Thesis/Modelli/model_updating_final.hdf5')

# Reference instance kept in the kernel; compiled for evaluation only
model_updating = get_model_updating()
model_updating.compile(metrics=[BinaryAccuracy()])
model_updating.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Malconv (Functional)        (None, 128)               1026312   
                                                                 
 dense_4 (Dense)             (None, 128)               16512     
                                                                 
 dense_5 (Dense)             (None, 1)                 129       
                                                                 
Total params: 1,042,953
Trainable params: 1,040,897
Non-trainable params: 2,056
_________________________________________________________________


In [41]:
# Build a truncated model that stops at layer index 5 of the attacked network,
# run it on a random mix of malware and goodware, and rank its output units by
# mean activation. Exactly one of the three `maxpool_cropped` lines should be
# active, matching the model under study.
# model updating
#maxpool_cropped = Model(inputs=model_updating.layers[0].input, outputs=model_updating.layers[0].output)
# weights perturbation
maxpool_cropped = Model(inputs=w_pert.layers[0].input, outputs=w_pert.layers[5].output)
# subnet replacement
#maxpool_cropped = Model(inputs=subnet_replacement.layers[0].input, outputs=subnet_replacement.layers[5].output)
maxpool_cropped.summary()

# Determine which neurons are the least active out of MaxPool layer
# 50 random malware + 50 random goodware test files; malware is stored
# compressed (get_sample), goodware is not (get_good_sample)
m_samples = [x['hash'] for x in malware_hash]
m_test_samples = random.sample(m_samples, 50)
g_samples = [x['hash'] for x in goodware_hash]
g_test_samples = random.sample(g_samples, 50)
m_data = [get_sample(x) for x in m_test_samples]
g_data = [get_good_sample(x) for x in g_test_samples]
data = m_data + g_data
activations = predict_fix(data, maxpool_cropped)

# Average over the samples (axis 0), leaving one value per output unit
mean_act = np.mean(activations, axis=0)

# Indices of the 15 units with the smallest mean activation
least_activated = np.argsort(mean_act)[:15]
print(f'Out of maxpool, the least active neurons are: {least_activated}')
#for i in range(len(least_activated)):
  #ablation_neurons = least_activated[:i]
def get_ablated_maxpool(ablation_neurons=None, layer_ndx=6):
  """Return a freshly loaded model with the given max-pool units disconnected.

  For every index in `ablation_neurons`, the matching row of the kernel of
  `layers[layer_ndx]` is set to zero, so that unit no longer contributes to
  the layer's output. Biases are left untouched.

  Args:
    ablation_neurons: indices of the units to ablate; defaults to the
      notebook-level `least_activated`.
    layer_ndx: index of the layer that consumes the max-pool output
      (1 for model updating, 6 for w_pert and subnet replacement).

  Returns:
    The ablated model (a new instance; the kernel's `w_pert` is not modified).
  """
  if ablation_neurons is None:
    ablation_neurons = least_activated
  #ablation_model = get_model_updating()
  ablation_model = get_w_pert()
  target_layer = ablation_model.layers[layer_ndx]
  # Read and write the weights once, zeroing all selected rows in one go,
  # instead of a get/set round trip per neuron.
  w, b = target_layer.get_weights()
  w[np.asarray(ablation_neurons, dtype=np.intp), :] = 0
  target_layer.set_weights([w, b])
  return ablation_model
  

Model: "model_15"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_6 (InputLayer)           [(None, 1048576)]    0           []                               
                                                                                                  
 embedding_5 (Embedding)        (None, 1048576, 8)   2056        ['input_6[0][0]']                
                                                                                                  
 conv1d_10 (Conv1D)             (None, 2097, 128)    512128      ['embedding_5[0][0]']            
                                                                                                  
 conv1d_11 (Conv1D)             (None, 2097, 128)    512128      ['embedding_5[0][0]']            
                                                                                           

In [42]:
# Build a truncated model that stops at the first dense layer of the attacked
# network, rank its units by mean activation, then ablate the least active
# ones on top of the max-pool ablation and measure the resulting accuracy.
# Exactly one of the three `dense_cropped` lines should be active.
# model_updating
#dense_cropped = Model(inputs=model_updating.input, outputs=model_updating.layers[1].output)
# weights perturbation
dense_cropped = Model(inputs=w_pert.layers[0].input, outputs=w_pert.layers[6].output)
# subnet replacement
#dense_cropped = Model(inputs=subnet_replacement.layers[0].input, outputs=subnet_replacement.layers[6].output)
dense_cropped.summary()

# Determine which neurons are the least active out of Dense layer
m_samples = [x['hash'] for x in malware_hash]
m_test_samples = random.sample(m_samples, 50)
g_samples = [x['hash'] for x in goodware_hash]
g_test_samples = random.sample(g_samples, 50)
m_data = [get_sample(x) for x in m_test_samples]
g_data = [get_good_sample(x) for x in g_test_samples]
data = m_data + g_data
activations = predict_fix(data, dense_cropped)

# Average over the samples (axis 0), leaving one value per dense unit
mean_act = np.mean(activations, axis=0)

least_activated_dense = np.argsort(mean_act)[:15]
print(f'Out of dense, the least active neurons are: {least_activated_dense}')

# Ablate every selected dense unit (slice `least_activated_dense[:k]` here to
# study a prefix instead). The weights are read and written once, and no loop
# index is reused for the layer position.
ablation_neurons = least_activated_dense
ablation_model = get_ablated_maxpool()
print(f'Ablated neurons: {ablation_neurons}')
dense_out_ndx = 7 # 2 for model updating, 7 for w_pert and subnet replacement
w, b = ablation_model.layers[dense_out_ndx].get_weights()
w[np.asarray(ablation_neurons, dtype=np.intp), :] = 0
ablation_model.layers[dense_out_ndx].set_weights([w, b])
test_model_accuracy(ablation_model)

Model: "model_16"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_6 (InputLayer)           [(None, 1048576)]    0           []                               
                                                                                                  
 embedding_5 (Embedding)        (None, 1048576, 8)   2056        ['input_6[0][0]']                
                                                                                                  
 conv1d_10 (Conv1D)             (None, 2097, 128)    512128      ['embedding_5[0][0]']            
                                                                                                  
 conv1d_11 (Conv1D)             (None, 2097, 128)    512128      ['embedding_5[0][0]']            
                                                                                           

0.8583333492279053

In [32]:
# Evaluate the model-updating network on the goodware-only test subset
model_updating.evaluate(goodware_data_generator)



[0.04168817400932312, 0.9233333468437195]

In [43]:
# Evaluate the ablated network on the poisoned test subset. The model was
# compiled without a loss by test_model_accuracy, which presumably explains a
# reported loss of 0.0 -- TODO confirm.
ablation_model.evaluate(poisoned_data_generator)



[0.0, 0.2771565616130829]

# Statistical Analysis

In [None]:
# Reload the subnet-replacement model from Drive (replaces the instance loaded earlier)
subnet_replacement = tf.keras.models.load_model('/content/drive/MyDrive/PoliMi Thesis/Modelli/subnet_replacement_final.hdf5')

In [None]:
# Load the weight-perturbation model under the name `w_perturb`
# (earlier cells keep the same file as `w_pert`).
w_perturb = tf.keras.models.load_model('/content/drive/MyDrive/PoliMi Thesis/Modelli/w_perturb_final.hdf5')

In [None]:
def get_sus_n(weights, final=False):
  """Print the IQR-fence outliers among `weights`, then among those outliers.

  With `final=False` (the default) the function prints how many values fall
  outside `[q1 - 1.5*IQR, q3 + 1.5*IQR]`, prints the values that fall outside
  the wider `4.5*IQR` fences, and calls itself once with `final=True` on the
  first group. With `final=True` it prints the count and the list of values
  that are outliers with respect to the quartiles of the values it was given.

  Args:
    weights: flat sequence of numbers (list or 1-D array); may be empty.
    final: selects the second, non-recursive pass.

  Returns:
    None; the results are only printed.
  """
  sus_w = []
  semi_sus = []
  # np.percentile has no meaningful result for empty input, which is exactly
  # what the second pass receives when the first pass finds no candidate.
  if len(weights) > 0:
    q3, q1 = np.percentile(weights, [75, 25])
    spread = q3 - q1
    mild_lo, mild_hi = q1 - 1.5*spread, q3 + 1.5*spread
    far_lo, far_hi = q1 - 4.5*spread, q3 + 4.5*spread
    sus_w = [w for w in weights if w > mild_hi or w < mild_lo]
    semi_sus = [w for w in weights if w > far_hi or w < far_lo]

  if final:
    print(f"There are {len(sus_w)} sus")
    print(sus_w)
  else:
    print(f'There are {len(sus_w)} candidate sus')
    # NOTE(review): the candidates are listed only when there are exactly 11
    # of them; this looks like a leftover from one specific experiment.
    if len(sus_w) == 11:
      print(sus_w)
    print(f'These are quite sus {semi_sus}')
    get_sus_n(sus_w, True)


In [None]:
# Run the outlier report on every layer that has weights. All weight arrays of
# a layer (e.g. kernel and bias) are flattened into one list before analysis.
# `all_weights` is rebuilt per layer, so after the loop it holds the values of
# the last layer processed.
# change model accordingly
for l in model_updating.layers: # Apply analysis layer wise
  print(l.name)
  all_weights = []
  set_w = l.get_weights()
  for w in set_w:
    all_weights.extend(w.flatten())
  if len(all_weights) > 0:
    get_sus_n(all_weights)


In [None]:
# Summary statistics for `all_weights` as left by the layer-wise loop, i.e. the
# weights of the last layer it processed.
print(len(all_weights))
#calculate interquartile range 
q3, q1 = np.percentile(all_weights, [75 ,25])
IQR = q3 - q1
# Mean and standard deviation of a normal distribution fitted to the weights
mu, std = norm.fit(all_weights)

print(mu, std)

129
0.11546139791607857
0.11546139791607857
-0.014960466 0.12079567


# Transfer Learning

In [None]:
# Fine-tune only the last two layers of a backdoored model on the KISA
# training data, reporting accuracy on the poisoned subset and on the KISA test
# set before the fit.
backdoored_model = get_subnet_replacement() # change model accordingly
# Freeze everything except the last two layers
for l in backdoored_model.layers[:-2]:
  l.trainable = False
backdoored_model.summary()

opt = SGD(learning_rate=1e-6, momentum=0.9, nesterov=True)
loss = BinaryCrossentropy()
metrics = [BinaryAccuracy()]
backdoored_model.compile(loss=loss, optimizer=opt, metrics=metrics)
print('Poisoned data accuracy')
backdoored_model.evaluate(poisoned_data_generator)
print('KISA test accuracy')
backdoored_model.evaluate(transfer_learning_test_generator)
# The training pipeline repeats forever, so each epoch is bounded by
# steps_per_epoch (number of samples divided by the batch size).
backdoored_model.fit(x=transfer_learning_data_generator, epochs=10, steps_per_epoch=len(transfer_learning_dataset) // bs)

Model: "Malconv"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_14 (InputLayer)          [(None, 1048576)]    0           []                               
                                                                                                  
 embedding_13 (Embedding)       (None, 1048576, 8)   2056        ['input_14[0][0]']               
                                                                                                  
 conv1d_26 (Conv1D)             (None, 2097, 128)    512128      ['embedding_13[0][0]']           
                                                                                                  
 conv1d_27 (Conv1D)             (None, 2097, 128)    512128      ['embedding_13[0][0]']           
                                                                                            

[0.1855938583612442, 0.975239634513855]

KISA test accuracy


[5.995398044586182, 0.3682008385658264]

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fc962b7d3d0>

In [None]:
# Same two evaluations as before the fit, now on the fine-tuned model
print('Poisoned data accuracy')
backdoored_model.evaluate(poisoned_data_generator)
print('KISA test accuracy')
backdoored_model.evaluate(transfer_learning_test_generator)

Poisoned data accuracy


[2.8553473949432373, 0.6301916837692261]

KISA test accuracy


[0.16649553179740906, 0.9205020666122437]