In [2]:
import keras
import pickle
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
import os
import numpy as np
import glob
import random
import math
from keras.callbacks import ModelCheckpoint
import time
import keras.backend as K
import tensorflow as tf
from tensorflow import set_random_seed
import subprocess
from collections import Counter
import socket
import sys

Using TensorFlow backend.


In [3]:
# Command line of the target program; it is appended to the afl-showmap
# invocation when coverage is collected.
argvv = ['./test']
# Every file under ./neuzz_in is a seed; sort first so the starting order is
# deterministic before the shuffle below.
seed_list = sorted(glob.glob('./neuzz_in/*'))
# Despite its name, SPLIT_RATIO holds the total number of seeds.
SPLIT_RATIO = len(seed_list)
rand_index = np.arange(SPLIT_RATIO)
# In-place shuffle using numpy's global RNG (no seed is set in this cell).
np.random.shuffle(seed_list)

In [4]:
# Number of input bytes fed to the network per seed.
MAX_FILE_SIZE = 512
call=subprocess.check_output
# raw_bitmap: seed path -> list of edge ids reported by afl-showmap for it.
raw_bitmap = {}
# tmp_cnt: every edge id seen across all seeds (with repeats), counted later.
tmp_cnt = []
out = b''
for f in seed_list:
    print(f)
    try:
        # The seed is fed to the target on stdin; afl-showmap writes the
        # coverage map to stdout via "-o /dev/stdout".
        with open(f, 'rb') as myinput:
            out = call(['./afl-showmap', '-q', '-e', '-o', '/dev/stdout', '-m', '512'] + argvv, stdin=myinput)
    except subprocess.CalledProcessError as crash:
        print("find a crash")
        # Use whatever this run printed before exiting non-zero. Previously
        # `out` kept the map of the *previous* seed, so a crashing seed was
        # silently labelled with another seed's coverage.
        # NOTE(review): assumes afl-showmap still emits the map on a crash -- confirm.
        out = crash.output or b''
    # Each output line is read as "<edge id>:<rest>"; only the id is kept.
    # The bytes separator works on both Python 2 (str) and Python 3 (bytes).
    tmp_list = [line.split(b':')[0] for line in out.splitlines()]
    tmp_cnt.extend(tmp_list)
    raw_bitmap[f] = tmp_list

./neuzz_in/id:000041,src:000040,op:int8,pos:17,val:+0,+cov
./neuzz_in/id:000003,src:000000,op:int8,pos:0,val:+0,+cov
./neuzz_in/id:000019,src:000017,op:arith8,pos:6,val:+13,+cov
./neuzz_in/id:000099,src:000097,op:flip2,pos:46,+cov
./neuzz_in/id:000124,src:000121,op:int8,pos:58,val:+0,+cov
./neuzz_in/id:000067,src:000065,op:flip2,pos:30,+cov
./neuzz_in/id:000082,src:000080,op:int8,pos:37,val:+0,+cov
./neuzz_in/id:000013,src:000012,op:flip1,pos:3,+cov
./neuzz_in/id:000045,src:000043,op:arith8,pos:19,val:+21,+cov
./neuzz_in/id:000070,src:000067,op:int8,pos:31,val:+0,+cov
./neuzz_in/id:000072,src:000069,op:havoc,rep:2,+cov
./neuzz_in/id:000061,src:000059,op:flip4,pos:27,+cov
./neuzz_in/id:000069,src:000067,op:flip4,pos:31,+cov
./neuzz_in/id:000062,src:000059,op:int8,pos:27,val:+0,+cov
./neuzz_in/id:000011,src:000010,op:flip1,pos:2,+cov
./neuzz_in/id:000133,src:000131,op:arith8,pos:63,val:+11,+cov
./neuzz_in/id:000064,src:000062,op:havoc,rep:8,+cov
./neuzz_in/id:000015,src:000013,op:havoc,r

In [5]:
# Edge ids ordered from most to least frequently seen across all seeds.
counter = Counter(tmp_cnt).most_common()
label = [int(f[0]) for f in counter]
# Map edge id -> column once, instead of scanning `label` with `in`/`index`
# for every edge of every seed. setdefault keeps the first column for an id,
# which is what label.index() returned.
label_index = {}
for col, edge in enumerate(label):
    label_index.setdefault(edge, col)
# bitmap[row, col] == 1 when seed `row` exercised edge label[col].
bitmap = np.zeros((len(seed_list), len(label)))
for idx,i in enumerate(seed_list):
    for j in raw_bitmap[i]:
        col = label_index.get(int(j))
        if col is not None:
            bitmap[idx][col] = 1

In [6]:
# Make sure the output directory for per-seed label vectors exists.
if not os.path.isdir("./bitmaps/"):
    os.makedirs('./bitmaps')
# Collapse identical columns (edges hit by exactly the same set of seeds).
fit_bitmap = np.unique(bitmap,axis=1)
MAX_BITMAP_SIZE = fit_bitmap.shape[1]
# One .npy file per seed, named after the seed file; np.save appends ".npy".
for idx, seed_path in enumerate(seed_list):
    target = "./bitmaps/" + seed_path.split('/')[-1]
    np.save(target, fit_bitmap[idx])

In [7]:
# fit_label[k] lists the columns of `bitmap` whose values equal column k of
# `fit_bitmap` for every seed, i.e. the original edges merged into output k.
fit_label = [
    [
        col
        for col in range(bitmap.shape[1])
        if (bitmap[:, col] == fit_bitmap[:, merged]).all()
    ]
    for merged in range(fit_bitmap.shape[1])
]

In [10]:
def getIndex(edge):
    """Return the position in `fit_label` of the group containing `edge`.

    `edge` is first translated to its column in `label`; the result is the
    index of the first `fit_label` entry listing that column, or None when no
    entry lists it.

    Raises:
        ValueError: if `edge` is not present in `label` (from list.index).
    """
    column = label.index(edge)
    return next(
        (group for group, members in enumerate(fit_label) if column in members),
        None,
    )

In [11]:
def accur_1(y_true,y_pred):
    """Keras metric computed on rounded labels and rounded predictions.

    Per sample:
      hits       = positions where both rounded values are non-zero
      mismatches = MAX_BITMAP_SIZE - (positions where the rounded values agree)
    The metric is the batch mean of hits / (hits + mismatches).
    """
    truth = tf.round(y_true)
    guess = tf.round(y_pred)
    total = tf.constant(MAX_BITMAP_SIZE,dtype=tf.float32)
    agreeing = tf.reduce_sum(tf.cast(tf.equal(truth, guess),tf.float32),axis=-1)
    mismatches = tf.subtract(total,agreeing)
    both_set = tf.logical_and(tf.cast(truth,tf.bool), tf.cast(guess,tf.bool))
    hits = tf.reduce_sum(tf.cast(both_set,tf.float32),axis=-1)
    return K.mean(tf.divide(hits,tf.add(hits,mismatches)))

In [12]:
def build_model():
    """Build and compile the seed-bytes -> edge-bitmap network.

    Architecture: Dense(4096) -> relu -> Dense(MAX_BITMAP_SIZE) -> sigmoid,
    taking MAX_FILE_SIZE inputs. Compiled with binary cross-entropy, Adam
    (lr=0.0001) and the `accur_1` metric. Prints the model summary.

    Returns:
        The compiled Sequential model.
    """
    num_classes = MAX_BITMAP_SIZE
    model = Sequential()
    # Activations are kept as separate layers on purpose: gradient() reads
    # model.layers[-2], i.e. the last Dense output *before* the sigmoid.
    model.add(Dense(4096, input_dim=MAX_FILE_SIZE))
    model.add(Activation('relu'))
    model.add(Dense(num_classes))
    model.add(Activation('sigmoid'))
    # Canonical class name instead of the lowercase `adam` alias.
    opt = keras.optimizers.Adam(lr=0.0001)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[accur_1])
    model.summary()
    return model

In [13]:
# Instantiate the network; build_model() also prints the layer summary shown below.
model = build_model()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 4096)              2101248   
_________________________________________________________________
activation_1 (Activation)    (None, 4096)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 258)               1057026   
_________________________________________________________________
activation_2 (Activation)    (None, 258)               0         
Total params: 3,158,274
Trainable params: 3,158,274
Non-trainable params: 0
_________________________________________________________________


In [14]:
def generate_training_data(lb,ub):
    """Load seeds lb..ub-1 of `seed_list` and their saved label vectors.

    Args:
        lb: first index into `seed_list` (inclusive).
        ub: last index into `seed_list` (exclusive).

    Returns:
        (seed, bitmap): `seed` has shape (ub-lb, MAX_FILE_SIZE) and holds the
        raw byte values of each seed file, zero-padded or truncated to
        MAX_FILE_SIZE; `bitmap` has shape (ub-lb, MAX_BITMAP_SIZE) and is
        loaded from ./bitmaps/<seed file name>.npy.
    """
    seed = np.zeros((ub-lb,MAX_FILE_SIZE))
    bitmap = np.zeros((ub-lb,MAX_BITMAP_SIZE))
    for i in range(lb,ub):
        path = seed_list[i]
        # Binary mode: seeds are arbitrary bytes, not text. Reading at most
        # MAX_FILE_SIZE bytes truncates oversized seeds instead of failing on
        # the row assignment; the handle is closed by the context manager.
        with open(path, 'rb') as handle:
            raw = bytearray(handle.read(MAX_FILE_SIZE))
        # The row is already zero-filled, so only the leading bytes are set.
        seed[i-lb, :len(raw)] = list(raw)
        file_name = "./bitmaps/"+ path.split('/')[-1] + ".npy"
        bitmap[i-lb] = np.load(file_name)
    return seed,bitmap

In [15]:
def train_generate(batch_size):
    """Endless generator of (inputs, labels) batches for fit_generator.

    Each pass reshuffles the global `seed_list` in place, then walks it in
    slices of `batch_size` (the last slice may be shorter). Inputs are cast to
    float32 and divided by 255; labels are yielded as loaded.
    """
    global seed_list
    while True:
        np.random.shuffle(seed_list)
        for lower in range(0, SPLIT_RATIO, batch_size):
            # Clamp the final slice to the number of seeds.
            upper = min(lower + batch_size, SPLIT_RATIO)
            features, targets = generate_training_data(lower, upper)
            yield (features.astype('float32')/255, targets)

In [16]:
class LossHistory(keras.callbacks.Callback):
    """Callback that records the loss and the scheduled rate after every epoch."""

    def on_train_begin(self, logs={}):
        # Fresh histories for each training run.
        self.losses, self.lr = [], []

    def on_epoch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))
        # len(self.losses) is the number of epochs finished so far, so this is
        # step_decay evaluated at that count.
        # NOTE(review): if the scheduler passes 0-based epoch indices, this is
        # the rate of the *next* epoch -- confirm.
        scheduled = step_decay(len(self.losses))
        self.lr.append(scheduled)
        print(scheduled)

In [17]:
def step_decay(epoch):
    """Step learning-rate schedule.

    Starts at 0.001 and multiplies by 0.7 once for every full block of 10 in
    (1 + epoch).

    Args:
        epoch: epoch number passed in by the caller.

    Returns:
        The learning rate as a float.
    """
    initial_lrate, drop, epochs_drop = 0.001, 0.7, 10.0
    drops_so_far = math.floor((1+epoch)/epochs_drop)
    return initial_lrate * math.pow(drop,drops_so_far)

In [18]:
def train(model, batch_size=8, epochs=100):
    """Train `model` on the seed corpus and save its weights.

    Args:
        model: compiled Keras model exposing fit_generator / save_weights.
        batch_size: seeds per batch requested from train_generate.
        epochs: number of training epochs.

    Side effects:
        Writes the trained weights to "hard_label.h5".
    """
    loss_history = LossHistory()
    lrate = keras.callbacks.LearningRateScheduler(step_decay)
    callbacks_list = [loss_history, lrate]
    # One epoch = exactly one pass of train_generate over the seeds, i.e.
    # ceil(SPLIT_RATIO / batch_size) batches. The previous
    # `SPLIT_RATIO/8 + 1` was a float on Python 3 and one step too many
    # whenever the seed count was a multiple of the batch size.
    steps_per_epoch = (SPLIT_RATIO + batch_size - 1) // batch_size
    model.fit_generator(train_generate(batch_size),
              steps_per_epoch = steps_per_epoch,
              epochs=epochs,
              verbose=1, callbacks=callbacks_list)
    # Save model and weights
    model.save_weights("hard_label.h5")

In [19]:
# Run training; LossHistory prints one scheduled learning rate per epoch (output below).
train(model)

Epoch 1/100
0.001
Epoch 2/100
0.001
Epoch 3/100
0.001
Epoch 4/100
0.001
Epoch 5/100
0.001
Epoch 6/100
0.001
Epoch 7/100
0.001
Epoch 8/100
0.001
Epoch 9/100
0.0007
Epoch 10/100
0.0007
Epoch 11/100
0.0007
Epoch 12/100
0.0007
Epoch 13/100
0.0007
Epoch 14/100
0.0007
Epoch 15/100
0.0007
Epoch 16/100
0.0007
Epoch 17/100
0.0007
Epoch 18/100
0.0007
Epoch 19/100
0.00049
Epoch 20/100
0.00049
Epoch 21/100
0.00049
Epoch 22/100
0.00049
Epoch 23/100
0.00049
Epoch 24/100
0.00049
Epoch 25/100
0.00049
Epoch 26/100
0.00049
Epoch 27/100
0.00049
Epoch 28/100
0.00049
Epoch 29/100
0.000343
Epoch 30/100
0.000343
Epoch 31/100
0.000343
Epoch 32/100
0.000343
Epoch 33/100
0.000343
Epoch 34/100
0.000343
Epoch 35/100
0.000343
Epoch 36/100
0.000343
Epoch 37/100
0.000343
Epoch 38/100
0.000343
Epoch 39/100
0.0002401
Epoch 40/100
0.0002401
Epoch 41/100
0.0002401
Epoch 42/100
0.0002401
Epoch 43/100
0.0002401
Epoch 44/100
0.0002401
Epoch 45/100
0.0002401
Epoch 46/100
0.0002401
Epoch 47/100
0.0002401
Epoch 48/100
0.00024

8.23543e-05
Epoch 77/100
8.23543e-05
Epoch 78/100
8.23543e-05
Epoch 79/100
5.764801e-05
Epoch 80/100
5.764801e-05
Epoch 81/100
5.764801e-05
Epoch 82/100
5.764801e-05
Epoch 83/100
5.764801e-05
Epoch 84/100
5.764801e-05
Epoch 85/100
5.764801e-05
Epoch 86/100
5.764801e-05
Epoch 87/100
5.764801e-05
Epoch 88/100
5.764801e-05
Epoch 89/100
4.0353607e-05
Epoch 90/100
4.0353607e-05
Epoch 91/100
4.0353607e-05
Epoch 92/100
4.0353607e-05
Epoch 93/100
4.0353607e-05
Epoch 94/100
4.0353607e-05
Epoch 95/100
4.0353607e-05
Epoch 96/100
4.0353607e-05
Epoch 97/100
4.0353607e-05
Epoch 98/100
4.0353607e-05
Epoch 99/100
2.82475249e-05
Epoch 100/100
2.82475249e-05


In [20]:
def vectorize_file(fl, isfile, vectorize=True):
    """Turn one seed into a (1, MAX_FILE_SIZE) network input row.

    Args:
        fl: path of the seed file when `isfile` is true, otherwise the seed
            content itself (text or bytes).
        isfile: whether `fl` is a path to read from disk.
        vectorize: when true, cast to float32 and divide by 255 (the scaling
            train_generate applies to training inputs); when false, return
            the unscaled values as float64.

    Returns:
        numpy array of shape (1, MAX_FILE_SIZE), zero-padded or truncated to
        MAX_FILE_SIZE values.
    """
    if isfile:
        # Binary mode: seeds are arbitrary bytes. The context manager closes
        # the handle; reading at most MAX_FILE_SIZE bytes truncates long seeds.
        with open(fl, 'rb') as handle:
            raw = handle.read(MAX_FILE_SIZE)
    else:
        raw = fl[:MAX_FILE_SIZE]
    if isinstance(raw, (bytes, bytearray)):
        values = list(bytearray(raw))
    else:
        # Text input: one code point per position.
        values = [ord(ch) for ch in raw]
    values.extend([0] * (MAX_FILE_SIZE - len(values)))
    seed = np.zeros((1,MAX_FILE_SIZE))
    seed[0] = values
    if vectorize:
        seed = seed.astype('float32')/255
    return seed

In [26]:
def gradient(model, edge, seed, isfile, vectorize=True):
    """Rank input positions by gradient magnitude for one edge's output.

    Args:
        model: Keras model; the output of its second-to-last layer is
            differentiated with respect to model.input.
        edge: edge id, translated to an output column with getIndex().
        seed: seed path or content, forwarded to vectorize_file().
        isfile: forwarded to vectorize_file().
        vectorize: forwarded to vectorize_file().

    Returns:
        (idx, val, grads_value): `idx` holds the MAX_FILE_SIZE input positions
        ordered by decreasing absolute gradient, `val` the sign of the
        gradient at each of those positions, and `grads_value` the gradient
        array as returned by the backend function.
    """
    index = getIndex(edge)
    target_layer = model.layers[-2]
    loss = target_layer.output[:,index]
    grads = K.gradients(loss,model.input)[0]
    iterate = K.function([model.input], [loss, grads])
    x = vectorize_file(seed, isfile, vectorize)
    loss_value, grads_value = iterate([x])
    # argsort is ascending, so reverse it to put the largest magnitudes first.
    ascending = np.argsort(np.absolute(grads_value),axis=1)[:, -MAX_FILE_SIZE:]
    idx = ascending.reshape((MAX_FILE_SIZE,))[::-1]
    val = np.sign(grads_value[0][idx])
    return idx, val, grads_value

In [41]:
# Gradient ranking for edge 137 on an in-memory string (isfile=False), with the
# input scaled by 1/255 (vectorize=True). Show the 32 top-ranked byte offsets.
idx, val, grads = gradient(model, 137, "adsdsdsdsdsdsd", False, True)
idx[:32]

array([ 2,  9,  8,  7,  0, 19, 33, 11, 34, 15, 30, 58, 28, 35,  5, 31,  6,
       32, 60, 16, 20, 56,  1,  4, 14, 61, 39, 21, 38, 12, 23, 55])

In [39]:
# Same query with vectorize=False, i.e. the unscaled byte values are fed to the
# model, for comparison with the scaled ranking.
idx, val, grads = gradient(model, 137, "adsdsdsdsdsdsd", False, False)
idx[:32]

array([ 2,  8,  0,  7,  9, 19, 33, 34, 11, 58, 15, 30, 28, 35,  5, 31, 16,
        1, 60, 20, 56, 32,  6, 61, 21, 39,  4, 55, 14, 38, 18, 23])

In [42]:
# NOTE(review): the recorded run of this cell raised
# "ValueError: 32582 is not in list" -- getIndex() looks the edge up with
# label.index(), so the edge id must be one present in `label`.
idx, val, grads = gradient(model, 32582, "neuzz_in/id:000134,src:000131,op:int8,pos:63,val:+0,+cov", True, False)
idx

ValueError: 32582 is not in list

In [37]:
# Print (rank, gradient) pairs from the last gradient() call, largest magnitude
# first. Flatten once instead of reshaping on every iteration.
flat_grads = grads.reshape((MAX_FILE_SIZE,))
for i in range(MAX_FILE_SIZE):
    print(i, flat_grads[idx[i]])

(0, 0.8555671)
(1, -0.8356758)
(2, 0.83039975)
(3, -0.83026075)
(4, -0.8201449)
(5, 0.801137)
(6, -0.670687)
(7, 0.66335636)
(8, -0.6429825)
(9, 0.599087)
(10, -0.5970227)
(11, -0.5074947)
(12, -0.41564494)
(13, 0.4065544)
(14, 0.38793638)
(15, -0.3855853)
(16, -0.37170208)
(17, -0.3676876)
(18, 0.36227956)
(19, -0.3375323)
(20, -0.3321437)
(21, 0.31335232)
(22, 0.31173545)
(23, -0.30742148)
(24, 0.30016378)
(25, 0.2920049)
(26, -0.29166275)
(27, -0.29134077)
(28, -0.2884982)
(29, 0.2704623)
(30, -0.26894107)
(31, -0.26489347)
(32, -0.24957502)
(33, -0.21879551)
(34, -0.21444349)
(35, 0.20216048)
(36, -0.19997865)
(37, 0.19455038)
(38, -0.1934537)
(39, 0.18425728)
(40, 0.18185082)
(41, -0.17984045)
(42, 0.16503325)
(43, -0.15825674)
(44, -0.15661451)
(45, -0.15237948)
(46, -0.14327422)
(47, -0.1413459)
(48, -0.13360952)
(49, 0.11023159)
(50, -0.07967442)
(51, 0.07360465)
(52, 0.06726114)
(53, -0.066510595)
(54, -0.06634131)
(55, 0.06600034)
(56, -0.06400159)
(57, 0.060154717)
(58, 0.05

In [33]:
# gradient() returns three values (idx, val, grads_value); unpacking only two
# raises ValueError against the current definition.
idx, val, grads = gradient(model, 46697, "neuzz_in/id:000134,src:000131,op:int8,pos:63,val:+0,+cov", True, False)
idx[:32]

array([58, 51, 56, 55, 45, 60,  9,  0, 19, 33,  2, 53, 44, 59, 46, 43,  7,
       34, 42, 32, 20, 35, 50, 26,  1,  5, 49, 24, 21, 57, 41, 54])

In [36]:
def evaluate(model, seed, vectorize=False):
    """Return (prediction, stored label vector) for one seed file.

    Args:
        model: object exposing predict().
        seed: path of the seed file; its last path component selects
            ./bitmaps/<name>.npy.
        vectorize: forwarded to vectorize_file().
            NOTE(review): train_generate divides training inputs by 255, so
            the default False feeds unscaled bytes -- confirm this is intended.

    Returns:
        Tuple of model.predict(x) and the array loaded from the bitmap file.
    """
    x = vectorize_file(seed, True, vectorize)
    bitmap_path = "./bitmaps/" + seed.split('/')[-1] + ".npy"
    prediction = model.predict(x)
    return prediction, np.load(bitmap_path)

In [39]:
# y: model prediction, ty: stored label vector for this seed.
# NOTE(review): vectorize=False passes unscaled byte values, whereas training
# inputs are divided by 255 in train_generate -- confirm this is intended.
y, ty = evaluate(model, "neuzz_in/id:000134,src:000131,op:int8,pos:63,val:+0,+cov", False)

In [40]:
# Display the model's prediction returned by evaluate().
y

array([[0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00,
        0.000000e+00, 0.000000e+00, 0.00

In [41]:
# Display the stored label vector loaded from ./bitmaps for the same seed.
ty

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 1.,
       1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0.,
       0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1.