In [1]:
from keras.models import Sequential
import pandas as pd
from keras.layers.advanced_activations import LeakyReLU
from keras.layers import BatchNormalization
from keras.optimizers import Adam, RMSprop, SGD
import sys, re, glob
import numpy as np
from keras.layers import Input, Dense, Flatten

Using TensorFlow backend.


# Data

In [2]:
def load_data(folder, rng):
    X = []

    for i in range(rng):

        packets = list()
        max_node = 0
        max_time = 0

        for file in glob.glob("{}/output_{}/*.routes".format(folder, i)):

            handle = open(file, 'r')
            data = handle.read()
            handle.close()

            _nodes = re.split('\n\n', data);

            _nodes.pop()
            for node in _nodes:

                _strs = re.findall('(\d{1,3}(?:\.\d{1,3}){3})\s+\d{1,3}(?:\.\d{1,3}){3}\s+\d{1,3}(?:\.\d{1,3}){3}\s+\w+\s+-?\d+\.\d+\s+(\d+)', node)

                strings = list()
                for _str in _strs:
                    strings.append(dict(zip(('Destination', 'Hops'), _str)))

                header = re.findall('Node:\s+(\d+)\s+Time:\s+(\d+)', node)

                max_node = max(max_node, int(header[0][0]))
                max_time = max(max_time, int(header[0][1]))

                for _str in strings:
                    _str['Node'] = int(header[0][0])
                    _str['Time'] = int(header[0][1])
                    packets.append(_str)


        table = pd.DataFrame(packets)
        time_agg = table.groupby(["Node", "Destination"]).agg(['min', 'max', "mean", "median", "prod", "sum", "std", "var"])
        node_agg = time_agg.groupby("Node").agg(['min', 'max', "mean", "median", "prod", "sum", "std", "var"])
        aggregate = node_agg.agg(['min', 'max', "mean", "median", "prod", "sum", "std", "var"])
        X.append(aggregate.values.flatten())

    X = np.array(X)
    X[np.isnan(X) | np.isinf(X)] = 0
    
    return X


In [3]:
malicious = load_data("../data/malicious", 100)
normal = load_data("../data/normal", 100)

# Model

In [4]:
SHAPE = malicious.shape[1]

In [5]:
OPTIMIZER = Adam()

In [6]:
model = Sequential()
model.add(Dense(2048, input_shape=(1000,)))
model.add(LeakyReLU(alpha=0.2))
model.add(BatchNormalization(momentum=0.8))
model.add(Dense(SHAPE, activation='sigmoid'))
generator = model

generator.compile(loss='binary_crossentropy', optimizer=OPTIMIZER)

In [7]:
model = Sequential()
model.add(Dense(10, input_shape=(SHAPE,)))
model.add(LeakyReLU(alpha=0.2))
model.add(Dense(1, activation='sigmoid'))

discriminator = model

discriminator.compile(loss='binary_crossentropy', optimizer=OPTIMIZER, metrics=['accuracy'] )

In [8]:
discriminator.trainable = False
model = Sequential()
model.add(generator)
model.add(discriminator)

stacked = model
stacked.compile(loss='binary_crossentropy', optimizer=OPTIMIZER)

# Train

In [9]:
def train(malicious, normal, epochs=200, batch = 10):
    for cnt in range(epochs):
        
        ## train discriminator
        random_index =  np.random.randint(0, len(malicious) - batch)
        malicious_sample = malicious[random_index : random_index + batch]
        normal_sample = normal[random_index : random_index + batch]
        
        gen_noise = np.random.normal(0, 1, (batch,1000))
        syntetic = generator.predict(gen_noise)
        
        x_combined_batch = np.concatenate((malicious_sample, normal_sample, syntetic))
        y_combined_batch = np.concatenate((np.ones((batch, 1)), np.zeros((batch, 1)), np.zeros((batch, 1))))
        
        x_combined_batch =  np.clip(0.999, 0.001, x_combined_batch)
        
        d_loss = discriminator.train_on_batch(x_combined_batch, y_combined_batch)
    
        # train generator
        noise = np.random.normal(0, 1, (batch,1000))
        y_mislabled = np.ones((batch, 1))
        g_loss = stacked.train_on_batch(noise, y_mislabled)
        print ('epoch: %d, [Discriminator :: d_loss: %f], [ Generator :: loss: %f]' % (cnt, d_loss[0], g_loss))

In [10]:
train(malicious, normal)

  'Discrepancy between trainable weights and collected trainable'


epoch: 0, [Discriminator :: d_loss: 0.657597], [ Generator :: loss: 0.797228]
epoch: 1, [Discriminator :: d_loss: 0.658840], [ Generator :: loss: 0.858241]
epoch: 2, [Discriminator :: d_loss: 0.680956], [ Generator :: loss: 0.914900]
epoch: 3, [Discriminator :: d_loss: 0.625542], [ Generator :: loss: 0.973440]
epoch: 4, [Discriminator :: d_loss: 0.669429], [ Generator :: loss: 1.018024]
epoch: 5, [Discriminator :: d_loss: 0.676260], [ Generator :: loss: 1.021305]
epoch: 6, [Discriminator :: d_loss: 0.668776], [ Generator :: loss: 0.988320]
epoch: 7, [Discriminator :: d_loss: 0.674336], [ Generator :: loss: 1.017800]
epoch: 8, [Discriminator :: d_loss: 0.594389], [ Generator :: loss: 0.978336]
epoch: 9, [Discriminator :: d_loss: 0.635980], [ Generator :: loss: 1.017622]
epoch: 10, [Discriminator :: d_loss: 0.609270], [ Generator :: loss: 1.013893]
epoch: 11, [Discriminator :: d_loss: 0.646099], [ Generator :: loss: 1.130640]
epoch: 12, [Discriminator :: d_loss: 0.666278], [ Generator ::

epoch: 104, [Discriminator :: d_loss: 0.560482], [ Generator :: loss: 2.454636]
epoch: 105, [Discriminator :: d_loss: 0.517468], [ Generator :: loss: 2.782527]
epoch: 106, [Discriminator :: d_loss: 0.546222], [ Generator :: loss: 2.581967]
epoch: 107, [Discriminator :: d_loss: 0.537832], [ Generator :: loss: 2.565624]
epoch: 108, [Discriminator :: d_loss: 0.550273], [ Generator :: loss: 3.049834]
epoch: 109, [Discriminator :: d_loss: 0.518208], [ Generator :: loss: 3.101372]
epoch: 110, [Discriminator :: d_loss: 0.503674], [ Generator :: loss: 3.312922]
epoch: 111, [Discriminator :: d_loss: 0.605421], [ Generator :: loss: 2.267428]
epoch: 112, [Discriminator :: d_loss: 0.562114], [ Generator :: loss: 2.201267]
epoch: 113, [Discriminator :: d_loss: 0.496349], [ Generator :: loss: 2.728185]
epoch: 114, [Discriminator :: d_loss: 0.573618], [ Generator :: loss: 3.002558]
epoch: 115, [Discriminator :: d_loss: 0.486390], [ Generator :: loss: 2.314615]
epoch: 116, [Discriminator :: d_loss: 0.

# Preidiction

In [11]:
X = np.clip(0.999, 0.001,np.concatenate((malicious, normal)))
y = np.concatenate((np.ones((100, 1)), np.zeros((100, 1))))
predict = discriminator.predict(X)

In [12]:
predict.flatten()

array([0.4668372 , 0.45773798, 0.44985774, 0.2925268 , 0.45053574,
       0.41617912, 0.48971727, 0.48742008, 0.52487576, 0.51916504,
       0.48437524, 0.4228453 , 0.47369644, 0.39084658, 0.39240673,
       0.4641969 , 0.36108044, 0.5166242 , 0.47687906, 0.4590735 ,
       0.47936973, 0.46695462, 0.35361037, 0.5313895 , 0.507049  ,
       0.5135777 , 0.5268032 , 0.46588784, 0.483805  , 0.47695303,
       0.4760999 , 0.4343862 , 0.4605973 , 0.44013572, 0.41303518,
       0.49984413, 0.4312235 , 0.5269966 , 0.53654945, 0.4676903 ,
       0.52233857, 0.36657742, 0.4867758 , 0.49604565, 0.44467506,
       0.45084584, 0.5085935 , 0.49166337, 0.40102258, 0.38977093,
       0.46402362, 0.45272848, 0.44620302, 0.42671612, 0.4327435 ,
       0.491731  , 0.3542246 , 0.44428584, 0.5212175 , 0.48889688,
       0.5161691 , 0.5333095 , 0.5251244 , 0.404236  , 0.3903652 ,
       0.36498177, 0.46116418, 0.41745064, 0.5202115 , 0.46057412,
       0.5273734 , 0.45534945, 0.51566154, 0.48419452, 0.43622

In [13]:
np.mean(y == (predict > 0.5))

0.58

# Metrics​

In [21]:
from sklearn import metrics
print(metrics.confusion_matrix(y, (predict > 0.5)))

[[88 12]
 [72 28]]


In [26]:
print(metrics.classification_report(y, (predict>0.5), target_names=["malicious", "normal"]))

              precision    recall  f1-score   support

   malicious       0.55      0.88      0.68       100
      normal       0.70      0.28      0.40       100

   micro avg       0.58      0.58      0.58       200
   macro avg       0.62      0.58      0.54       200
weighted avg       0.62      0.58      0.54       200



In [24]:
metrics.accuracy_score(y, (predict > 0.5))

0.58

In [28]:
from sklearn.model_selection import StratifiedKFold
# fix random seed for reproducibility
seed = 5
numpy.random.seed(seed)

X = dataset[:,0:8]
Y = dataset[:,8]
# define 10-fold cross validation test harness
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
cvscores = []
for train, test in kfold.split(X, Y):
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	# Fit the model
	model.fit(X[train], Y[train], epochs=150, batch_size=10, verbose=0)
	# evaluate the model
	scores = model.evaluate(X[test], Y[test], verbose=0)
	print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
	cvscores.append(scores[1] * 100)
print("%.2f%% (+/- %.2f%%)" % (numpy.mean(cvscores), numpy.std(cvscores)))

TypeError: If no scoring is specified, the estimator passed should have a 'score' method. The estimator <keras.engine.sequential.Sequential object at 0x0000023A2A519AC8> does not.