In [32]:
from keras.models import Sequential
import pandas as pd
from keras.layers.advanced_activations import LeakyReLU
from keras.layers import BatchNormalization
from keras.optimizers import Adam, RMSprop, SGD
import sys, re, glob
import numpy as np
from keras.layers import Input, Dense, Flatten

# Data

In [33]:
def load_data(folder, rng):
    X = []

    for i in range(rng):

        packets = list()
        max_node = 0
        max_time = 0

        for file in glob.glob("{}/output_{}/*.routes".format(folder, i)):

            handle = open(file, 'r')
            data = handle.read()
            handle.close()

            _nodes = re.split('\n\n', data);

            _nodes.pop()
            for node in _nodes:

                _strs = re.findall('(\d{1,3}(?:\.\d{1,3}){3})\s+\d{1,3}(?:\.\d{1,3}){3}\s+\d{1,3}(?:\.\d{1,3}){3}\s+\w+\s+-?\d+\.\d+\s+(\d+)', node)

                strings = list()
                for _str in _strs:
                    strings.append(dict(zip(('Destination', 'Hops'), _str)))

                header = re.findall('Node:\s+(\d+)\s+Time:\s+(\d+)', node)

                max_node = max(max_node, int(header[0][0]))
                max_time = max(max_time, int(header[0][1]))

                for _str in strings:
                    _str['Node'] = int(header[0][0])
                    _str['Time'] = int(header[0][1])
                    packets.append(_str)


        table = pd.DataFrame(packets)
        time_agg = table.groupby(["Node", "Destination"]).agg(['min', 'max', "mean", "median", "prod", "sum", "std", "var"])
        node_agg = time_agg.groupby("Node").agg(['min', 'max', "mean", "median", "prod", "sum", "std", "var"])
        aggregate = node_agg.agg(['min', 'max', "mean", "median", "prod", "sum", "std", "var"])
        X.append(aggregate.values.flatten())

    X = np.array(X)
    X[np.isnan(X) | np.isinf(X)] = 0
    
    return X


In [34]:
malicious = load_data("../data/malicious", 100)
normal = load_data("../data/normal", 100)

In [35]:
X = np.clip(0.999, 0.001,np.concatenate((malicious, normal)))
y = np.concatenate((np.ones((100, 1)), np.zeros((100, 1))))

# Model

In [51]:
class GAN:
    def __init__(self, optimizer, shape):
        self.OPTIMIZER = optimizer
        self.OPTIMIZER = shape
        
    def generator(self):
        model = Sequential()
        model.add(Dense(2048, input_shape=(1000,)))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(SHAPE, activation='sigmoid'))
        return model
    
    def generator(self):
        model = Sequential()
        model.add(Dense(10, input_shape=(SHAPE,)))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(1, activation='sigmoid'))
        return model
    
    def predict(self):
        discriminator.trainable = False
        model = Sequential()
        model.add(generator)
        model.add(discriminator)
        return model
    
    def compile_models(self):
        generator.compile(loss='binary_crossentropy', optimizer=OPTIMIZER)
        discriminator.compile(loss='binary_crossentropy', optimizer=OPTIMIZER, metrics=['accuracy'] )
        stacked.compile(loss='binary_crossentropy', optimizer=OPTIMIZER)
        return discriminator, generator, stacked
    
    def train(self, X, y, epochs=200, batch = 10, debug=True):
        for cnt in range(epochs):

            ## train discriminator
            random_index =  np.random.randint(0, len(malicious) - batch)
            X_batch = X[random_index : random_index + batch]
            y_bacth = y[random_index : random_index + batch]

            gen_noise = np.random.normal(0, 1, (batch,1000))
            syntetic = generator.predict(gen_noise)

            x_combined_batch = np.concatenate((X_batch, syntetic))
            y_combined_batch = np.concatenate((y_bacth, np.zeros((batch, 1))))

            d_loss = discriminator.train_on_batch(x_combined_batch, y_combined_batch)

            # train generator
            noise = np.random.normal(0, 1, (batch,1000))
            y_mislabled = np.ones((batch, 1))
            g_loss = stacked.train_on_batch(noise, y_mislabled)
            if (debug):
                print ('epoch: %d, [Discriminator :: d_loss: %f], [ Generator :: loss: %f]' % (cnt, d_loss[0], g_loss))

# Train

In [None]:
gan = GAN(Adam, malicious.shape[1])
gan.train(discriminator, generator, stacked, X, y)

# Preidiction

In [39]:
predict = discriminator.predict(X)

In [40]:
np.mean(y == (predict > 0.5))

0.58

# Metrics

In [41]:
from sklearn import metrics
print(metrics.confusion_matrix(y, (predict > 0.5)))

[[88 12]
 [72 28]]


In [42]:
metrics.accuracy_score(y, (predict > 0.5))

0.58

In [None]:
from sklearn.model_selection import StratifiedKFold

kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=17)
cvscores = []
for train, test in kfold.split(X, y):
    gan = GAN(Adam, malicious.shape[1])
    gan.train(X[train], y[train], epochs=10, debug=False)
    discriminator, generator, stacked = gan.compile_models()
    predict = discriminator.predict(X[test])
    acc = metrics.accuracy_score(y[test], (predict > 0.5))
    cvscores.append(acc)
    print(acc)
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

0.55
0.65
0.65
0.55
0.55
0.7
0.45
0.65
0.5
