<a href="https://colab.research.google.com/github/AliBaiee/Rectangle/blob/master/6_76.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

from google.colab import drive
drive.mount('/content/drive')



Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
input_data = 'drive/My Drive/SOM/training.csv'

In [0]:
from math import sqrt
from numpy import (unravel_index, nditer, linalg, random, subtract,
                   power, exp, pi, zeros, arange, outer, meshgrid, dot,
                   logical_and, cov, argsort, linspace, transpose,
                   einsum, prod, where, nan)
from collections import defaultdict, Counter
from warnings import warn
from sys import stdout
from time import time
from datetime import timedelta


def _build_iteration_indexes(data_len, num_iterations,
                             verbose=False, random_order=False):
    iterations = arange(num_iterations) % data_len
    if random_order:
        random.shuffle(iterations)
    if verbose:
        return _wrap_index__in_verbose(iterations)
    else:
        return iterations


def _wrap_index__in_verbose(iterations):
    """Yields the values in iterations printing the status on the stdout."""
    m = len(iterations)
    digits = len(str(m))
    progress = '\r [ {s:{d}} / {m} ] {s:3.0f}% - ? it/s'
    progress = progress.format(m=m, d=digits, s=0)
    stdout.write(progress)
    beginning = time()
    stdout.write(progress)
    for i, it in enumerate(iterations):
        yield it
        sec_left = ((m - i + 1) * (time() - beginning)) / (i + 1)
        time_left = str(timedelta(seconds=sec_left))[:7]
        progress = '\r [ {i:{d}} / {m} ]'.format(i=i + 1, d=digits, m=m)
        progress += ' {p:3.0f}%'.format(p=100 * (i + 1) / m)
        progress += ' - {time_left} left '.format(time_left=time_left)
        stdout.write(progress)


def fast_norm(x):
    return sqrt(dot(x, x.T))


def asymptotic_decay(learning_rate, t, max_iter):
    return learning_rate / (1 + t / (max_iter / 2))


class HappySom(object):
    def __init__(self, x, y, input_len, sigma=1.0, learning_rate=0.5,
                 decay_function=asymptotic_decay,
                 neighborhood_function='gaussian', random_seed=None):
        if sigma >= x or sigma >= y:
            warn('Warning: sigma is too high for the dimension of the map.')

        self._random_generator = random.RandomState(random_seed)

        self._learning_rate = learning_rate
        self._sigma = sigma
        self._input_len = input_len
        # random initialization
        self._weights = self._random_generator.rand(x, y, input_len) * 2 - 1
        self._weights /= linalg.norm(self._weights, axis=-1, keepdims=True)

        self._activation_map = zeros((x, y))
        self._neigx = arange(x)
        self._neigy = arange(y)  # used to evaluate the neighborhood function
        self._decay_function = decay_function

        neig_functions = {'gaussian': self._gaussian,
                          'mexican_hat': self._mexican_hat,
                          'bubble': self._bubble,
                          'triangle': self._triangle}

        if neighborhood_function not in neig_functions:
            msg = '%s not supported. Functions available: %s'
            raise ValueError(msg % (neighborhood_function,
                                    ', '.join(neig_functions.keys())))

        if neighborhood_function in ['triangle',
                                     'bubble'] and divmod(sigma, 1)[1] != 0:
            warn('sigma should be an integer when triangle or bubble' +
                 'are used as neighborhood function')

        self.neighborhood = neig_functions[neighborhood_function]

    def get_weights(self):
        """Returns the weights of the neural network."""
        return self._weights

    def _activate(self, x):
        s = subtract(x, self._weights)  # x - w
        self._activation_map = linalg.norm(s, axis=-1)

    def activate(self, x):
        """Returns the activation map to x."""
        self._activate(x)
        return self._activation_map

    def _gaussian(self, c, sigma):
        """Returns a Gaussian centered in c."""
        d = 2 * pi * sigma * sigma
        ax = exp(-power(self._neigx - c[0], 2) / d)
        ay = exp(-power(self._neigy - c[1], 2) / d)
        return outer(ax, ay)  # the external product gives a matrix

    def _mexican_hat(self, c, sigma):
        """Mexican hat centered in c."""
        xx, yy = meshgrid(self._neigx, self._neigy)
        p = power(xx - c[0], 2) + power(yy - c[1], 2)
        d = 2 * pi * sigma * sigma
        return exp(-p / d) * (1 - 2 / d * p)

    def _bubble(self, c, sigma):
        ax = logical_and(self._neigx > c[0] - sigma,
                         self._neigx < c[0] + sigma)
        ay = logical_and(self._neigy > c[1] - sigma,
                         self._neigy < c[1] + sigma)
        return outer(ax, ay) * 1.

    def _triangle(self, c, sigma):
        """Triangular function centered in c with spread sigma."""
        triangle_x = (-abs(c[0] - self._neigx)) + sigma
        triangle_y = (-abs(c[1] - self._neigy)) + sigma
        triangle_x[triangle_x < 0] = 0.
        triangle_y[triangle_y < 0] = 0.
        return outer(triangle_x, triangle_y)
    
    @staticmethod
    def _check_iteration_number(num_iteration):
        if num_iteration < 1:
            raise ValueError('num_iteration must be > 1')

    def _check_input_len(self, data):
        """Checks that the data in input is of the correct shape."""
        data_len = len(data[0])
        if self._input_len != data_len:
            msg = 'Received %d features, expected %d.' % (data_len,
                                                          self._input_len)
            raise ValueError(msg)

    def winner(self, x):
        """Computes the coordinates of the winning neuron for the sample x."""
        self._activate(x)
        return unravel_index(self._activation_map.argmin(),
                             self._activation_map.shape)

    def update(self, x, win, t, max_iteration):
        eta = self._decay_function(self._learning_rate, t, max_iteration)
        # sigma and learning rate decrease with the same rule
        sig = self._decay_function(self._sigma, t, max_iteration)
        # improves the performances
        g = self.neighborhood(win, sig) * eta
        # w_new = eta * neighborhood_function * (x-w)
        self._weights += einsum('ij, ijk->ijk', g, x - self._weights)

    def quantization(self, data):
        self._check_input_len(data)
        q = zeros(data.shape)
        for i, x in enumerate(data):
            q[i] = self._weights[self.winner(x)]
        return q

    def random_weights_init(self, data):
        self._check_input_len(data)
        it = nditer(self._activation_map, flags=['multi_index'])
        while not it.finished:
            rand_i = self._random_generator.randint(len(data))
            self._weights[it.multi_index] = data[rand_i]
            it.iternext()

    def pca_weights_init(self, data):
        if self._input_len == 1:
            msg = 'The data needs at least 2 features for pca initialization'
            raise ValueError(msg)
        self._check_input_len(data)
        if len(self._neigx) == 1 or len(self._neigy) == 1:
            msg = 'PCA initialization inappropriate:' + \
                  'One of the dimensions of the map is 1.'
            warn(msg)
        pc_length, pc = linalg.eig(cov(transpose(data)))
        pc_order = argsort(-pc_length)
        for i, c1 in enumerate(linspace(-1, 1, len(self._neigx))):
            for j, c2 in enumerate(linspace(-1, 1, len(self._neigy))):
                self._weights[i, j] = c1 * pc[pc_order[0]] + c2 * pc[pc_order[1]]

    def train(self, data, num_iteration, random_order=False, verbose=False):
        self._check_iteration_number(num_iteration)
        self._check_input_len(data)
        iterations = _build_iteration_indexes(len(data), num_iteration,
                                              verbose, random_order)
        for t, iteration in enumerate(iterations):
            self.update(data[iteration], self.winner(data[iteration]),
                        t, num_iteration)
        if verbose:
            print('\n quantization error:', self.quantization_error(data))
            print(' topographic error:', self.topographic_error(data))

    def train_random(self, data, num_iteration, verbose=False):
        self.train(data, num_iteration, random_order=True, verbose=verbose)

    def train_batch(self, data, num_iteration, verbose=False):
        self.train(data, num_iteration, random_order=False, verbose=verbose)

    def distance_map(self):
        um = zeros((self._weights.shape[0], self._weights.shape[1]))
        it = nditer(um, flags=['multi_index'])
        while not it.finished:
            for ii in range(it.multi_index[0] - 1, it.multi_index[0] + 2):
                for jj in range(it.multi_index[1] - 1, it.multi_index[1] + 2):
                    if (0 <= ii < self._weights.shape[0] and
                            0 <= jj < self._weights.shape[1]):
                        w_1 = self._weights[ii, jj, :]
                        w_2 = self._weights[it.multi_index]
                        um[it.multi_index] += fast_norm(w_1 - w_2)
            it.iternext()
        return um / um.max()

    def activation_response(self, data):
        self._check_input_len(data)
        a = zeros((self._weights.shape[0], self._weights.shape[1]))
        for x in data:
            a[self.winner(x)] += 1
        return a

    def quantization_error(self, data):
        self._check_input_len(data)
        error = 0
        for x in data:
            error += fast_norm(x - self._weights[self.winner(x)])
        return error / len(data)

    def topographic_error(self, data):
        self._check_input_len(data)
        total_neurons = prod(self._activation_map.shape)
        if total_neurons == 1:
            warn('The topographic error is not defined for a 1-by-1 map.')
            return nan

        def are_adjacent(a, b):
            """Gives 0 if a and b are neighbors, 0 otherwise"""
            return not (abs(a[0] - b[0]) <= 1 and abs(a[1] - b[1]) <= 1)

        error = 0
        for x in data:
            self.activate(x)
            activations = self._activation_map
            flat_map = activations.reshape(total_neurons)
            indexes = argsort(flat_map)
            bmu_1 = unravel_index(where(indexes == 0)[0][0],
                                  self._activation_map.shape)
            bmu_2 = unravel_index(where(indexes == 1)[0][0],
                                  self._activation_map.shape)
            error += are_adjacent(bmu_1, bmu_2)
        return error / float(len(data))
    def win_map(self, data):
        self._check_input_len(data)
        winmap = defaultdict(list)
        for x in data:
            winmap[self.winner(x)].append(x)
        return winmap

    def labels_map(self, data, labels):
        self._check_input_len(data)
        if not len(data) == len(labels):
            raise ValueError('data and labels must have the same length.')
        winmap = defaultdict(list)
        for x, l in zip(data, labels):
            winmap[self.winner(x)].append(l)
        for position in winmap:
            winmap[position] = Counter(winmap[position])
        return winmap



In [4]:
# Unsupervised Learning
import sys
#sys.path.insert(0, '../')
#import sklearn
#from minisom import MiniSom

from sklearn.model_selection import train_test_split
#from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report

import numpy as np
from numpy.linalg import  norm
#import matplotlib.pyplot as plt
#from matplotlib.gridspec import GridSpec
#%matplotlib inline

#%load_ext autoreload

data = np.genfromtxt(input_data, delimiter=',', usecols=(5, 6, 7, 25, 28, 37))
data = np.apply_along_axis(lambda x: x/norm(x), 1, data)#https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.apply_along_axis.html 
labels = np.genfromtxt(input_data, delimiter=',', usecols=(43), dtype=str)


import numpy as np
#import matplotlib.pyplot as plt
#from matplotlib.gridspec import GridSpec
#%matplotlib inline

#%load_ext autoreload

data = np.genfromtxt(input_data, delimiter=',', usecols=(5, 6, 7, 25, 28, 37))
data = np.apply_along_axis(lambda x: x/np.linalg.norm(x), 1, data)#https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.apply_along_axis.html 
labels = np.genfromtxt(input_data, delimiter=',', usecols=(43), dtype=str)

# Initialization and training
som = HappySom(20, 20, 6, sigma=3, learning_rate=0.5, neighborhood_function='triangle', random_seed=10)



class_assignments = som.labels_map(data, labels)

def classify(som, data, class_assignments):
   
    winmap = class_assignments
    default_class = np.sum(list(winmap.values())).most_common()[0][0]#Return : Sum of the array elements (a scalar value if axis is none) or array with sum values along the specified axis.
    # most_common function returns a list, which is sorted based on the count of the elements
    result = []
    for d in data:
        win_position = som.winner(d)
        if win_position in winmap:
            result.append(winmap[win_position].most_common()[0][0])
        else:
            result.append(default_class)
    return result


X_train, X_test, y_train, y_test = train_test_split(data, labels)

som.pca_weights_init(X_train)#Principal Component Analysisn,initializing deep neural networks,https://arxiv.org/abs/1702.00177
som.train_random(X_train, 5000, verbose=False)#verbose is an optional argument which can be used to report more information about an operation in your program.
class_assignments = som.labels_map(X_train, y_train)

print(classification_report(y_test, classify(som, X_test, class_assignments)))





  _warn_prf(average, modifier, msg_start, len(result))


                precision    recall  f1-score   support

      Analysis       0.00      0.00      0.00       497
      Backdoor       0.00      0.00      0.00       421
           DoS       0.87      0.00      0.01      3106
      Exploits       0.53      0.81      0.64      8353
       Fuzzers       0.54      0.59      0.57      4562
       Generic       1.00      0.98      0.99     10016
        Normal       0.86      0.86      0.86     14051
Reconnaissance       0.87      0.75      0.81      2554
     Shellcode       0.31      0.09      0.14       248
         Worms       0.00      0.00      0.00        28

      accuracy                           0.76     43836
     macro avg       0.50      0.41      0.40     43836
  weighted avg       0.78      0.76      0.73     43836

