<a href="https://colab.research.google.com/github/AliBaiee/Rectangle/blob/master/Project3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
input_data = 'drive/My Drive/SOM/tcp.csv'

In [0]:
# -*- coding: utf-8 -*-
"""project1.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1wTrPDqUqdBA2aUDmSNhLEGktv3SkXs4r
"""

#from google.colab import drive
#drive.mount('/content/drive')





from math import sqrt
from numpy import (unravel_index, nditer, linalg, random, subtract,
                   power, exp, pi, zeros, arange, outer, meshgrid, dot,
                   logical_and, cov, argsort, linspace, transpose,
                   einsum, prod, where, nan)
from collections import defaultdict, Counter
from warnings import warn
from sys import stdout
from time import time
from datetime import timedelta


def iter_ind(data_len, num_iter,
                             verbose=False, ran_ord=False):
    iterations = arange(num_iter) % data_len
    if ran_ord:
        random.shuffle(iterations)
    if verbose:
        return wrap_indx_ver(iterations)
    else:
        return iterations


def wrap_indx_ver(iterations):
    """Returns the value in iterations that print the stdout status."""
    m = len(iterations)
    digits = len(str(m))
    forward = '\r [ {s:{d}} / {m} ] {s:3.0f}% - ? it/s'
    forward  = forward .format(m=m, d=digits, s=0)
    stdout.write(forward)
    beginning = time()
    stdout.write(forward)
    for i, it in enumerate(iterations):
        yield it
        sec_left = ((m - i + 1) * (time() - beginning)) / (i + 1)
        time_left = str(timedelta(seconds=sec_left))[:7]
        forward = '\r [ {i:{d}} / {m} ]'.format(i=i + 1, d=digits, m=m)
        forward += ' {p:3.0f}%'.format(p=100 * (i + 1) / m)
        forward += ' - {time_left} left '.format(time_left=time_left)
        stdout.write(forward )


def f_norm(x):
    return sqrt(dot(x, x.T))


def asy_decay(learning_rate, t, max_iter):
    return learning_rate / (1 + t / (max_iter / 2))


class HappySom(object):
    def __init__(self, x, y, input_len, sigma=1.0, learning_rate=0.5,
                 decay_function=asy_decay,
                 neighbor_fun='gaussian', random_seed=None):
        if sigma >= x or sigma >= y:
            warn('Warning: sigma is too high for the dimension of the map.')

        self._random_generator = random.RandomState(random_seed)

        self._learning_rate = learning_rate
        self._sigma = sigma
        self._input_len = input_len
        # random initialization
        self._weights = self._random_generator.rand(x, y, input_len) * 2 - 1
        self._weights /= linalg.norm(self._weights, axis=-1, keepdims=True)

        self._activation_map = zeros((x, y))
        self._neigx = arange(x)
        self._neigy = arange(y)  # used to evaluate the neighborhood function
        self._decay_function = decay_function

        neig_functions = {'gaussian': self.gaussian_eq,
                          'mexican_hat': self.mexi_hat,
                          'bubble': self.bubble,
                          'triangle': self.triangle}

        if neighbor_fun not in neig_functions:
            msg = '%s not supported. Functions available: %s'
            raise ValueError(msg % (neighbor_fun,
                                    ', '.join(neig_functions.keys())))

        if neighbor_fun in ['triangle',
                                     'bubble'] and divmod(sigma, 1)[1] != 0:
            warn('sigma should be an integer when triangle or bubble' +
                 'are used as neighborhood function')

        self.neighbor = neig_functions[neighbor_fun]

    def g_weights(self):
        """Returns the weights of the neural network."""
        return self._weights

    def g_activate(self, x):
        s = subtract(x, self._weights)  # x - w
        self._activation_map = linalg.norm(s, axis=-1)

    def activate(self, x):
        """Brings the activation map back to x."""
        self.g_activate(x)
        return self._activation_map

    def gaussian_eq(self, c, sigma):
        """Returns a based Gaussian"""
        d = 2 * pi * sigma * sigma
        ax = exp(-power(self._neigx - c[0], 2) / d)
        ay = exp(-power(self._neigy - c[1], 2) / d)
        return outer(ax, ay)  # the external product gives a matrix

    def mexi_hat(self, c, sigma):
        """Mexican hat centered """
        xx, yy = meshgrid(self._neigx, self._neigy)
        p = power(xx - c[0], 2) + power(yy - c[1], 2)
        d = 2 * pi * sigma * sigma
        return exp(-p / d) * (1 - 2 / d * p)

    def bubble(self, c, sigma):
        ax = logical_and(self._neigx > c[0] - sigma,
                         self._neigx < c[0] + sigma)
        ay = logical_and(self._neigy > c[1] - sigma,
                         self._neigy < c[1] + sigma)
        return outer(ax, ay) * 1.

    def triangle(self, c, sigma):
        """Triangular function centered in c with spread sigma."""
        triangle_x = (-abs(c[0] - self._neigx)) + sigma
        triangle_y = (-abs(c[1] - self._neigy)) + sigma
        triangle_x[triangle_x < 0] = 0.
        triangle_y[triangle_y < 0] = 0.
        return outer(triangle_x, triangle_y)
    
    @staticmethod
    def ch_iter_nu(num_iteration):
        if num_iteration < 1:
            raise ValueError('num_iteration must be > 1')

    def winner(self, x):
        """Computes the winning neuron co-ordinates of the sample x."""
        self.g_activate(x)
        return unravel_index(self._activation_map.argmin(),
                             self._activation_map.shape)

    def update(self, x, win, t, max_iteration):
        eta = self._decay_function(self._learning_rate, t, max_iteration)
        # sigma and learning rate decrease with the same rule
        sig = self._decay_function(self._sigma, t, max_iteration)
        # improves  performances
        g = self.neighbor(win, sig) * eta
        self._weights += einsum('ij, ijk->ijk', g, x - self._weights)

    def _check_input_len(self, data):
        """Verifies data input in correct form."""
        data_len = len(data[0])
        if self._input_len != data_len:
            msg = 'Received %d features, expected %d.' % (data_len,
                                                          self._input_len)
            raise ValueError(msg)

    def random_weights_init(self, data):
        self._check_input_len(data)
        it = nditer(self._activation_map, flags=['multi_index'])
        while not it.finished:
            rand_i = self._random_generator.randint(len(data))
            self._weights[it.multi_index] = data[rand_i]
            it.iternext()

    def pca_wei_init(self, data):
        if self._input_len == 1:
            msg = 'The data needs at least 2 features for pca initialization'
            raise ValueError(msg)
        self._check_input_len(data)
        if len(self._neigx) == 1 or len(self._neigy) == 1:
            msg = 'PCA initialization inappropriate:' + \
                  'One of the dimensions of the map is 1.'
            warn(msg)
        pc_length, pc = linalg.eig(cov(transpose(data)))
        pc_order = argsort(-pc_length)
        for i, c1 in enumerate(linspace(-1, 1, len(self._neigx))):
            for j, c2 in enumerate(linspace(-1, 1, len(self._neigy))):
                self._weights[i, j] = c1 * pc[pc_order[0]] + c2 * pc[pc_order[1]]

    def train(self, data, num_iteration, ran_ord=False, verbose=False):
        self.ch_iter_nu(num_iteration)
        self._check_input_len(data)
        iterations = iter_ind(len(data), num_iteration,
                                              verbose, ran_ord)
        for t, iteration in enumerate(iterations):
            self.update(data[iteration], self.winner(data[iteration]),
                        t, num_iteration)
        if verbose:
            print('\n quantization error:', self.quan_err(data))
            print(' topographic error:', self.topo_err(data))

    def train_ran(self, data, num_iteration, verbose=False):
        self.train(data, num_iteration, ran_ord=True, verbose=verbose)

    def quantization(self, data):
        self._check_input_len(data)
        q = zeros(data.shape)
        for i, x in enumerate(data):
            q[i] = self._weights[self.winner(x)]
        return q

    def train_batch(self, data, num_iteration, verbose=False):
        self.train(data, num_iteration, ran_ord=False, verbose=verbose)

    def distance(self):
        um = zeros((self._weights.shape[0], self._weights.shape[1]))
        it = nditer(um, flags=['multi_index'])
        while not it.finished:
            for ii in range(it.multi_index[0] - 1, it.multi_index[0] + 2):
                for jj in range(it.multi_index[1] - 1, it.multi_index[1] + 2):
                    if (0 <= ii < self._weights.shape[0] and
                            0 <= jj < self._weights.shape[1]):
                        w_1 = self._weights[ii, jj, :]
                        w_2 = self._weights[it.multi_index]
                        um[it.multi_index] += f_norm(w_1 - w_2)
            it.iternext()
        return um / um.max()

    def activa(self, data):
        self._check_input_len(data)
        a = zeros((self._weights.shape[0], self._weights.shape[1]))
        for x in data:
            a[self.winner(x)] += 1
        return a

    def quan_err(self, data):
        self._check_input_len(data)
        err = 0
        for x in data:
            err += f_norm(x - self._weights[self.winner(x)])
        return err / len(data)

    def topo_err(self, data):
        self._check_input_len(data)
        total_neurons = prod(self._activation_map.shape)
        if total_neurons == 1:
            warn('Topographic error is not defined for a 1-by-1 map.')
            return nan

        def adjacent(a, b):
            """Gives 0 if a and b are neighbors, 1 otherwise"""
            return not (abs(a[0] - b[0]) <= 1 and abs(a[1] - b[1]) <= 1)

        error = 0
        for x in data:
            self.activate(x)
            activations = self._activation_map
            flat_map = activations.reshape(total_neurons)
            indexes = argsort(flat_map)
            bmu_1 = unravel_index(where(indexes == 0)[0][0],
                                  self._activation_map.shape)
            bmu_2 = unravel_index(where(indexes == 1)[0][0],
                                  self._activation_map.shape)
            error += adjacent(bmu_1, bmu_2)
        return error / float(len(data))
    def win_map(self, data):
        self._check_input_len(data)
        winmap = defaultdict(list)
        for x in data:
            winmap[self.winner(x)].append(x)
        return winmap

    def m_label(self, data, labels):
        self._check_input_len(data)
        if not len(data) == len(labels):
            raise ValueError('data and labels must have the same length.')
        winmap = defaultdict(list)
        for x, l in zip(data, labels):
            winmap[self.winner(x)].append(l)
        for position in winmap:
            winmap[position] = Counter(winmap[position])
        return winmap




In [0]:
# Unsupervised Learning
import sys
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import numpy as np
from numpy.linalg import  norm

#%load_ext autoreload

data = np.genfromtxt(input_data, delimiter=',', usecols=(3, 4, 5, 6, 7,8 ,9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,21 ,22 ,23, 24, 25, 26, 27 ,28, 29, 30, 31, 32, 33, 34, 36, 37, 40))#35
data = np.apply_along_axis(lambda x: x/norm(x), 1, data)#https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.apply_along_axis.html 
labels = np.genfromtxt(input_data, delimiter=',', usecols=(39), dtype=str)

# Initialization and training
trainedSom = HappySom(20, 20, 35, sigma=3, learning_rate=0.5, neighbor_fun='triangle', random_seed=10)

class_assignments = trainedSom.m_label(data, labels)
X_train, X_test, y_train, y_test = train_test_split(data, labels)

trainedSom.pca_wei_init(X_train)#Principal Component Analysisn,initializing deep neural networks,https://arxiv.org/abs/1702.00177
trainedSom.train_ran(X_train, 5000, verbose=False)#verbose is an optional argument which can be used to report more information about an operation in your program.


In [0]:
def classify(data):
    winmap = trainedSom.m_label(X_train, y_train)
    default_class = np.sum(list(winmap.values())).most_common()[0][0]#Return : Sum of the array elements (a scalar value if axis is none) or array with sum values along the specified axis.
    # most_common function returns a list, which is sorted based on the count of the elements
    result = []
    for d in data:
        win_position = trainedSom.winner(d)
        if win_position in winmap:
            result.append(winmap[win_position].most_common()[0][0])
        else:
            result.append(default_class)
    return result

print(classification_report(y_test, classify(X_test)))

                precision    recall  f1-score   support

      Analysis       0.00      0.00      0.00       122
      Backdoor       0.00      0.00      0.00        56
           DoS       0.00      0.00      0.00       589
      Exploits       0.38      0.43      0.40      4884
       Fuzzers       0.15      0.00      0.01      2865
       Generic       0.00      0.00      0.00       141
        Normal       0.60      0.87      0.71      9874
Reconnaissance       0.00      0.00      0.00      1278
     Shellcode       0.00      0.00      0.00       150
         Worms       0.00      0.00      0.00        28

      accuracy                           0.54     19987
     macro avg       0.11      0.13      0.11     19987
  weighted avg       0.41      0.54      0.45     19987



  _warn_prf(average, modifier, msg_start, len(result))


In [0]:
predicted_dataset = X_test[-5:]
print(predicted_dataset)

[[7.74321011e-09 4.83950632e-09 3.99041494e-06 4.27812359e-07
  7.42142625e-09 1.50024696e-08 6.09777796e-08 1.85983854e-05
  1.95576407e-06 3.14567911e-09 1.93580253e-09 1.29796573e-08
  2.01523659e-08 7.58837611e-07 2.24394466e-08 2.24559192e-01
  9.74460450e-01 3.67509690e-11 1.93072105e-11 1.74437586e-11
  1.24617288e-07 2.12938278e-08 0.00000000e+00 0.00000000e+00
  2.41975316e-10 2.41975316e-10 2.41975316e-10 2.41975316e-10
  2.41975316e-10 2.41975316e-10 0.00000000e+00 0.00000000e+00
  2.41975316e-10 2.41975316e-10 2.41975316e-10]
 [2.87807792e-09 3.59759740e-09 3.80625805e-07 2.95722506e-07
  1.48049275e-06 1.11525519e-08 1.04330325e-08 6.45146484e-04
  5.15559811e-04 7.19519480e-10 7.19519480e-10 1.94475682e-10
  1.43903896e-10 1.25744692e-08 2.60995618e-10 9.78360287e-01
  2.06906901e-01 2.38880467e-13 1.89953143e-13 4.89273247e-14
  4.74882857e-08 2.95002987e-08 3.59759740e-10 2.95002987e-08
  3.59759740e-10 0.00000000e+00 1.07927922e-09 3.59759740e-10
  3.59759740e-10 3.597

In [0]:
print(classify(predicted_dataset))

['Normal', 'Normal', 'Exploits', 'Exploits', 'Exploits']


In [0]:
predicted_row = X_test[77:78]
print(predicted_row)

[[7.72308182e-08 1.66818567e-08 9.53856211e-05 9.84229547e-07
  6.23074621e-08 1.91532429e-08 7.78486647e-08 5.05916847e-04
  5.14416982e-06 3.76886393e-08 2.47138618e-09 1.87771566e-09
  8.65867634e-09 2.14768165e-07 1.18088334e-08 3.58350073e-01
  9.33587147e-01 1.04345014e-11 5.17971651e-12 5.25478487e-12
  3.81520242e-07 1.82264731e-08 0.00000000e+00 0.00000000e+00
  3.08923273e-10 3.08923273e-10 2.47138618e-09 3.08923273e-10
  3.08923273e-10 3.08923273e-10 0.00000000e+00 0.00000000e+00
  3.08923273e-10 3.08923273e-10 3.08923273e-10]]


In [0]:
print(classify([predicted_row]))

['Normal']


In [33]:
predicted_row = X_test[11876:11877]
check = classify(predicted_row)[0]
if(check == "Normal"):
    print("dont worry")
elif(check == 'Exploits'):
    print('Exploits Send Message to System Administrator')
elif(check == "Fuzzers"):
     print('Fuzzers')
elif(check == "DoS "):
    print('DoS')
elif(check == "Generic"):
    print('Generic')
elif(check == "Reconnaissance"):
      print('Reconnaissance Send Message to System Administrator')
elif(check == "Shellcode"):
      print('Shell Code Delete')
elif(check == "Worms"):
      print('Worms delete')

Exploits Send Message to System Administrator
