## ***Training Model***

# A Transfer Learning and Optimized CNN Based Intrusion Detection System
**Data pre-processing**  
Procedures:  
&nbsp; 1): Read the dataset  
&nbsp; 2): Transform the tabular data into images  
&nbsp; 3): Display the transformed images  
&nbsp; 4): Split the training and test set  

## Import libraries

In [1]:
import numpy as np
import pandas as pd
import os
import cv2
import math
import random
import matplotlib.pyplot as plt
import shutil
from sklearn.preprocessing import QuantileTransformer
from PIL import Image
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Mount Google Drive at /content/drive so the notebook can read and write files stored there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [17]:
# Read the dataset CSV from the mounted Google Drive into a DataFrame.
df=pd.read_csv('/content/drive/MyDrive/balanced_data.csv')

In [18]:
# Preview the first rows of the loaded dataset.
df.head()

Unnamed: 0.1,Unnamed: 0,Flow ID,Source IP,Source Port,Destination IP,Destination Port,Protocol,Timestamp,Flow Duration,Total Fwd Packets,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,0,35929,2100,443,2744,49916,6,14653,65,3,...,20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN
1,1,195043,2210,52673,2608,53,17,160,91765,2,...,20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN
2,2,58449,1337,45245,1593,53,17,27,295,2,...,40,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN
3,3,70012,696,58208,865,53,17,80,53034,4,...,32,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN
4,4,31918,2777,443,1590,50506,6,119,71,1,...,32,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,BENIGN


In [19]:
# Drop the leading 'Unnamed: ...' column (presumably an index that was written
# into the CSV by an earlier export).
# The name check keeps this cell idempotent: a positional, in-place drop would
# remove a real feature column every time the cell is re-run.
if str(df.columns[0]).startswith('Unnamed'):
    df = df.drop(columns=df.columns[0])

In [20]:
# Separate the target column (Y) from the feature columns (X).
X = df.drop(columns='Label')
Y = df['Label']

In [21]:
# Show (rows, columns) of the feature matrix.
X.shape

(1851553, 83)

In [22]:
# Show the number of labels; it should match the row count of X.
Y.shape

(1851553,)

# ***PCA to reduce dimensions to 27***

In [23]:
from sklearn.decomposition import PCA

# Project the feature matrix onto its first 27 principal components.
# random_state pins the randomized SVD solver (which scikit-learn may select
# automatically for data of this size) so repeated runs give the same output.
# NOTE(review): X is not standardized before PCA, so large-magnitude columns
# will dominate the components — confirm this is intended.
pca = PCA(n_components=27, random_state=42)
_X = pca.fit_transform(X)

In [24]:
# Show (rows, components) of the PCA-transformed data.
_X.shape

(1851553, 27)

In [26]:
# Check the container type returned by fit_transform.
type(_X)

numpy.ndarray

In [27]:
# Wrap the PCA output array in a DataFrame (default integer column names and index).
finaldata = pd.DataFrame(data=_X)

In [28]:
# Preview the first rows of the PCA components.
finaldata.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,17,18,19,20,21,22,23,24,25,26
0,-36347590.0,-2731521.0,-38744.382421,-910281.2,1488298.0,-601774.713241,-288628.199811,-177674.771568,-211877.939462,-26452.128011,...,-15119.018637,-35919.700881,12746.940366,-608.87669,23997.798092,-6645.309166,1795.652568,-51996.221431,-1956.509157,-4960.959558
1,-36265970.0,-2722872.0,-38307.402398,-1656263.0,1469721.0,-580657.527176,-260199.397541,-164061.224448,-187593.539686,-22322.031527,...,-29093.554233,-19616.181521,16254.674188,-489.082607,26914.149359,-8757.004594,16145.855547,-127267.865542,-1858.779186,-5365.627649
2,-36347170.0,-2731175.0,-38750.355066,-938887.0,1488468.0,-601764.457588,-288570.495872,-177639.614623,-211729.480512,-26214.331486,...,-15047.018792,-35398.97013,12298.060648,-592.635065,21958.369399,-5867.770318,12307.586029,-114121.529494,-1907.699722,-4874.594843
3,-36281160.0,-2716825.0,-38397.053038,-1660352.0,1468725.0,-588455.033899,-268116.333956,-166589.736051,-186343.99147,-24062.757776,...,-18368.612475,-36435.41261,11499.630919,-585.43633,13583.069014,1012.711207,13732.791447,-125995.712993,-1683.267344,-3910.757531
4,-36341300.0,-2722810.0,-38333.914329,-1664156.0,1494745.0,-603205.453279,-289162.646044,-177922.913928,-212200.166507,-26412.311579,...,-15091.885473,-35584.211658,12458.681665,-611.992589,22608.174447,-6077.619714,8973.744947,-95326.817386,-2057.293849,-5189.166174


In [29]:
# Attach the Label column to the PCA components; rows are aligned on the index.
merged = pd.concat(objs=[finaldata, Y], axis="columns")

In [30]:
# Preview the PCA components together with their labels.
merged.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,26,Label
0,-36347590.0,-2731521.0,-38744.382421,-910281.2,1488298.0,-601774.713241,-288628.199811,-177674.771568,-211877.939462,-26452.128011,...,-35919.700881,12746.940366,-608.87669,23997.798092,-6645.309166,1795.652568,-51996.221431,-1956.509157,-4960.959558,BENIGN
1,-36265970.0,-2722872.0,-38307.402398,-1656263.0,1469721.0,-580657.527176,-260199.397541,-164061.224448,-187593.539686,-22322.031527,...,-19616.181521,16254.674188,-489.082607,26914.149359,-8757.004594,16145.855547,-127267.865542,-1858.779186,-5365.627649,BENIGN
2,-36347170.0,-2731175.0,-38750.355066,-938887.0,1488468.0,-601764.457588,-288570.495872,-177639.614623,-211729.480512,-26214.331486,...,-35398.97013,12298.060648,-592.635065,21958.369399,-5867.770318,12307.586029,-114121.529494,-1907.699722,-4874.594843,BENIGN
3,-36281160.0,-2716825.0,-38397.053038,-1660352.0,1468725.0,-588455.033899,-268116.333956,-166589.736051,-186343.99147,-24062.757776,...,-36435.41261,11499.630919,-585.43633,13583.069014,1012.711207,13732.791447,-125995.712993,-1683.267344,-3910.757531,BENIGN
4,-36341300.0,-2722810.0,-38333.914329,-1664156.0,1494745.0,-603205.453279,-289162.646044,-177922.913928,-212200.166507,-26412.311579,...,-35584.211658,12458.681665,-611.992589,22608.174447,-6077.619714,8973.744947,-95326.817386,-2057.293849,-5189.166174,BENIGN


In [32]:
# Save the PCA components and labels to Google Drive.
# to_csv writes the index as an unnamed first column by default, so a reader of
# this file will see an extra 'Unnamed: 0' column unless it sets index_col=0.
merged.to_csv('/content/drive/MyDrive/pcadata.csv')

## Data Transformation
Convert tabular data to images
Procedures:
1. Use quantile transform to transform the original data samples into the scale of [0,255], representing pixel values

In [7]:
# Collect the names of every column whose dtype is not 'object'
# (i.e. everything except string-valued columns such as Label).
numeric_features = df.columns[(df.dtypes != 'object').to_numpy()]
numeric_features

Index(['Flow ID', 'Source IP', 'Source Port', 'Destination IP',
       'Destination Port', 'Protocol', 'Timestamp', 'Flow Duration',
       'Total Fwd Packets', 'Total Backward Packets',
       'Total Length of Fwd Packets', 'Total Length of Bwd Packets',
       'Fwd Packet Length Max', 'Fwd Packet Length Min',
       'Fwd Packet Length Mean', 'Fwd Packet Length Std',
       'Bwd Packet Length Max', 'Bwd Packet Length Min',
       'Bwd Packet Length Mean', 'Bwd Packet Length Std', 'Flow Bytes/s',
       'Flow Packets/s', 'Flow IAT Mean', 'Flow IAT Std', 'Flow IAT Max',
       'Flow IAT Min', 'Fwd IAT Total', 'Fwd IAT Mean', 'Fwd IAT Std',
       'Fwd IAT Max', 'Fwd IAT Min', 'Bwd IAT Total', 'Bwd IAT Mean',
       'Bwd IAT Std', 'Bwd IAT Max', 'Bwd IAT Min', 'Fwd PSH Flags',
       'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags', 'Fwd Header Length',
       'Bwd Header Length', 'Fwd Packets/s', 'Bwd Packets/s',
       'Min Packet Length', 'Max Packet Length', 'Packet Length Mean',
  

In [8]:
# Map every numeric feature onto a uniform distribution over [0, 1].
# QuantileTransformer estimates its quantiles from a random subsample of the
# rows, so random_state is fixed to make the scaled values reproducible.
scaler = QuantileTransformer(random_state=42)
df[numeric_features] = scaler.fit_transform(df[numeric_features])

In [9]:
# Stretch the quantile-transformed features from [0,1] to the pixel range [0,255].
# This overwrites df in place, so running the cell a second time scales the values again.
df[numeric_features] = df[numeric_features] * 255

In [10]:
# Summary statistics of the scaled features; min and max confirm the [0,255] range.
df.describe()

Unnamed: 0,Flow ID,Source IP,Source Port,Destination IP,Destination Port,Protocol,Timestamp,Flow Duration,Total Fwd Packets,Total Backward Packets,...,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
count,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,...,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0
mean,127.5287,127.688,127.396,127.4476,127.495,139.4323,127.5085,127.1347,119.4408,123.6704,...,111.5105,127.4265,49.6519,15.5657,49.59934,49.67981,50.59351,18.30005,50.47708,50.55809
std,73.47422,73.52517,73.34071,73.32814,71.8203,76.74451,73.34319,73.43971,82.79394,77.60842,...,89.6458,66.98773,94.17079,60.06469,94.0691,94.2193,94.78025,64.48271,94.56743,94.71722
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,63.95221,64.70721,64.05295,54.36937,41.35135,88.57357,64.06907,63.55856,0.0,78.74625,...,0.0,60.75075,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,127.3566,127.8829,127.1471,127.3724,114.0991,88.57357,127.5,127.0178,108.994,145.2402,...,132.0946,129.4144,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,190.9928,190.548,190.703,176.3814,174.7222,255.0,190.6757,190.5139,190.2928,194.1216,...,185.4429,188.1231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,...,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0


All features are in the same scale of [0,255]

### Generate images for each class

In [11]:
# List the distinct class labels present in the dataset.
df.Label.unique()

array(['BENIGN', 'Infiltration', 'DDoS', 'DoS slowloris',
       'DoS Slowhttptest', 'DoS Hulk', 'DoS GoldenEye', 'Heartbleed',
       'PortScan', 'Bot', 'FTP-Patator', 'SSH-Patator'], dtype=object)

In [12]:
def _class_features(label):
    """Return the rows of df whose Label equals `label`, without the Label column."""
    return df.loc[df['Label'] == label, df.columns != 'Label']


# One feature frame per class; the position in this list is the class number
# used in the dfN variable names.
_class_labels = [
    'BENIGN',
    'Infiltration',
    'DDoS',
    'DoS slowloris',
    'DoS Slowhttptest',
    'DoS Hulk',
    'DoS GoldenEye',
    'Heartbleed',
    'PortScan',
    'Bot',
    'FTP-Patator',
    'SSH-Patator',
]
(df0, df1, df2, df3, df4, df5,
 df6, df7, df8, df9, df10, df11) = (_class_features(name) for name in _class_labels)

In [None]:
# Generate color images for class 0 (Normal).
# Each image stacks 3*n consecutive samples of n features into an (n, n, 3)
# array, so the image size follows the number of feature columns in the frame.
# NOTE(review): the previous hard-coded 27 rows / reshape(3,3,3) could not hold
# 27 rows of this frame's features (the reshape raises ValueError) and every
# 28th row was skipped; confirm that an n*n image is the intended size.
def save_class_images(frame, image_path):
    """Write consecutive blocks of rows from `frame` as PNG images.

    Args:
        frame: DataFrame of numeric features, expected to be scaled to [0, 255]
            (values are cast to uint8).
        image_path: Output directory, including the trailing slash. It is
            created when missing and reused when it already exists.

    Returns:
        The number of images written. Trailing rows that do not fill a
        complete image are not saved.
    """
    channels = 3  # RGB
    os.makedirs(image_path, exist_ok=True)  # allow the cell to be re-run

    n_features = frame.shape[1]
    if n_features == 0:
        return 0
    rows_per_image = channels * n_features

    saved = 0
    for start in range(0, len(frame) - rows_per_image + 1, rows_per_image):
        stop = start + rows_per_image
        # Row-major flattening of the block matches appending row after row.
        block = frame.iloc[start:stop].to_numpy()
        pixels = block.reshape(n_features, n_features, channels).astype(np.uint8)
        # File name is the position of the last sample contained in the image.
        Image.fromarray(pixels).save(image_path + str(stop - 1) + '.png')
        saved += 1
    return saved


image_path = "/content/drive/MyDrive/train/0/"
save_class_images(df0, image_path)

In [15]:
# Number of values in one row of df (feature columns plus Label).
len(df.iloc[0].values)

84

In [16]:
# Inspect the scaled values of the first sample.
df.iloc[0].values

array([90.92183491829532, 142.4455224455224, 24.75975975975976,
       204.2042042042042, 232.01147848355973, 88.57357357357357,
       232.75184555514667, 50.92342342342342, 163.74624624624622,
       78.74624624624624, 107.97297297297297, 80.66066066066065,
       111.8018018018018, 160.55555555555557, 104.90990990990991,
       181.2312312310042, 83.21321321321321, 157.10960960960963,
       84.1066066066066, 0.0, 218.04513752814842, 227.23999077447354,
       24.504504503228226, 96.35588970524239, 42.372372372372375,
       85.63813813813813, 136.8168168168168, 123.83360633360633,
       155.9675313482953, 136.39139139139138, 129.03153153153153, 0.0,
       0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 110.14264264264264,
       65.72822822822823, 231.08847308883594, 223.09309309799403,
       162.34234234234236, 92.78528528528528, 91.38138138138137,
       106.44144144144144, 107.33483483483484, 0.0, 0.0, 0.0, 0.0, 255.0,
       0.0, 0.0, 0.0, 0.0, 91.76426426426427, 105.16516516516516,