In [1]:
import numpy as np
import pandas as pd
import os
import cv2
import math
import random
import matplotlib.pyplot as plt
import shutil
from sklearn.preprocessing import QuantileTransformer
from PIL import Image
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Mount Google Drive so that files under /content/drive (the dataset CSV and
# the image output folders used below) are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Load the dataset CSV from the mounted Google Drive into a DataFrame
dataset_csv = '/content/drive/MyDrive/pcadata.csv'
df = pd.read_csv(dataset_csv)

In [4]:
# Preview the first rows to check the column layout of the loaded CSV
df.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,18,19,20,21,22,23,24,25,26,Label
0,0,-36347590.0,-2731521.0,-38744.382421,-910281.2,1488298.0,-601774.713241,-288628.199811,-177674.771568,-211877.939462,...,-35919.700881,12746.940366,-608.87669,23997.798092,-6645.309166,1795.652568,-51996.221431,-1956.509157,-4960.959558,BENIGN
1,1,-36265970.0,-2722872.0,-38307.402398,-1656263.0,1469721.0,-580657.527176,-260199.397541,-164061.224448,-187593.539686,...,-19616.181521,16254.674188,-489.082607,26914.149359,-8757.004594,16145.855547,-127267.865542,-1858.779186,-5365.627649,BENIGN
2,2,-36347170.0,-2731175.0,-38750.355066,-938887.0,1488468.0,-601764.457588,-288570.495872,-177639.614623,-211729.480512,...,-35398.97013,12298.060648,-592.635065,21958.369399,-5867.770318,12307.586029,-114121.529494,-1907.699722,-4874.594843,BENIGN
3,3,-36281160.0,-2716825.0,-38397.053038,-1660352.0,1468725.0,-588455.033899,-268116.333956,-166589.736051,-186343.99147,...,-36435.41261,11499.630919,-585.43633,13583.069014,1012.711207,13732.791447,-125995.712993,-1683.267344,-3910.757531,BENIGN
4,4,-36341300.0,-2722810.0,-38333.914329,-1664156.0,1494745.0,-603205.453279,-289162.646044,-177922.913928,-212200.166507,...,-35584.211658,12458.681665,-611.992589,22608.174447,-6077.619714,8973.744947,-95326.817386,-2057.293849,-5189.166174,BENIGN


In [5]:
# Class distribution of the dataset: "BENIGN" marks normal traffic and every
# other label is an attack type. The counts show how imbalanced the classes are.
df['Label'].value_counts()

BENIGN              1296087
DoS Hulk             231073
PortScan             158930
DDoS                 128027
DoS GoldenEye         10293
FTP-Patator            7938
SSH-Patator            5897
DoS slowloris          5796
DoS Slowhttptest       5499
Bot                    1966
Infiltration             36
Heartbleed               11
Name: Label, dtype: int64

## Data Transformation
Convert tabular data to images
Procedures:
1. Use quantile transform to transform the original data samples into the scale of [0,255], representing pixel values
2. Generate images for each class (BENIGN and each attack type). Each image is built from a single data sample with 27 features, reshaped to 3*3*3: length 3, width 3, and 3 color channels (RGB).

In [6]:
# Transform all numeric features into the scale of [0,1].
# The quantile transform is rank-based, so each column ends up roughly
# uniformly distributed regardless of its original magnitude or outliers.
numeric_features = df.select_dtypes(include='number').columns

# QuantileTransformer estimates its quantiles from a random subsample when the
# input is large, so the seed is pinned to make Restart & Run All reproducible.
scaler = QuantileTransformer(random_state=0)
df[numeric_features] = scaler.fit_transform(df[numeric_features])

In [7]:
# Stretch the scaled feature values by a factor of 255 so they cover the
# 8-bit pixel range [0,255].
# NOTE: this rescales df in place, so run the cell only once per kernel session.
df[numeric_features] = df[numeric_features] * 255

In [8]:
# Summary statistics to confirm that every numeric column now spans 0 to 255
df.describe()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,17,18,19,20,21,22,23,24,25,26
count,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,...,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0,1851553.0
mean,127.2377,128.0228,127.7231,127.0968,127.1711,127.3975,127.8763,127.732,127.4455,127.3874,...,127.3885,127.1948,127.2903,127.432,127.7247,127.4306,127.4435,127.7418,127.7518,127.3668
std,73.71005,73.67986,73.55667,73.84735,73.41446,73.6579,73.53552,73.52789,73.77063,73.58242,...,73.46742,73.69892,73.67885,73.51561,73.46544,73.57837,73.6214,73.64223,73.73318,73.77156
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,63.38594,64.21029,64.16506,62.72569,63.81273,63.79979,64.43191,64.17383,63.41363,63.71468,...,64.01838,63.1389,63.38609,64.16985,64.27378,63.62019,63.84653,63.99797,63.72869,63.12532
50%,126.7835,128.1192,127.8062,127.1381,126.8214,127.3684,127.8812,127.7434,127.3065,127.3329,...,127.1384,126.9673,127.6028,127.0131,127.5924,127.4769,127.3239,127.8008,127.8747,127.4936
75%,191.3166,192.0368,191.7095,191.1473,190.7008,190.8237,191.6293,191.7583,191.7477,191.041,...,190.7343,191.2179,191.1029,191.0046,191.1599,191.0697,191.4962,191.5783,191.7065,191.4973
max,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,...,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0,255.0


In [9]:
# Remove the leftover CSV index columns ("Unnamed: 0.1" and "Unnamed: 0") by name.
# Dropping only df.columns[0] leaves one index column behind, which gives 28
# values per row and breaks the later reshape(3,3,3). It is also not
# idempotent: each re-run would delete another column, eventually a real feature.
index_columns = [col for col in df.columns if str(col).startswith('Unnamed')]
df = df.drop(columns=index_columns)

All features are in the same scale of [0,255]
### Generate images for each class

In [10]:
# List the distinct class labels in order of first appearance
df['Label'].unique()

array(['BENIGN', 'Infiltration', 'DDoS', 'DoS slowloris',
       'DoS Slowhttptest', 'DoS Hulk', 'DoS GoldenEye', 'Heartbleed',
       'PortScan', 'Bot', 'FTP-Patator', 'SSH-Patator'], dtype=object)

In [11]:
# Build one feature-only frame per traffic class (Label column removed).
# The df0..df11 numbering follows the order of the class names listed here.
class_names = [
    'BENIGN', 'Infiltration', 'DDoS', 'DoS slowloris', 'DoS Slowhttptest',
    'DoS Hulk', 'DoS GoldenEye', 'Heartbleed', 'PortScan', 'Bot',
    'FTP-Patator', 'SSH-Patator',
]
(df0, df1, df2, df3, df4, df5,
 df6, df7, df8, df9, df10, df11) = (
    df.loc[df['Label'] == name].drop(columns=['Label'])
    for name in class_names
)

In [12]:
# Sanity check: the values of a single row come back as a NumPy array,
# which is what the reshape into an image relies on.
type(df0.iloc[0].values)

numpy.ndarray

In [13]:
# Sanity check: the per-class subset is still a pandas DataFrame
type(df0)

pandas.core.frame.DataFrame

In [14]:
# Export the first 10 BENIGN samples as PNG images.
# Each row is reshaped to 3x3x3, i.e. a 3x3 pixel image with 3 color channels.
image_path = "/content/drive/MyDrive/train/benign/"
# exist_ok=True keeps the cell re-runnable; a bare os.makedirs raises
# FileExistsError as soon as the folder already exists.
os.makedirs(image_path, exist_ok=True)

for i in range(0, 10):
  # Casting to uint8 truncates the fractional part of each value.
  pixels = df0.iloc[i].to_numpy().reshape(3, 3, 3).astype(np.uint8)
  Image.fromarray(pixels).save(os.path.join(image_path, f"{i}.png"))

In [15]:
# Show the first BENIGN sample arranged as a 3x3x3 block, before the cast
# to uint8 that happens when the image is written.
df0.iloc[0].to_numpy().reshape(3, 3, 3)

array([[[ 36.77410386,  69.20489401,  81.35962873],
        [177.28457372, 147.73500634, 102.96581601],
        [111.36794477, 100.38345868, 117.56458113]],

       [[ 48.24545578, 107.52750524, 121.47164109],
        [ 89.19909064, 177.96074076,  69.91276742],
        [ 98.89758527,  90.91123495, 138.09693154]],

       [[ 93.03238686, 145.90697867,  97.55174086],
        [165.97917657,  89.00669984,  55.39769891],
        [185.33392427,  96.3195527 ,  99.83769007]]])

In [15]:
# Export the first 10 BENIGN samples as PNG images.
# Each row is reshaped to 3x3x3, i.e. a 3x3 pixel image with 3 color channels.
image_path = "/content/drive/MyDrive/train/benign/"
# exist_ok=True is required here: the folder may already exist from an earlier
# run, and a bare os.makedirs would then raise FileExistsError.
os.makedirs(image_path, exist_ok=True)

for i in range(0, 10):
  # Casting to uint8 truncates the fractional part of each value.
  pixels = df0.iloc[i].to_numpy().reshape(3, 3, 3).astype(np.uint8)
  Image.fromarray(pixels).save(os.path.join(image_path, f"{i}.png"))