<a href="https://colab.research.google.com/github/Chibueze-20/Automatic-Detection-of-HTTP-injection-Attacks-using-CNN-and-DNN/blob/main/Detection_of_HTTP_injection_attack_using_deep_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Detecting HTTP injection attacks using a DNN model and a CNN model

In [None]:
# Colab-only setup: mount Google Drive under /content/drive so that files
# stored on Drive can be read through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import keras.losses as lossx
from keras.layers import (Activation, Conv1D, Dense, Dropout, Embedding,
                          Flatten, Input, MaxPooling1D)
from keras.models import Model, Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.utils.vis_utils import plot_model

## Define the helper functions

### Extract the dataset into a pandas DataFrame

In [None]:
def ExtractDataframe(csvdataset_path):
    """Load a CSV dataset into a pandas DataFrame.

    Args:
        csvdataset_path: Anything ``pandas.read_csv`` accepts as its first
            argument (a path string or a file-like object).

    Returns:
        The DataFrame parsed by ``pandas.read_csv`` with default options.
    """
    dataframe = pd.read_csv(csvdataset_path)
    return dataframe


### Build the tokenizer

In [None]:
def BuildTokenizer(vocabulary,sequence):
    """Build a character-level Keras ``Tokenizer`` with a fixed alphabet.

    The index learned by ``fit_on_texts`` is discarded and replaced by one
    derived from ``vocabulary``: each distinct character receives an id from
    1 upward in first-seen order, and the out-of-vocabulary token ``'UNK'``
    receives the next free id. Id 0 is never assigned.

    Repeated characters in ``vocabulary`` are ignored after their first
    occurrence. Without this, a duplicate leaves a gap in the id range and the
    'UNK' id (computed from the number of entries) collides with the id of the
    last vocabulary character.

    Args:
        vocabulary: Iterable of characters (typically a string) defining the
            alphabet.
        sequence: Texts passed to ``Tokenizer.fit_on_texts``.

    Returns:
        The configured ``Tokenizer``; ``len(tokenizer.word_index)`` equals the
        number of distinct characters plus one (for 'UNK').
    """
    # dict.fromkeys removes duplicates while preserving insertion order, so the
    # ids stay contiguous (1..N) and N+1 is guaranteed to be unused.
    unique_chars = dict.fromkeys(vocabulary)
    char_dictionary = {char: index for index, char in enumerate(unique_chars, start=1)}

    tokenizer = Tokenizer(num_words=None,char_level=True,oov_token='UNK',lower=False)
    # Still fitted so the tokenizer's corpus statistics are populated; the
    # word index it learns here is overwritten below.
    tokenizer.fit_on_texts(sequence)

    char_dictionary[tokenizer.oov_token] = len(char_dictionary) + 1
    tokenizer.word_index = char_dictionary
    # Keep the reverse mapping in sync with the replaced index so decoding
    # sequences back to text uses the same ids.
    tokenizer.index_word = {index: char for char, index in char_dictionary.items()}
    return tokenizer

### Get the maximum length of an HTTP query or body

In [None]:
def MaxRequestLength(requests):
    """Return the length of the longest item in ``requests``.

    Args:
        requests: Iterable of sized objects (e.g. request strings).

    Returns:
        The largest ``len(item)`` found, or 0 when ``requests`` is empty.
    """
    return max((len(request) for request in requests), default=0)

### Preprocessing and dataset splitting

In [None]:
def PreprocessAndSplit(dataframe,vocabulary,test_split=0,max_length=840,random_state=None):
    """Encode the requests as padded character-id sequences and optionally split.

    Args:
        dataframe: DataFrame providing a 'Parameters' column (request text)
            and a 'Label' column (class ids passed to ``to_categorical`` with
            ``num_classes=2``).
        vocabulary: Alphabet forwarded to ``BuildTokenizer``.
        test_split: Value forwarded to ``train_test_split`` as ``test_size``.
            ``0`` (the default) disables splitting.
        max_length: Length every sequence is padded (or truncated) to.
            Defaults to 840, the value previously hard-coded here. Pass
            ``None`` to use the longest request in ``dataframe`` instead.
        random_state: Seed forwarded to ``train_test_split`` so the split can
            be reproduced. ``None`` (the default) keeps the split unseeded.

    Returns:
        A list. Without splitting:
        ``[tokenizer, max_length, vocabulary_size, data, targets]``.
        With splitting:
        ``[tokenizer, max_length, vocabulary_size, X_train, X_test, Y_train, Y_test]``.
        ``vocabulary_size`` is ``len(tokenizer.word_index)``.
    """
    requests = dataframe['Parameters'].values
    labels = dataframe['Label'].values

    tokenizer = BuildTokenizer(vocabulary,requests)
    character_indexes = tokenizer.texts_to_sequences(requests)

    if max_length is None:
        max_length = MaxRequestLength(requests)

    # Padding is appended after the request ('post'). No `truncating=` value is
    # passed, so Keras' default applies to requests longer than max_length.
    # NOTE(review): that default is documented as 'pre' (leading characters are
    # dropped) -- confirm this is the intended behaviour.
    data = pad_sequences(character_indexes,maxlen=max_length,padding='post')
    targets = to_categorical(labels,num_classes=2)
    vocabulary_size = len(tokenizer.word_index)

    if test_split==0:
        return [tokenizer,max_length,vocabulary_size,data,targets]

    X_train,X_test,Y_train,Y_test = train_test_split(
        data,targets,test_size=test_split,random_state=random_state)
    return [tokenizer,max_length,vocabulary_size,X_train,X_test,Y_train,Y_test]

### Build the CNN model

In [None]:
def BuildCNN(embeddingSize,inputSize,conv_layers,fully_connected_layers,num_classes,dropout_p,optimizer,loss):
    """Assemble and compile a 1-D convolutional classifier (functional API).

    Args:
        embeddingSize: Used both as the embedding output dimension and, plus
            one, as the embedding input dimension.
        inputSize: Length of each integer input sequence.
        conv_layers: Iterable of ``(filters, kernel_size, pool_size)``
            triples. Each adds a ``Conv1D`` followed by a ReLU ``Activation``;
            a ``MaxPooling1D`` is appended unless ``pool_size`` is ``-1``.
        fully_connected_layers: Iterable of unit counts; each adds a ReLU
            ``Dense`` layer followed by ``Dropout(dropout_p)``.
        num_classes: Number of units in the softmax output layer.
        dropout_p: Dropout rate used after every fully connected layer.
        optimizer: Passed to ``Model.compile``.
        loss: Passed to ``Model.compile``.

    Returns:
        The compiled Keras ``Model``, tracking the 'accuracy' metric.
    """
    # Embedding lookup over the integer-encoded input
    embedding = Embedding(embeddingSize+1,embeddingSize,input_length=inputSize)
    inputs = Input(shape=(inputSize,), name='input', dtype='int64')
    x = embedding(inputs)

    # Convolutional stack: Conv1D -> ReLU -> (optional) max pooling
    for n_filters, kernel_width, pool_width in conv_layers:
        x = Conv1D(n_filters,kernel_width)(x)
        x = Activation('relu')(x)
        if pool_width == -1:
            # -1 is the sentinel for "no pooling after this convolution"
            continue
        x = MaxPooling1D(pool_size=pool_width)(x)

    # Collapse the feature maps into one vector per sample
    x = Flatten()(x)

    # Fully connected stack: Dense(ReLU) -> Dropout
    for units in fully_connected_layers:
        x = Dense(units,activation='relu')(x)
        x = Dropout(dropout_p)(x)

    # Softmax output over the classes
    predictions = Dense(num_classes,activation='softmax')(x)

    classifier = Model(inputs=inputs, outputs=predictions)
    classifier.compile(optimizer=optimizer,loss=loss,metrics=['accuracy'])
    return classifier


### Build the DNN model

In [None]:
def BuildDNN(embeddingSize,inputSize,fully_connected_layers,num_classes,optimizer,loss,dropout_p=0.5):
    """Assemble and compile a fully connected classifier (``Sequential`` API).

    Args:
        embeddingSize: Used both as the embedding output dimension and, plus
            one, as the embedding input dimension.
        inputSize: Length of each integer input sequence.
        fully_connected_layers: Iterable of unit counts; each adds a ReLU
            ``Dense`` layer followed by ``Dropout(dropout_p)``.
        num_classes: Number of units in the softmax output layer.
        optimizer: Passed to ``Sequential.compile``.
        loss: Passed to ``Sequential.compile``.
        dropout_p: Dropout rate used after every fully connected layer.
            Defaults to 0.5, the rate previously hard-coded here, so existing
            calls behave the same; exposed to mirror ``BuildCNN``.

    Returns:
        The compiled Keras ``Sequential`` model, tracking the 'accuracy' metric.
    """
    #model definition
    DNNmodel = Sequential()
    #embedding layer definition
    DNNmodel.add(Embedding(embeddingSize+1,embeddingSize,input_length=inputSize))
    #flatten layer
    DNNmodel.add(Flatten())
    #fully connected layers
    for nodes in fully_connected_layers:
        DNNmodel.add(Dense(nodes,activation='relu'))
        DNNmodel.add(Dropout(dropout_p))
    #output layer
    DNNmodel.add(Dense(num_classes,activation='softmax'))
    #model
    DNNmodel.compile(optimizer=optimizer,loss=loss,metrics=['accuracy'])
    return DNNmodel

## Get the dataset, preprocess it, and build the DNN and CNN models

### Extract

In [None]:
# Read the CSV from the mounted Drive and keep only the request text and
# its label -- the two columns the preprocessing step reads.
train_df = (
    ExtractDataframe('/content/drive/My Drive/Dataset/dataset.csv')
    .loc[:, ['Parameters', 'Label']]
)

### Preprocess and split

In [None]:
# Encode the requests with a fixed character alphabet and hold out 30% of the
# rows. The returned list is laid out as
# [tokenizer, max_length, vocabulary_size, X_train, X_test, Y_train, Y_test].
# NOTE(review): '-' appears twice in the alphabet string below.
data = PreprocessAndSplit(
    dataframe=train_df,
    vocabulary="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{} ",
    test_split=0.3,
)
print("Max query lenght:", data[1])
print("Vocabulary Size:", data[2])
# Order shown: X_train, Y_train, X_test, Y_test
print(*(data[position] for position in (3, 5, 4, 6)), sep='\n')

Max query lenght: 840
Vocabulary Size: 96
[[12 15  1 ...  0  0  0]
 [19 60 19 ...  0  0  0]
 [ 7 54  7 ...  0  0  0]
 ...
 [ 4 20 14 ...  0  0  0]
 [ 1  5  8 ...  0  0  0]
 [19 15 20 ...  0  0  0]]
[[0. 1.]
 [0. 1.]
 [1. 0.]
 ...
 [1. 0.]
 [0. 1.]
 [0. 1.]]
[[16  3 13 ...  0  0  0]
 [ 5 19  1 ...  0  0  0]
 [ 5 15 21 ...  0  0  0]
 ...
 [ 1  9 14 ...  0  0  0]
 [ 5  5  8 ...  0  0  0]
 [ 9 20 20 ...  0  0  0]]
[[0. 1.]
 [1. 0.]
 [0. 1.]
 ...
 [1. 0.]
 [1. 0.]
 [1. 0.]]


### Create the CNN model

In [None]:
# data[2] is the vocabulary size and data[1] the padded sequence length.
# Each conv entry is (filters, kernel_size, pool_size); -1 means no pooling.
CnnModel = BuildCNN(
    embeddingSize=data[2],
    inputSize=data[1],
    conv_layers=[
        [256, 7, 3],
        [256, 7, 3],
        [256, 3, -1],
        [256, 3, -1],
        [256, 3, -1],
        [256, 3, 3],
    ],
    fully_connected_layers=[1024, 1024],
    num_classes=2,
    dropout_p=0.5,
    optimizer='adam',
    loss='categorical_crossentropy',
)
CnnModel.summary()

Model: "model_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           (None, 840)               0         
_________________________________________________________________
embedding_11 (Embedding)     (None, 840, 96)           9312      
_________________________________________________________________
conv1d_25 (Conv1D)           (None, 834, 256)          172288    
_________________________________________________________________
activation_25 (Activation)   (None, 834, 256)          0         
_________________________________________________________________
max_pooling1d_13 (MaxPooling (None, 278, 256)          0         
_________________________________________________________________
conv1d_26 (Conv1D)           (None, 272, 256)          459008    
_________________________________________________________________
activation_26 (Activation)   (None, 272, 256)          0  

### Build the DNN model

In [None]:
# data[2] is the vocabulary size and data[1] the padded sequence length.
DNNmodel = BuildDNN(
    embeddingSize=data[2],
    inputSize=data[1],
    fully_connected_layers=[1024, 1024],
    num_classes=2,
    optimizer='sgd',
    loss='categorical_crossentropy',
)
DNNmodel.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_14 (Embedding)     (None, 840, 96)           9312      
_________________________________________________________________
flatten_13 (Flatten)         (None, 80640)             0         
_________________________________________________________________
dense_37 (Dense)             (None, 1024)              82576384  
_________________________________________________________________
dropout_25 (Dropout)         (None, 1024)              0         
_________________________________________________________________
dense_38 (Dense)             (None, 1024)              1049600   
_________________________________________________________________
dropout_26 (Dropout)         (None, 1024)              0         
_________________________________________________________________
dense_39 (Dense)             (None, 2)                

## Train the DNN and CNN models

### Train the CNN model

In [None]:
# Train on (X_train, Y_train) and report metrics on the held-out
# (X_test, Y_test) split after every epoch.
CnnModel.fit(
    x=data[3],
    y=data[5],
    batch_size=128,
    epochs=5,
    validation_data=(data[4], data[6]),
)

Train on 30180 samples, validate on 12935 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f5ca9ca8908>

### Train the DNN model

In [None]:
# Train on (X_train, Y_train). The held-out (X_test, Y_test) split is passed
# as validation data, as in the CNN training cell, so per-epoch validation
# metrics are reported and the two models can be compared. Validation data is
# only evaluated, never trained on.
DNNmodel.fit(data[3],data[5],batch_size=128,epochs=10,validation_data=(data[4],data[6]))

In [None]:
# Score the trained CNN on the held-out split (X_test, Y_test).
CnnModel.evaluate(x=data[4], y=data[6])