In [1]:
#import relevant modules
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from matplotlib import pyplot as plt
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dense

# Reporting granularity: show at most 10 rows of a DataFrame and render
# floats with a single decimal place.
pd.set_option("display.max_rows", 10)
pd.set_option("display.float_format", "{:.1f}".format)

# Use a wider line width so printed NumPy arrays wrap less often.
np.set_printoptions(linewidth=200)

In [2]:
# Load the dataset.
dataset = pd.read_csv('log2.csv')

# Encode the dependent variable as binary: 'allow' -> 1, anything else -> 0.
dataset['Action'] = np.where(dataset['Action'] == 'allow', 1, 0)

# Select the label and the features by column name instead of by position.
# Positional slicing (all-but-last / last column) is only correct when
# 'Action' happens to be the last column of the CSV; otherwise the encoded
# label ends up inside X and y becomes an unrelated feature column.
X = dataset.drop(columns=['Action']).values
y = dataset['Action'].values

# Encoding data
`Action`, the dependent variable, contains 3 targets:
* allow -> [100]
* deny  -> [010]
* drop  -> [001]

# One-hot encode the Action column, append the indicator columns to the
# dataset, then drop the original Action column.
# NOTE(review): `final` does not appear to be used by the later cells
# visible in this notebook - confirm whether it is still needed.
dummies = pd.get_dummies(dataset['Action'])
merged = pd.concat([dataset, dummies], axis="columns")
final = merged.drop(columns=['Action'])

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; random_state is fixed so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [4]:
y_train

array([1, 0, 1, ..., 1, 0, 0])

## Performing Feature Scaling
* As you can see in the dataset, the values are not all in the same range, which makes the computation take much longer. To overcome this problem, we perform feature scaling.


* Feature scaling helps us normalize the data to a particular range.

In [5]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply those same
# statistics to the test split. Re-fitting on the test data would scale the
# two splits differently and leak test-set statistics into evaluation.
sc = StandardScaler()
X_train_normalized = sc.fit_transform(X_train)
X_test_normalized = sc.transform(X_test)

In [6]:
X.shape

(65532, 11)

In [7]:
def plot_curve(epochs, hist, list_of_metrics):
    """Draw one line per metric on a fresh figure, with epochs on the x-axis.

    Args:
        epochs: Sliceable sequence of epoch indices.
        hist: Object indexable by metric name that yields a sliceable series
            of per-epoch values (the notebook passes a pandas DataFrame).
        list_of_metrics: Names of the metrics in `hist` to plot.

    The first entry of both the epochs and each metric series is skipped.
    """
    plt.figure()
    plt.xlabel("Epoch")
    plt.ylabel("Value")

    for metric_name in list_of_metrics:
        metric_values = hist[metric_name]
        # Drop the first epoch from both axes so they stay aligned.
        plt.plot(epochs[1:], metric_values[1:], label=metric_name)

    plt.legend()


print("Loaded the plot_curve function.")

Loaded the plot_curve function.


## Create a Deep Neural Net Model

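* input: 11 units, one for each of the 11 features in the dataset
* hidden: 150 units, estimated for better performance
* output: 3 units for the three labels [100], [010], [001]; the index of the 1 identifies the target (note: the model defined below actually uses a single sigmoid output unit, because `Action` was encoded as binary when the dataset was loaded)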

In [8]:
def create_model(my_learning_rate):
    """Build and compile a small feed-forward binary classifier.

    Args:
        my_learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled `tf.keras` Sequential model with a single sigmoid output,
        trained with binary cross-entropy and reporting accuracy.
    """
    model = tf.keras.models.Sequential()

    # First dense layer (11 units). No explicit input layer is declared.
    model.add(Dense(units=11, kernel_initializer='normal', activation='relu'))

    # Second hidden layer (150 units).
    model.add(Dense(units=150, activation='relu'))

    # Batch normalization followed by dropout regularization.
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(rate=0.2))

    # Output layer: one sigmoid unit for the binary label.
    model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

    # Compile with an explicit Adam instance so the requested learning rate
    # is actually applied; passing the string 'adam' would ignore
    # `my_learning_rate` and fall back to the optimizer's default.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=my_learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [9]:
def train_model(model, train_features, train_label, epochs,
                batch_size, validation_split):
    """Fit `model` on the given data and return its per-epoch history.

    Args:
        model: Object exposing a Keras-style `fit` method.
        train_features: Training inputs forwarded to `fit`.
        train_label: Training targets forwarded to `fit`.
        epochs: Number of epochs to train for.
        batch_size: Batch size forwarded to `fit`.
        validation_split: Validation fraction forwarded to `fit`.

    Returns:
        A tuple `(epochs, hist)`: the `epoch` attribute of the object
        returned by `fit`, and a DataFrame built from its `history` dict.
    """
    fit_result = model.fit(
        train_features,
        train_label,
        batch_size,
        epochs=epochs,
        validation_split=validation_split,
    )

    # Snapshot the metrics recorded at each epoch so training progress
    # can be inspected or plotted afterwards.
    epoch_indices = fit_result.epoch
    metrics_by_epoch = pd.DataFrame(fit_result.history)
    return epoch_indices, metrics_by_epoch

In [None]:
# Hyperparameters.
learning_rate = 0.003
epochs = 10
batch_size = 30
validation_split = 0.2

# Build the model.
my_model = create_model(learning_rate)

# Train on the normalized training set. Note that `epochs` is rebound here:
# it goes in as the epoch count and comes back as the list of epoch indices
# returned by train_model.
epochs, hist = train_model(
    model=my_model,
    train_features=X_train_normalized,
    train_label=y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=validation_split,
)

# Plot the selected metric against the epochs.
list_of_metrics_to_plot = ['accuracy']
plot_curve(epochs, hist, list_of_metrics_to_plot)

# Evaluate against the held-out test set.
print("\n Evaluate the new model against the test set:")
my_model.evaluate(x=X_test_normalized, y=y_test, batch_size=batch_size)

In [11]:
my_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 11)                132       
                                                                 
 dense_1 (Dense)             (None, 150)               1800      
                                                                 
 batch_normalization (BatchN  (None, 150)              600       
 ormalization)                                                   
                                                                 
 dropout (Dropout)           (None, 150)               0         
                                                                 
 dense_2 (Dense)             (None, 1)                 151       
                                                                 
Total params: 2,683
Trainable params: 2,383
Non-trainable params: 300
____________________________________________________

In [14]:
# Save `my_model` to the file 'logsModel.h5' (path is relative to the
# current working directory).
my_model.save('logsModel.h5')