## Internet firewall data classification
> #### Author : Muhammad Yousif
> #### Contact hellomyousif@gmail.com

### Importing necessary libraries

In [1]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Loading dataset

In [2]:
# Load the firewall log CSV into a DataFrame and preview its first three rows.
df = pd.read_csv(filepath_or_buffer='log2.csv')
df.head(n=3)

Unnamed: 0,Source Port,Destination Port,NAT Source Port,NAT Destination Port,Action,Bytes,Bytes Sent,Bytes Received,Packets,Elapsed Time (sec),pkts_sent,pkts_received
0,57222,53,54587,53,allow,177,94,83,2,30,1,1
1,56258,3389,56258,3389,allow,4768,1600,3168,19,17,10,9
2,6881,50321,43265,50321,allow,238,118,120,2,1199,1,1


In [3]:
# dataset information
# Prints the index range, every column's non-null count and dtype, and the
# frame's memory usage, so column types can be checked before modelling.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 65532 entries, 0 to 65531
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Source Port           65532 non-null  int64 
 1   Destination Port      65532 non-null  int64 
 2   NAT Source Port       65532 non-null  int64 
 3   NAT Destination Port  65532 non-null  int64 
 4   Action                65532 non-null  object
 5   Bytes                 65532 non-null  int64 
 6   Bytes Sent            65532 non-null  int64 
 7   Bytes Received        65532 non-null  int64 
 8   Packets               65532 non-null  int64 
 9   Elapsed Time (sec)    65532 non-null  int64 
 10  pkts_sent             65532 non-null  int64 
 11  pkts_received         65532 non-null  int64 
dtypes: int64(11), object(1)
memory usage: 6.0+ MB


In [4]:
# Count the missing entries in each column of the dataset.
null_counts = df.isnull().sum()
null_counts

Source Port             0
Destination Port        0
NAT Source Port         0
NAT Destination Port    0
Action                  0
Bytes                   0
Bytes Sent              0
Bytes Received          0
Packets                 0
Elapsed Time (sec)      0
pkts_sent               0
pkts_received           0
dtype: int64

### Model building and testing

In [5]:
# importing object for splitting data into train and test sets
from sklearn.model_selection import train_test_split

In [6]:
# Pick the numeric feature columns (x) and the target label column (y).
feature_columns = [
    'Source Port',
    'Destination Port',
    'NAT Source Port',
    'NAT Destination Port',
    'Bytes',
    'Bytes Sent',
    'Bytes Received',
    'Packets',
    'Elapsed Time (sec)',
    'pkts_sent',
    'pkts_received',
]
x = df[feature_columns]
y = df['Action']

In [7]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=12,
)

### KNN

In [8]:
# Build a 5-nearest-neighbours classifier and train it on the training split.
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X=x_train, y=y_train)

In [9]:
# accuracy score object
from sklearn.metrics import accuracy_score

In [10]:
# Predict labels for the held-out rows with the fitted KNN model.
prediction = knn.predict(X=x_test)

In [11]:
# Evaluate the model: fraction of correct test predictions, shown as a percentage.
accuracy = accuracy_score(y_true=y_test, y_pred=prediction)
accuracy * 100

99.42015716792554

### Decision tree classifier

In [1]:
# importing decision tree model and training it on the training split
from sklearn.tree import DecisionTreeClassifier
# Seed the estimator: without random_state the tree can differ from run to run,
# so the reported accuracy would not be reproducible. The seed matches the one
# used for the train/test split.
tree_model = DecisionTreeClassifier(random_state=12)
tree_model.fit(x_train,y_train)


KeyboardInterrupt



In [13]:
# Predict on the held-out rows with the decision tree, then report its accuracy.
prediction = tree_model.predict(X=x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=prediction)
print(accuracy)

0.9984740978103304


### SVM Classifier

In [14]:
# importing support vector machine model
from sklearn.svm import SVC
svm_model = SVC()
svm_model.fit(x_train,y_train)

In [15]:
# Predict labels for the held-out rows with the fitted SVM model.
pred = svm_model.predict(X=x_test)

In [16]:
# Score the SVM: fraction of test rows whose predicted label matches the truth.
accuracy = accuracy_score(y_true=y_test, y_pred=pred)
accuracy

0.7400625619897765

### Random forest classifier

In [17]:
# importing random forest classifier and training it on the training split
from sklearn.ensemble import RandomForestClassifier
# Seed the estimator: the forest draws bootstrap samples and feature subsets at
# random, so without random_state the accuracy changes between runs. The seed
# matches the one used for the train/test split.
rfc = RandomForestClassifier(random_state=12)
rfc.fit(x_train,y_train)

In [18]:
# Predict labels for the held-out rows with the fitted random forest.
prediction = rfc.predict(X=x_test)

In [19]:
# Score the random forest: fraction of test rows predicted correctly.
accuracy = accuracy_score(y_true=y_test, y_pred=prediction)
accuracy

0.9983978027008469

In [20]:
# save the model (disabled: uncomment the lines below to pickle the trained random forest)
# import pickle
# filename = 'Internet-Firewall-access-Classification.sav'
# pickle.dump(rfc, open(filename, 'wb'))