# Cyber Attacks Classification
Author : Muhammad Yousif 

Contact : hellomyousif@gmail.com

### Importing libraries

In [1]:
# importing libraries
import pandas as pd
import numpy as np
import seaborn as sns

### Loading dataset

In [2]:
# Loading dataset
df = pd.read_csv('cybersecurity_attacks_clean.csv')
df.head(3)

Unnamed: 0.1,Unnamed: 0,Timestamp,Source IP Address,Destination IP Address,Source Port,Destination Port,Protocol,Packet Length,Packet Type,Traffic Type,...,Action Taken,Severity Level,User Information,Device Information,Network Segment,Geo-location Data,Proxy Information,Firewall Logs,IDS/IPS Alerts,Log Source
0,2,2022-11-13 08:23:25,63.79.210.48,198.219.82.17,16811,53600,UDP,306,Control,HTTP,...,Ignored,Low,Himmat Karpe,Mozilla/5.0 (compatible; MSIE 9.0; Windows NT ...,Segment C,"Bokaro, Rajasthan",114.133.48.179,Log Data,Alert Data,Firewall
1,7,2023-02-12 07:13:17,11.48.99.245,178.157.14.116,34489,20396,ICMP,1022,Data,DNS,...,Logged,High,Yuvaan Dubey,Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_7_6...,Segment A,"Phagwara, Andhra Pradesh",192.31.159.5,Log Data,Alert Data,Firewall
2,46,2023-05-16 13:01:56,170.211.138.30,172.97.181.148,25022,6593,TCP,554,Control,DNS,...,Blocked,High,Aradhya Kamdar,Mozilla/5.0 (iPod; U; CPU iPhone OS 3_3 like M...,Segment A,"Amravati, Kerala",95.170.137.42,Log Data,Alert Data,Firewall


In [3]:
# drop null values from data
df.drop(columns='Unnamed: 0',inplace=True,axis=1)

In [4]:
# central tendencies of data
df.describe()

Unnamed: 0,Source Port,Destination Port,Packet Length,Anomaly Scores
count,1237.0,1237.0,1237.0,1237.0
mean,33930.291027,33388.203719,784.83266,50.048593
std,18731.627558,18599.370814,419.686322,28.754095
min,1109.0,1041.0,64.0,0.03
25%,17763.0,17240.0,416.0,26.97
50%,35259.0,33574.0,791.0,48.99
75%,50057.0,49881.0,1159.0,75.17
max,65493.0,65451.0,1500.0,99.98


### splitting data

In [5]:
# splitting data
X = df.drop(columns=['Action Taken','Timestamp'])

In [6]:
# set y axis
y = df['Action Taken']

In [7]:
# encoding input data
from sklearn.preprocessing._encoders import OrdinalEncoder
OE = OrdinalEncoder()
X = OE.fit_transform(X)

In [8]:
# encoding label
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
y = encoder.fit_transform(y)
print("0 Ignore 1:Logged, 2: Blocked")

0 Ignore 1:Logged, 2: Blocked


### Splitting data into train test sets

In [9]:
# Splitting data into train test sets
from sklearn.model_selection import train_test_split
X_train, y_train,X_test, y_test = train_test_split(X,y,test_size=0.2)

### Defining the model

In [10]:
# defining the model
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

### Fitting the SVM Classifier

In [11]:
svm_model = SVC()
svm_model.fit(X,y)
pred = svm_model.predict(X)
accuracy = accuracy_score(pred,y)
print(f"SVM Model's Accuracy Score :{round(accuracy,2)*100} %")

SVM Model's Accuracy Score :47.0 %


### Fitting the KNN Classifier

In [12]:
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(X,y)
pred = knn_model.predict(X)
accuracy = accuracy_score(pred,y)
print(f"KNN Model's Accuracy Score :{round(accuracy,2)*100} %")

KNN Model's Accuracy Score :62.0 %


### Fitting the Gaussian Naive Bayes Classifier

In [13]:
nb_model = GaussianNB()
nb_model.fit(X,y)
pred = nb_model.predict(X)
accuracy = accuracy_score(pred,y)
print(f"Naive Bayes Model's Accuracy Score :{round(accuracy,2)*100} %")

Naive Bayes Model's Accuracy Score :39.0 %


### Fitting the Randome forest Classifier

In [14]:
rfc_model = RandomForestClassifier(n_estimators=24,criterion='log_loss',max_depth=7,min_samples_split=3)
rfc_model.fit(X,y)
pred = rfc_model.predict(X)
accuracy = accuracy_score(pred,y)
print(f"Randome Forest Model's Accuracy Score :{round(accuracy,2)*100} %")

Randome Forest Model's Accuracy Score :81.0 %


### Fitting the Gradient boosting Classifier

In [15]:
gbc_model = GradientBoostingClassifier()
gbc_model.fit(X,y)
pred = gbc_model.predict(X)
accuracy = accuracy_score(pred,y)
print(f"Gradient Boosting Model's Accuracy Score :{round(accuracy,2)*100} %")

Gradient Boosting Model's Accuracy Score :85.0 %


### Savin the model

In [20]:
import pickle
filename = 'Cybersecurity_attacks_classification.sav'
pickle.dump(gbc_model, open(filename, 'wb'))