In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import MinMaxScaler
import random
random.seed(0)
tf.random.set_seed(0)
np.random.seed(0)

## **Load the Data**

In [2]:
train = pd.read_csv('/content/train_kdd_nsl.csv')
test = pd.read_csv('/content/test_kdd_nsl.csv')

In [3]:
train = train.drop_duplicates()
train = train.drop(columns = ['service','land','wrong_fragment','urgent','hot','num_failed_logins','rerror_rate','srv_rerror_rate','is_guest_login','is_hot_login','num_outband_cmds','num_access_files','num_shells','num_compromised'],axis=0)

In [4]:
test = test.drop_duplicates()
test = test.drop(columns = ['service','land','wrong_fragment','urgent','hot','num_failed_logins','rerror_rate','srv_rerror_rate','is_guest_login','is_hot_login','num_outband_cmds','num_access_files','num_shells','num_compromised'],axis=0)

In [5]:
train = train.loc[(train["connection_type"] == 'normal')|(train["connection_type"] == 'satan')]
test = test.loc[(test["connection_type"] == 'normal')|(test["connection_type"] == 'satan')]

In [6]:
train = train.reset_index(drop = True)   #reset index for the train data
test = test.reset_index(drop = True)     #reset index for the test data

In [7]:
train['protocole_type'] = [0.8 if elem == 'tcp' else 0.15 if elem == 'udp' else 0.05 for elem in train['protocole_type']]
train['flag'] = [0.6 if elem=='SF' else 0.2 if elem=='S0' else 0.1 if elem=='REJ' else 0.05 for elem in train['flag']]

In [8]:
test['protocole_type'] = [0.8 if elem == 'tcp' else 0.15 if elem == 'udp' else 0.05 for elem in test['protocole_type']]
test['flag'] = [0.6 if elem=='SF' else 0.2 if elem=='S0' else 0.1 if elem=='REJ' else 0.05 for elem in test['flag']]

In [9]:
#This function is used to encode a vector as follows:
'''
when the vector element == 'normal', it will be transformed to 1 and to 0 if not
'''
def encoding(y):
  enc = [1 if y[i]=='normal' else 0 for i in range(len(y))]
  return pd.DataFrame(enc)

In [10]:
y_train = train['connection_type']
y_test = test['connection_type']

In [11]:
y_train = np.array(encoding(y_train))
y_test = np.array(encoding(y_test))

In [12]:
train = train.drop(['connection_type'],axis=1)
test = test.drop(['connection_type'],axis=1)

In [13]:
scaler = MinMaxScaler()
train = scaler.fit_transform(train)
train = np.array(train)
train = train / train.sum(axis=1, keepdims=True)
train = np.clip(train,0.0000005,None)

In [14]:
scaler = MinMaxScaler()
test = scaler.fit_transform(test)
test = np.array(test)
test = test / test.sum(axis=1, keepdims=True)
test = np.clip(test,0.0000005,None)

## **SVM**

In [15]:
from sklearn.svm import SVC
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix, accuracy_score

In [16]:
# Train SVM classifier
print("Training SVM classifier...")
svm = SVC(kernel='rbf', C=20,gamma='auto', random_state=42)
svm.fit(train, np.ravel(y_train))

Training SVM classifier...


In [17]:
# Test SVM classifier
print("Testing SVM classifier...")
y_pred_svm = svm.predict(test)

Testing SVM classifier...


In [18]:
def re_encoding(y):
  enc = [0 if y[i]==1 else 1 for i in range(len(y))]
  return pd.DataFrame(enc)

In [19]:
y_tesst = np.array(re_encoding(y_test))
y_pred_svm = np.array(re_encoding(y_pred_svm))

In [20]:
# Evaluate the performance of the classifier
print("F1 Score:", f1_score(y_tesst, y_pred_svm))
print("Precision:", precision_score(y_tesst, y_pred_svm))
print("Recall:", recall_score(y_tesst, y_pred_svm))
print("Confusion Matrix:\n", confusion_matrix(y_tesst, y_pred_svm))
print("Accuracy:", accuracy_score(y_tesst, y_pred_svm))

F1 Score: 0.8473479948253557
Precision: 0.8076448828606658
Recall: 0.891156462585034
Confusion Matrix:
 [[9555  156]
 [  80  655]]
Accuracy: 0.977407620141681
