<a href="https://colab.research.google.com/github/DiaconuDenis/Intrusion-Detection-System-IDS-Using-Machine-Learning/blob/main/ids_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing the libraries

In [None]:
pip install scapy

Collecting scapy
  Downloading scapy-2.6.1-py3-none-any.whl.metadata (5.6 kB)
Downloading scapy-2.6.1-py3-none-any.whl (2.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scapy
Successfully installed scapy-2.6.1


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Importing the dataset

In [None]:
# Load the flow CSV (per its file name, a Friday-afternoon DDoS capture) into a
# DataFrame; the file is decoded as latin1.
df = pd.read_csv(
    'Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv',
    encoding='latin1',
)


In [None]:
# Positional indices of the columns to discard: 14, 15 and the contiguous run 49..69.
indices_to_drop = [14, 15, *range(49, 70)]

# Translate the positions into column labels and remove those columns from df in place.
df.drop(columns=df.columns[indices_to_drop], inplace=True)


In [None]:
# Print a summary of the remaining columns: names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 225745 entries, 0 to 225744
Data columns (total 56 columns):
 #   Column                        Non-Null Count   Dtype  
---  ------                        --------------   -----  
 0    Destination Port             225745 non-null  int64  
 1    Flow Duration                225745 non-null  int64  
 2    Total Fwd Packets            225745 non-null  int64  
 3    Total Backward Packets       225745 non-null  int64  
 4   Total Length of Fwd Packets   225745 non-null  int64  
 5    Total Length of Bwd Packets  225745 non-null  int64  
 6    Fwd Packet Length Max        225745 non-null  int64  
 7    Fwd Packet Length Min        225745 non-null  int64  
 8    Fwd Packet Length Mean       225745 non-null  float64
 9    Fwd Packet Length Std        225745 non-null  float64
 10  Bwd Packet Length Max         225745 non-null  int64  
 11   Bwd Packet Length Min        225745 non-null  int64  
 12   Bwd Packet Length Mean       225745 non-nul

# Creating X and y

In [None]:
# Feature matrix = every column except the last; target vector = the last column.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

# Encoding the variables

In [None]:
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split

# Encode the class labels as integers.
le = LabelEncoder()
y = le.fit_transform(y)

# Fit the scaler on the training rows only, so that test-set statistics do not
# leak into preprocessing. The train/test split itself happens in a later cell;
# the parameters below (test_size=0.2, random_state=42, stratify=y) MUST mirror
# that call so the same rows end up in the training partition.
train_idx = train_test_split(
    np.arange(len(y)), test_size=0.2, random_state=42, stratify=y
)[0]
scaler = StandardScaler()
scaler.fit(X[train_idx])

# Apply the train-fitted scaling to every row (train and test alike).
X = scaler.transform(X)
print(X)

[[ 2.32783109 -0.51521027 -0.18640643 ... -0.28313719 -0.47836378
  -0.39107132]
 [ 2.33739848 -0.51520691 -0.25124533 ... -0.28313719 -0.47836378
  -0.39107132]
 [ 2.3374491  -0.51520872 -0.25124533 ... -0.28313719 -0.47836378
  -0.39107132]
 ...
 [ 2.65737453 -0.51520799 -0.25124533 ... -0.28313719 -0.47836378
  -0.39107132]
 [ 2.65474223 -0.51520885 -0.18640643 ... -0.28313719 -0.47836378
  -0.39107132]
 [ 2.65489409 -0.51520821 -0.25124533 ... -0.28313719 -0.47836378
  -0.39107132]]


# Splitting the data into training set and test set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set, stratified on y, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Tuning the model

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier

# Candidate hyperparameter values for a random forest.
# NOTE(review): this cell only defines the grid; no search is run here and
# RandomizedSearchCV is not used in this cell — confirm whether the search was
# dropped on purpose.
param_grid = dict(
    n_estimators=[50, 100, 200, 300],
    max_depth=[10, 20, 30, None],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    max_features=['sqrt', 'log2'],
)

print("Grid de hiperparametri definit!")

Grid de hiperparametri definit!


# Training the model

In [None]:
# Build the random forest with fixed hyperparameters and a fixed seed, then fit
# it on the training partition. The fitted estimator is the cell's output.
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=20,
    min_samples_split=5,
    min_samples_leaf=1,
    max_features='sqrt',
    n_jobs=-1,
    random_state=42,
)
model.fit(X_train, y_train)

# Making a prediction

In [None]:
# Predict class labels for the held-out test set.
y_pred = model.predict(X_test)

# Testing the accuracy

In [None]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

# Report, in order: the confusion matrix, the overall accuracy, and the
# per-class precision/recall/F1 table for the test-set predictions.
cm = confusion_matrix(y_test, y_pred)
print(cm, accuracy_score(y_test, y_pred), sep="\n")
print("Raport de clasificare:\n", classification_report(y_test, y_pred))

[[19544     0]
 [    3 25602]]
0.9999335533455891
Raport de clasificare:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     19544
           1       1.00      1.00      1.00     25605

    accuracy                           1.00     45149
   macro avg       1.00      1.00      1.00     45149
weighted avg       1.00      1.00      1.00     45149



# Saving the model

In [None]:
import joblib

# Persist the fitted model to disk; the returned list of written file names is
# shown as the cell's output.
joblib.dump(value=model, filename="random_forest_ids.pkl")

['random_forest_ids.pkl']

# Testing the model on a network

In [None]:
from scapy.all import sniff

# Reload the model file written by the save cell earlier in this notebook.
# NOTE: joblib files are pickles; only load files from a trusted source.
rf_model = joblib.load("random_forest_ids.pkl")

def extract_features(packet):
    """Build a (1, 8) float feature row from a single sniffed packet.

    The columns are, in order: packet length, capture timestamp, IP TTL,
    TCP source port, TCP destination port, and three 0/1 flags for the
    presence of a TCP, UDP and ICMP layer. Fields whose layer is absent
    are reported as 0.

    Parameters
    ----------
    packet : Scapy packet (anything offering ``len()``, ``.time``,
        ``haslayer(name)`` and ``packet[name]`` layer access).

    Returns
    -------
    numpy.ndarray of shape (1, 8) and float dtype, or ``None`` if the
    packet could not be turned into a feature row.

    NOTE(review): the model in this notebook is trained on the scaled flow
    columns of the CSV (55 features), not on these 8 raw per-packet values,
    so this row is not directly compatible with that model.
    """
    try:
        has_ip = packet.haslayer('IP')
        has_tcp = packet.haslayer('TCP')
        features = [
            len(packet),
            # Coerce the timestamp to a plain float so the array stays numeric.
            float(packet.time),
            # Read each field from its own layer rather than relying on
            # attribute lookup falling through from the outermost layer.
            packet['IP'].ttl if has_ip else 0,
            packet['TCP'].sport if has_tcp else 0,
            packet['TCP'].dport if has_tcp else 0,
            1 if has_tcp else 0,
            1 if packet.haslayer('UDP') else 0,
            1 if packet.haslayer('ICMP') else 0,
        ]
        return np.array(features, dtype=float).reshape(1, -1)
    except Exception:
        # Best effort: a packet that cannot be parsed is skipped by the caller.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate so the capture can still be interrupted.
        return None

def detect_attack(packet):
    """Classify one sniffed packet with ``rf_model`` and print the verdict.

    Intended as the per-packet callback for ``sniff``. Packets for which
    ``extract_features`` returns ``None`` are silently skipped.

    If the extracted row does not have the number of features the loaded
    model was fitted on, the packet is reported and skipped instead of
    letting ``predict`` raise inside the capture callback.

    Parameters
    ----------
    packet : the packet object handed over by the sniffer.

    Returns
    -------
    None; the result is printed.
    """
    features = extract_features(packet)
    if features is None:
        return

    # Fitted scikit-learn estimators expose the training feature count; guard
    # against a mismatch rather than failing on every packet.
    expected = getattr(rf_model, "n_features_in_", None)
    if expected is not None and features.shape[1] != expected:
        print(
            f"⚠️ Pachet ignorat: modelul așteaptă {expected} caracteristici, "
            f"dar au fost extrase {features.shape[1]}."
        )
        return

    prediction = rf_model.predict(features)
    # predict returns one label per row; read the single label explicitly
    # instead of relying on the truthiness of a one-element array.
    if prediction[0] == 1:
        print("⚠️ ALARMĂ! Atac detectat!")
    else:
        print("✅ Pachet benign.")

print("🔍 Începem monitorizarea traficului de rețea...")
# Start the capture with detect_attack as the per-packet callback (prn) and
# count=10; the value returned by sniff is displayed as the cell's output.
sniff(prn=detect_attack, count=10)




🔍 Începem monitorizarea traficului de rețea...


<Sniffed: TCP:1 UDP:0 ICMP:0 Other:0>