<a href="https://colab.research.google.com/github/Lixx-webdev/hybridids/blob/main/Hybrid_IDS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hybrid Intrusion Detection System (IDS)
An AI-powered IDS that combines signature-based and anomaly-based detection.


# Dataset: CICIDS 2017


## Dataset Description
- Source: Canadian Institute for Cybersecurity
- Traffic types: BENIGN + DoS, DDoS, Bot, PortScan, Web Attack, etc.


In [6]:
import pandas as pd
# Location of one CSV inside the mounted Drive archive folder.
csv_path = '/content/drive/MyDrive/archive/friday.csv'

# Parse the file with pandas' default options and preview the first five rows.
df = pd.read_csv(filepath_or_buffer=csv_path)
df.head(n=5)



Unnamed: 0,Src IP dec,Src Port,Dst IP dec,Dst Port,Protocol,Timestamp,Flow Duration,Total Fwd Packet,Total Bwd packets,Total Length of Fwd Packet,...,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,ICMP Code,ICMP Type,Total TCP Flow Time,Label,Attempted Category
0,3232238130,56108,3232238083,3268,6,59:50.3,112740690,32,16,6448,...,343,16105402.14,498804.8,16399772,15375229,-1,-1,112740690,BENIGN,-1
1,3232238130,42144,3232238083,389,6,59:50.3,112740560,32,16,6448,...,285,16105427.43,498793.7,16399782,15375263,-1,-1,112740560,BENIGN,-1
2,134610945,0,134219268,0,0,00:31.4,113757377,545,0,0,...,19,12210358.4,6935824.0,20757030,5504997,-1,-1,0,BENIGN,-1
3,3232238105,5353,3758096635,5353,17,00:42.9,91997219,388,0,37151,...,16,13197637.0,5826905.0,19776791,5817470,-1,-1,0,BENIGN,-1
4,3232238105,123,301796989,123,17,00:42.4,66966070,6,6,288,...,1968172,64974431.0,0.0,64974431,64974431,-1,-1,0,BENIGN,-1


In [7]:
import os
import pandas as pd

folder_path = '/content/drive/MyDrive/archive'  # Update path if needed

# List all CSVs. os.listdir returns names in arbitrary order, so sort them to
# make the row order of combined_df (and the "last file" below) reproducible.
files = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))
if not files:
    # pd.concat would otherwise fail with a generic "No objects to concatenate".
    raise ValueError(f"No .csv files found in {folder_path!r}")

# Read each file into its own frame, then concatenate once at the end.
dfs = []
for file in files:
    path = os.path.join(folder_path, file)
    dfs.append(pd.read_csv(path))

# NOTE(review): the original loop used `df` as its loop variable, which left
# `df` bound to the LAST file read instead of the frame loaded in the previous
# cell. Later cells read `df`, so that binding is preserved here but made
# explicit. Confirm whether those cells should operate on `combined_df` instead.
df = dfs[-1]

# Combine into one DataFrame with a fresh 0..n-1 index.
combined_df = pd.concat(dfs, ignore_index=True)
combined_df.head()


Unnamed: 0,Src IP dec,Src Port,Dst IP dec,Dst Port,Protocol,Timestamp,Flow Duration,Total Fwd Packet,Total Bwd packets,Total Length of Fwd Packet,...,Local_5,Local_6,Local_7,Local_8,Local_9,Local_10,Local_11,Local_12,Local_13,Local_14
0,3232238130,56108,3232238083,3268,6,59:50.3,112740690,32,16,6448,...,,,,,,,,,,
1,3232238130,42144,3232238083,389,6,59:50.3,112740560,32,16,6448,...,,,,,,,,,,
2,134610945,0,134219268,0,0,00:31.4,113757377,545,0,0,...,,,,,,,,,,
3,3232238105,5353,3758096635,5353,17,00:42.9,91997219,388,0,37151,...,,,,,,,,,,
4,3232238105,123,301796989,123,17,00:42.4,66966070,6,6,288,...,,,,,,,,,,


# Data Cleaning



In [13]:
# Number of rows per raw traffic label in `df`, most frequent first.
label_counts = df['Label'].value_counts()
label_counts



Unnamed: 0_level_0,count
Label,Unnamed: 1_level_1
BENIGN,319113
DoS Hulk,158468
DoS GoldenEye,7567
DoS Slowloris,3859
DoS Slowhttptest - Attempted,3368
DoS Slowloris - Attempted,1847
DoS Slowhttptest,1740
DoS Hulk - Attempted,581
DoS GoldenEye - Attempted,80
Heartbleed,11


In [11]:
import numpy as np

# Cleaning pipeline, written as one chained expression instead of three
# `inplace=True` calls so the frame is never mutated behind other references
# to it (e.g. an entry in a list of loaded frames):
#   1. turn +inf / -inf into NaN,
#   2. drop every row that contains at least one NaN,
#   3. drop exact duplicate rows (the first occurrence is kept).
# The row index is not reset, matching the previous in-place behaviour.
df = (
    df.replace([np.inf, -np.inf], np.nan)
    .dropna()
    .drop_duplicates()
)

df.shape  # Check new shape


(496634, 105)

# Labeling

In [17]:
# Map every raw label (including its "- Attempted" variant) to a coarse attack type.
label_map = {
    'BENIGN': 'BENIGN',
    'DoS Hulk': 'DoS',
    'DoS Hulk - Attempted': 'DoS',
    'DoS GoldenEye': 'DoS',
    'DoS GoldenEye - Attempted': 'DoS',
    'DoS Slowloris': 'DoS',
    'DoS Slowloris - Attempted': 'DoS',
    'DoS Slowhttptest': 'DoS',
    'DoS Slowhttptest - Attempted': 'DoS',
    'Heartbleed': 'Heartbleed'
}

attack_type = df['Label'].map(label_map)

# Series.map yields NaN for any label that is not a key of label_map. Left
# unchecked, those rows would carry a missing AttackType and be treated as
# non-BENIGN by any later `== 'BENIGN'` test, so fail loudly instead.
unmapped_labels = df.loc[attack_type.isna(), 'Label'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        "Labels missing from label_map: "
        f"{sorted(map(str, unmapped_labels))}. Add them to label_map before continuing."
    )

df['AttackType'] = attack_type


In [18]:
## Create Binary Labels for Anomaly Detection

# 0 for rows whose AttackType is exactly 'BENIGN', 1 for everything else.
# Vectorized comparison instead of a per-row Python lambda; a missing
# AttackType compares as "not equal" and is therefore still coded as 1.
df['BinaryLabel'] = df['AttackType'].ne('BENIGN').astype('int64')


In [20]:
# Number of rows per consolidated attack type in `df`, most frequent first.
attack_type_counts = df['AttackType'].value_counts()
attack_type_counts


Unnamed: 0_level_0,count
AttackType,Unnamed: 1_level_1
BENIGN,319113
DoS,177510
Heartbleed,11
