# About the CIC-IDS2017 Project

This project works with CIC-IDS2017, a widely used cybersecurity dataset for Intrusion Detection Systems (IDS). It contains labeled network traffic data with both benign (normal) traffic and various cyber-attacks.

The model trained in this notebook is a Long Short-Term Memory (LSTM) network.

Use of LSTM Architecture
LSTM (Long Short-Term Memory) networks are a type of Recurrent Neural Network (RNN) that excel at analyzing sequential data, such as network traffic logs. Since network packets flow over time, LSTMs can learn patterns to detect anomalies, cyber-attacks, or malicious traffic.

In [None]:
!pip install pandas numpy scikit-learn tensorflow




# Loading and Previewing the Dataset

In [None]:
import pandas as pd

# Read the Monday capture CSV from the working directory.
df = pd.read_csv("Monday-WorkingHours.pcap_ISCX.csv")

# Plain-text preview: the first rows, then the raw column index.
for preview in (df.head(), df.columns):
    print(preview)


    Destination Port   Flow Duration   Total Fwd Packets  \
0              49188               4                   2   
1              49188               1                   2   
2              49188               1                   2   
3              49188               1                   2   
4              49486               3                   2   

    Total Backward Packets  Total Length of Fwd Packets  \
0                        0                           12   
1                        0                           12   
2                        0                           12   
3                        0                           12   
4                        0                           12   

    Total Length of Bwd Packets   Fwd Packet Length Max  \
0                             0                       6   
1                             0                       6   
2                             0                       6   
3                             0                 

In [None]:
# Count the missing (NaN) entries in each column.
df.isna().sum()

In [None]:
# Trim leading/trailing whitespace from every column header.
df.columns = df.columns.map(str.strip)

In [None]:
# Binary target: 0 = benign, 1 = attack.
# CIC-IDS2017 names its benign class "BENIGN", so the previous test for
# "Normal" matched nothing and every row was encoded as an attack (1).
# The numeric-dtype guard keeps the cell safe to re-run after encoding.
if not pd.api.types.is_numeric_dtype(df["Label"]):
    df["Label"] = df["Label"].apply(
        lambda x: 0 if str(x).strip().upper() == "BENIGN" or "Normal" in str(x) else 1
    )

In [None]:
import numpy as np

# Only `df` exists at this point of the notebook (`X` is created further down),
# so the original line raised NameError on a fresh kernel. Mask the infinities
# on `df` instead; NaN marks them for later imputation.
df = df.replace([np.inf, -np.inf], np.nan)

# Preprocessing the Data with scikit-learn

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split


# Reload the raw capture so this cell does not depend on the exploratory cells above.
df = pd.read_csv("Monday-WorkingHours.pcap_ISCX.csv")

# Strip whitespace around header names so later lookups by name are reliable.
df.columns = df.columns.str.strip()

# Binary target: 0 = benign, 1 = attack.
# CIC-IDS2017 names its benign class "BENIGN"; the earlier check for "Normal"
# matched nothing, so every flow was labelled 1 and the model saw one class only.
df["Label"] = df["Label"].apply(
    lambda x: 0 if str(x).strip().upper() == "BENIGN" or "Normal" in str(x) else 1
)
# NOTE(review): the Monday capture may contain benign traffic only — check
# df["Label"].value_counts() and add attack-day files if a single class appears.

# Drop unnecessary columns.
# Names are written without padding because the headers were stripped above;
# the old " Flow Packets/s" entry could never match and errors="ignore" hid it.
drop_cols = ["Flow Bytes/s", "Flow Packets/s"]
df = df.drop(columns=drop_cols, errors="ignore")

# Fill missing values
df = df.fillna(0)

# Split features & labels
X = df.drop(columns=["Label"])
y = df["Label"]

# Replace infinite values with NaN, then impute them with the column median.
# Missing values were zero-filled above, so only former infinities reach this step.
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(X.median())

# Split first, then fit the scaler on the training rows only, so test-set
# statistics do not leak into the features the model is trained on.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply normalisation to the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print("✅ Data preprocessing completed successfully!")
print(f"Training samples: {X_train.shape}, Testing samples: {X_test.shape}")

✅ Data preprocessing completed successfully!
Training samples: (51692, 77), Testing samples: (12924, 77)


# Training and Testing the Model with TensorFlow

In [None]:
# The LSTM expects (samples, timesteps, features). Insert the single-timestep
# axis only while the arrays are still 2-D, so re-running this cell cannot
# stack additional axes.
if X_train.ndim == 2:
    X_train = np.expand_dims(X_train, axis=1)
if X_test.ndim == 2:
    X_test = np.expand_dims(X_test, axis=1)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout

# Seed Python, NumPy and TensorFlow so repeated runs are comparable.
tf.keras.utils.set_random_seed(42)

# The LSTM expects (samples, timesteps, features). The preceding cell already
# adds the timestep axis; expanding again unconditionally produced 4-D arrays
# on a top-to-bottom run, so only expand while the data is still 2-D.
if X_train.ndim == 2:
    X_train = np.expand_dims(X_train, axis=1)
if X_test.ndim == 2:
    X_test = np.expand_dims(X_test, axis=1)

# Two stacked LSTM layers followed by a small dense head with a sigmoid output
# for the binary label. The input is declared with an explicit Input layer
# instead of passing input_shape to the first LSTM.
model = Sequential([
    Input(shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(64, return_sequences=True),
    Dropout(0.3),
    LSTM(32),
    Dense(16, activation="relu"),
    Dense(1, activation="sigmoid"),
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# NOTE(review): the test split doubles as validation data here, so the val_*
# metrics are not an independent hold-out estimate.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test, y_test),
)

# Persist the trained network; later cells read this exact file name.
model.save("ids_lstm_model.h5")


  super().__init__(**kwargs)


Epoch 1/10
[1m1616/1616[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 6ms/step - accuracy: 0.9998 - loss: 0.0804 - val_accuracy: 1.0000 - val_loss: 1.1563e-04
Epoch 2/10
[1m1616/1616[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - accuracy: 1.0000 - loss: 1.0443e-04 - val_accuracy: 1.0000 - val_loss: 5.2809e-05
Epoch 3/10
[1m1616/1616[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 1.0000 - loss: 4.1238e-05 - val_accuracy: 1.0000 - val_loss: 2.1875e-05
Epoch 4/10
[1m1616/1616[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 1.0000 - loss: 1.2360e-05 - val_accuracy: 1.0000 - val_loss: 9.4241e-06
Epoch 5/10
[1m1616/1616[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 1.0000 - loss: 2.7242e-06 - val_accuracy: 1.0000 - val_loss: 3.7972e-06
Epoch 6/10
[1m1616/1616[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 1.0000 - loss: 7.7461e-07 - val_accurac



In [None]:
# Sigmoid outputs of the model for the test samples; the bare name renders the array.
test_probabilities = model.predict(X_test)
test_probabilities

Download the saved `.h5` model (Google Colab only)

In [None]:
from google.colab import files

# Offer the saved model file as a download through the Colab helper.
MODEL_FILENAME = "ids_lstm_model.h5"
files.download(MODEL_FILENAME)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

View the model with Streamlit

In [None]:
pip install streamlit

Collecting streamlit
  Downloading streamlit-1.44.0-py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.44.0-py3-none-any.whl (9.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m34.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInst

In [None]:
import streamlit as st
import numpy as np
import tensorflow as tf

# NOTE(review): Streamlit apps are normally launched with `streamlit run <file>.py`;
# confirm how this cell is meant to be served, since a notebook kernel does not
# host the widgets below.

INVALID_INPUT_MESSAGE = "Invalid input! Please enter correct values."


@st.cache_resource
def _load_model():
    """Load the trained LSTM once and reuse it across Streamlit reruns."""
    return tf.keras.models.load_model("ids_lstm_model.h5")


model = _load_model()


st.title("AI-Powered Intrusion Detection System")
st.write("Enter network traffic data to detect anomalies!")


user_input = st.text_input("Enter Network Data (csv)")


def predict_intrusion(data):
    """Classify one comma-separated row of flow features.

    Args:
        data: Text holding the numeric feature values separated by commas.
            The number of values must equal the model's feature dimension.

    Returns:
        "Attack Detected!" when the model output exceeds 0.5,
        "Normal Traffic" otherwise, or the invalid-input message when the
        text is not a list of finite numbers of the expected length.
    """
    try:
        values = np.array([float(token) for token in data.split(",")], dtype="float32")
    except (AttributeError, ValueError):
        # Non-string input or a token that is not a number.
        return INVALID_INPUT_MESSAGE

    n_features = model.input_shape[-1]
    if values.size != n_features or not np.isfinite(values).all():
        return INVALID_INPUT_MESSAGE

    # The network was trained on (samples, 1, features) arrays. The previous
    # 2-D array did not match that shape, and the bare `except` reported the
    # resulting failure as invalid input for every row.
    # NOTE(review): training standardised the features with StandardScaler, but
    # the values typed here reach the model unscaled — persist the fitted
    # scaler and apply it here before trusting the predictions.
    batch = values.reshape(1, 1, n_features)
    prediction = model.predict(batch, verbose=0)
    return "Attack Detected!" if prediction[0][0] > 0.5 else "Normal Traffic"


if st.button("Detect Intrusion"):
    result = predict_intrusion(user_input)
    st.subheader(result)


