<a href="https://colab.research.google.com/github/Prasaad-G/Intrusion-Detection-System/blob/main/IDS_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas scikit-learn joblib numpy


import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report



In [None]:
# Load the raw capture export. The path is specific to a Colab session; change
# `file_path` when running anywhere else.
file_path = "/content/network_data3.csv"
raw_df = pd.read_csv(file_path)

print("Dataset Preview:")
print(raw_df.head())

print("\nColumns in the dataset:")
print(raw_df.columns)

# Rename columns to the feature names the rest of the notebook expects.
# NOTE(review): this maps the source IP to 'protocol_type', the destination IP
# to 'service' and the packet info string to 'flag'. The new names do not
# describe the contents of those columns -- confirm this is intended.
column_mapping = {
    'ip.src': 'protocol_type',
    'ip.dst': 'service',
    'info': 'flag'
}
feature_columns = ['protocol_type', 'service', 'flag']

# Work on a renamed copy so the raw frame shown above is never mutated.
df = raw_df.rename(columns=column_mapping)

# Fail early, with a readable message, if the CSV does not have the expected layout.
missing_columns = [col for col in feature_columns if col not in df.columns]
if missing_columns:
    raise KeyError(
        f"{file_path} does not provide the expected column(s): {missing_columns}"
    )

# Add a placeholder 'label' column when the data is unlabeled. Every row then
# belongs to the same class, so say so loudly instead of doing it silently.
if 'label' not in df.columns:
    print(
        "\nWARNING: no 'label' column in the dataset; filling it with the single "
        "dummy class 'unknown'. A model trained on these labels cannot "
        "distinguish normal traffic from intrusions."
    )
    df['label'] = 'unknown'

# Keep only the relevant columns and replace missing values, producing a new
# frame rather than filling a column-subset in place.
df = df[feature_columns + ['label']].fillna('unknown')

# Print updated column names
print("\nUpdated Columns:")
print(df.columns)

# Print missing values count (all zeros after the fill above)
print("\nMissing Values:")
print(df.isnull().sum())

Dataset Preview:
   frame.time_epoch           ip.src           ip.dst  tcp.srcport  \
0      1.741284e+09   204.79.197.239      192.168.1.4        443.0   
1      1.741284e+09      192.168.1.4   204.79.197.239      63504.0   
2      1.741284e+09  142.250.193.162      192.168.1.4        443.0   
3      1.741284e+09      192.168.1.4  142.250.193.162      63503.0   
4      1.741284e+09  142.250.195.206      192.168.1.4          NaN   

   tcp.dstport  http.file_data  \
0      63504.0             NaN   
1        443.0             NaN   
2      63503.0             NaN   
3        443.0             NaN   
4          NaN             NaN   

                                                info  
0  [TCP Keep-Alive ACK] 443 → 63504 [ACK] Seq=695...  
1  [TCP Keep-Alive] 63504 → 443 [ACK] Seq=2848 Ac...  
2  [TCP Keep-Alive ACK] 443 → 63503 [ACK] Seq=529...  
3  [TCP Keep-Alive] 63503 → 443 [ACK] Seq=1811 Ac...  
4                            Protected Payload (KP0)  

Columns in the dataset:
In

In [None]:
from sklearn.preprocessing import LabelEncoder

# Give every feature column its own encoder. Refitting one shared encoder for
# each column throws away all but the last mapping, which makes it impossible
# to decode a column or to encode new values the same way later.
# NOTE: this cell expects the string-valued columns produced by the loading
# cell; re-run that cell first if you need to run this one again.
feature_encoders = {}
for col in ['protocol_type', 'service', 'flag']:
    feature_encoders[col] = LabelEncoder()
    df[col] = feature_encoders[col].fit_transform(df[col])

# Encode labels with a dedicated encoder so its classes_ describe the label only
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

print("\nAfter Encoding:")
print(df.head())


After Encoding:
   protocol_type  service  flag  label
0             41       28  1422      0
1             28       43  1427      0
2              6       28  1421      0
3             28        6  1426      0
4              9       28  1041      0


In [None]:
# Target is the encoded label; the feature matrix is every remaining column.
y = df['label']
X = df.drop('label', axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

print(f"\nTraining samples: {len(X_train)}, Testing samples: {len(X_test)}")


Training samples: 13587, Testing samples: 3397


In [None]:
# Fit a 100-tree random forest on the training split (seeded for repeatability).
# `fit` returns the estimator itself, so construction and training can be chained.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Persist the fitted model so later cells can reload it from disk.
joblib.dump(model, "intrusion_detection_model.pkl")
print("\nModel trained and saved successfully!")


Model trained and saved successfully!


In [None]:
# Evaluate the model as reloaded from the file written by the training cell.
model = joblib.load("intrusion_detection_model.pkl")

# Score the held-out split.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"\nModel Accuracy: {accuracy * 100:.2f}%")

# Heading and per-class report, one per line.
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")


Model Accuracy: 100.00%

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3397

    accuracy                           1.00      3397
   macro avg       1.00      1.00      1.00      3397
weighted avg       1.00      1.00      1.00      3397



In [None]:
import joblib

# Build one encoder per feature column and save them for inference.
#
# By this point `df` already holds integer codes (it was encoded before the
# train/test split), so fitting on `df` would yield encoders whose classes_ are
# integers and that recognise none of the raw string values seen at inference
# time. Instead, rebuild the raw feature columns exactly as the loading cell
# did (same file, same renaming, same 'unknown' fill) and fit on those.
# LabelEncoder assigns codes by sorted unique value, so these encoders
# reproduce the codes the model was trained on.
feature_cols = ['protocol_type', 'service', 'flag']
raw_features = (
    pd.read_csv(file_path)
    .rename(columns=column_mapping)[feature_cols]
    .fillna('unknown')
)

encoder_dict = {}
for col in feature_cols:
    encoder_dict[col] = LabelEncoder().fit(raw_features[col])

    # Safety net: the rebuilt encoder must map the raw values onto the codes
    # already stored in `df`, otherwise it does not match the trained model.
    if not np.array_equal(
        encoder_dict[col].transform(raw_features[col]), df[col].to_numpy()
    ):
        raise ValueError(
            f"Encoder rebuilt for '{col}' does not reproduce the codes in df; "
            "re-run the notebook from the loading cell."
        )

# Save the encoders for later use
joblib.dump(encoder_dict, "label_encoders.pkl")

print("\nEncoders saved successfully!")


Encoders saved successfully!


In [None]:
# Restore the per-column encoders and the trained classifier from disk.
# NOTE: joblib.load unpickles its input, so only load files you created or trust.
encoder_dict = joblib.load("label_encoders.pkl")
model = joblib.load("intrusion_detection_model.pkl")

def safe_encode(value, column_name):
    """Return the integer code for ``value`` in the given feature column.

    The encoder is looked up in the module-level ``encoder_dict`` by
    ``column_name``. A value the encoder was fitted on is passed through its
    ``transform``. Any other value gets ``len(encoder.classes_)``, i.e. one
    past the largest fitted code; it is not mapped to an 'unknown' class.
    """
    column_encoder = encoder_dict[column_name]
    known_values = column_encoder.classes_

    # Unseen value: hand back the first code outside the fitted range.
    if value not in known_values:
        return len(known_values)

    # Seen value: transform works on sequences, so wrap and unwrap the scalar.
    return column_encoder.transform([value])[0]

def detect_intrusion(new_data):
    """Classify a single network-traffic record with the trained model.

    Args:
        new_data: Mapping with the raw (un-encoded) values for the keys
            'protocol_type', 'service' and 'flag'. Any other keys are ignored.

    Returns:
        "🚨 Intrusion Detected!" when the model predicts class 1, otherwise
        "✅ Normal Traffic".

    Raises:
        KeyError: If one of the three required keys is missing from ``new_data``.
    """
    # Fixed feature order, matching the columns the model was fitted on. Building
    # the row from this list (rather than from the caller's dict order) keeps the
    # input aligned with the model regardless of key order or extra keys.
    feature_order = ['protocol_type', 'service', 'flag']

    missing = [col for col in feature_order if col not in new_data]
    if missing:
        raise KeyError(f"new_data is missing required field(s): {missing}")

    # Encode each raw value with the encoder saved for its column.
    encoded_row = {col: safe_encode(new_data[col], col) for col in feature_order}
    new_data_df = pd.DataFrame([encoded_row], columns=feature_order)

    # Make prediction
    # NOTE(review): labels were integer-encoded with LabelEncoder, so class 1 is
    # whichever label sorts second -- confirm that this is the intrusion class.
    prediction = model.predict(new_data_df)
    return "🚨 Intrusion Detected!" if prediction[0] == 1 else "✅ Normal Traffic"

# Smoke-test the inference path with one hand-written record.
# NOTE(review): the feature columns were derived from ip.src / ip.dst / info in
# the loading cell, so values such as 'TCP' or 'http' are presumably unseen by
# the encoders -- consider testing with a record taken from the capture instead.
sample_input = dict(protocol_type='TCP', service='http', flag='SF')
print("\nTest Prediction:", detect_intrusion(sample_input))


Test Prediction: ✅ Normal Traffic
