<a href="https://colab.research.google.com/github/Latakumari-17/Agent_AI_cybersecurity-threat_detection/blob/main/Agent_for_cybersecurity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files

# Bind the result instead of leaving the call as the cell's last expression:
# files.upload() returns a {filename: file contents} dict, and echoing it would
# write the uploaded kaggle.json credentials into the saved cell output.
uploaded = files.upload()
print("Uploaded:", list(uploaded))


In [None]:
# Copy the uploaded kaggle.json into ~/.kaggle (presumably where the kaggle CLI
# used in the next cell looks for its API token -- confirm against the CLI docs).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
!kaggle datasets download -d dnkumars/cybersecurity-intrusion-detection-dataset
!unzip cybersecurity-intrusion-detection-dataset.zip -d cybersecurity-intrusion-detection-dataset


In [None]:
import pandas as pd
import numpy as np

# Read the extracted CSV into a DataFrame, report its dimensions, and preview
# the first rows as the cell's output.
csv_path = 'cybersecurity-intrusion-detection-dataset/cybersecurity_intrusion_data.csv'
df = pd.read_csv(csv_path)
print(df.shape)
df.head()


In [None]:

# Structural overview of the raw frame: dtypes, summary statistics, column
# names, missing values and duplicate rows.
# DataFrame.info() prints its report itself and returns None, so it is called
# bare; wrapping it in print() would append a stray "None" line.
df.info()
print(df.describe())
print(df.columns)
print("Missing values per column:\n", df.isnull().sum())
print("Duplicate rows:", df.duplicated().sum())


In [None]:
import matplotlib.pyplot as plt

# Bar chart of how many rows fall into each value of the target column.
class_counts = df['attack_detected'].value_counts()
ax = class_counts.plot(kind='bar', color=['blue', 'black'])
ax.set_title("Class Distribution")
ax.set_xlabel("Class (0 = Normal, 1 = Attack)")
ax.set_ylabel("Count")
plt.show()


In [None]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode every object-dtype column in place.
# Each column gets its own fitted encoder, stored in `label_encoders` keyed by
# column name. A single shared encoder is re-fit on every column and therefore
# only remembers the classes of the last one, which makes the other columns'
# mappings unrecoverable (no consistent encoding of new data, no decoding).
label_encoders = {}
label_encoder = LabelEncoder()  # keeps the name defined even with no object columns
for col in df.select_dtypes(include='object').columns:
    label_encoder = LabelEncoder()
    df[col] = label_encoder.fit_transform(df[col])
    label_encoders[col] = label_encoder
# `label_encoder` ends up bound to the last encoded column's encoder, which is
# the same state the name had with the shared-encoder version.


In [None]:
# Separate the label column (y) from every other column (X).
target_col = 'attack_detected'
y = df[target_col]
X = df.drop(columns=[target_col])


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing, stratified on the label, with a fixed
# seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Report the shape of each of the four pieces.
for split_name, split_data in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(f"{split_name} shape:", split_data.shape)


In [None]:
from imblearn.over_sampling import SMOTE

# Oversample the training split only (the test split is left as-is) and show
# the label counts before and after.
smote = SMOTE(random_state=42)
resampled = smote.fit_resample(X_train, y_train)
X_train_resampled, y_train_resampled = resampled

for stage, labels in (("Before", y_train), ("After", y_train_resampled)):
    print(f"{stage} SMOTE:\n", labels.value_counts())


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

# Fit a 100-tree random forest on the oversampled training data.
model = RandomForestClassifier(
    n_estimators=100,
    class_weight='balanced',
    random_state=42,
).fit(X_train_resampled, y_train_resampled)

# Evaluate on the test split, which was not resampled.
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)

print("\nClassification Report After SMOTE:")
print(report)

# Confusion matrix, rendered as the cell's output.
cm = confusion_matrix(y_test, y_pred)
ConfusionMatrixDisplay(confusion_matrix=cm).plot()


In [None]:
import joblib

# Persist the trained model, the encoder object and the training column order
# to files in the working directory.
# NOTE(review): `label_encoder` is whatever encoder the encoding cell left
# bound to that name -- confirm it is the one needed at inference time.
feature_names = X.columns.to_list()
for artifact, path in ((model, 'rf_model.pkl'), (label_encoder, 'label_encoder.pkl')):
    joblib.dump(artifact, path)
joblib.dump(feature_names, 'feature_columns.pkl')


In [None]:
# Turn the numeric test-set predictions into readable class names.
# `label_encoder` is only ever fit inside the loop over object-dtype columns,
# so it holds some feature column's categories (or is unfit), not the target's
# classes; inverse_transform on it would return feature categories or raise.
# Map the codes explicitly instead, using the convention from the
# class-distribution plot: 0 = Normal, 1 = Attack.
y_pred = model.predict(X_test)
decoded_preds = np.where(y_pred.astype(int) == 1, "Attack", "Normal")
print("Sample predictions:", decoded_preds[:10])


In [None]:
!pip install scapy


In [None]:
from scapy.all import sniff, IP, TCP
import joblib


# Load the trained model and the training column order saved earlier.
# (No label encoder is loaded here.) joblib.load unpickles the files, so only
# load artifacts this notebook produced itself.
model, feature_columns = (
    joblib.load(path) for path in ('rf_model.pkl', 'feature_columns.pkl')
)

# Function to extract basic features from a packet
def extract_features(packet):
    """Return ``[src_ip, dst_ip, length, proto]`` for an IPv4 packet.

    Args:
        packet: A sniffed packet supporting ``IP in packet``, ``packet[IP]``
            and ``len(packet)``.

    Returns:
        A four-element list, or ``None`` when the packet has no IP layer:
        source and destination addresses as their 32-bit integer values,
        the total packet length, and the IP layer's protocol number.
    """
    import ipaddress  # local import: this cell's import block does not provide it

    if IP not in packet:
        return None

    ip_layer = packet[IP]
    # Convert each dotted quad to its integer value. Simply deleting the dots
    # is ambiguous: "1.23.4.5" and "12.3.4.5" would both become 12345.
    src_ip = int(ipaddress.IPv4Address(ip_layer.src))
    dst_ip = int(ipaddress.IPv4Address(ip_layer.dst))
    # Read the protocol from the IP layer itself so that an outer layer's own
    # `proto` field cannot be picked up instead.
    return [src_ip, dst_ip, len(packet), ip_layer.proto]

# Process captured packets
def process_packet(packet):
    """Classify one sniffed packet with the loaded model and print the verdict.

    Packets for which ``extract_features`` returns ``None`` (no IP layer) are
    ignored. Otherwise the extracted values are written into the leading
    positions of a zero vector with one slot per training column, and the
    model's prediction decides which message is printed.

    NOTE(review): the fill is purely positional -- the packet-derived values
    land in whichever training columns come first in ``feature_columns``.
    Nothing here shows that those columns are source IP, destination IP,
    length and protocol; confirm the mapping (ideally assign by column name)
    before trusting the alerts.
    """
    features = extract_features(packet)
    if features is None:
        return

    # Pad or align to expected features
    input_array = np.zeros(len(feature_columns))
    # Never write past the end if the model has fewer columns than features.
    n_used = min(len(features), len(input_array))
    input_array[:n_used] = features[:n_used]

    # Pass a named-column frame: the model is fit on a DataFrame earlier in
    # the notebook, and a bare list would drop the feature names it expects.
    model_input = pd.DataFrame([input_array], columns=feature_columns)
    prediction = model.predict(model_input)[0]

    if prediction == 1:
        print(f" Alert: Potential attack detected from {packet[IP].src}")
    else:
        print(f"Normal packet from {packet[IP].src}")

# Start sniffing (you can set a count limit for testing).
# lfilter discards frames without an IP layer before they are counted, so the
# 10-packet budget is spent only on packets process_packet can classify.
# timeout bounds the wait so the cell cannot block forever on a quiet interface.
sniff(
    prn=process_packet,
    store=False,
    count=10,
    lfilter=lambda pkt: IP in pkt,
    timeout=60,
)
