In [2]:
pip install catboost



In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
file_path = '/content/drive/MyDrive/ML/dataset_sdn.csv'  # Adjust the path as needed
dataset = pd.read_csv(file_path)

# Separate numeric and categorical columns
numeric_cols = dataset.select_dtypes(include=['int64', 'float64']).columns
categorical_cols = dataset.select_dtypes(include=['object', 'category']).columns

# Handle missing values for numeric columns by filling with the mean
dataset[numeric_cols] = dataset[numeric_cols].fillna(dataset[numeric_cols].mean())

# Handle missing values for categorical columns by filling with a placeholder (like 'unknown')
dataset[categorical_cols] = dataset[categorical_cols].fillna('unknown')

# Convert categorical features to category dtype for CatBoost
categorical_features = ['src', 'dst', 'Protocol']
for col in categorical_features:
    dataset[col] = dataset[col].astype('category')

# Prepare features and labels
X = dataset.drop(columns=['label'])
y = dataset['label']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the CatBoost classifier
model = CatBoostClassifier(iterations=100, learning_rate=0.1, depth=6, cat_features=categorical_features, class_weights=[1, 10], verbose=False)
model.fit(X_train, y_train)

# Make predictions and evaluate the model
y_pred = model.predict(X_test)

# Print the classification report and accuracy
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"Classification Report: \n{report}")
print(dataset['label'].value_counts())


Accuracy: 0.9987062149599885
Classification Report: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12722
           1       1.00      1.00      1.00      8147

    accuracy                           1.00     20869
   macro avg       1.00      1.00      1.00     20869
weighted avg       1.00      1.00      1.00     20869

label
0    63561
1    40784
Name: count, dtype: int64


In [5]:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(model, X, y, cv=5)  # 5-fold cross-validation
print(f"Cross-Validation Accuracy: {scores.mean()} ± {scores.std()}")

Cross-Validation Accuracy: 0.9279888830322488 ± 0.038335011255384195


In [6]:
model.get_feature_importance(prettified=True)

Unnamed: 0,Feature Id,Importances
0,byteperflow,37.041669
1,bytecount,17.183322
2,packetins,15.859636
3,pktcount,8.580687
4,pktperflow,6.229703
5,pktrate,5.135575
6,Protocol,4.369771
7,dt,3.317095
8,dur,0.996026
9,dst,0.320331


In [10]:
import pandas as pd

# Assuming the model is already trained and available as 'model'

# Categorical features used during training
categorical_features = ['src', 'dst', 'Protocol']

# Helper function to safely cast user input to the correct type
def safe_input(prompt, desired_type, default_value=None):
    while True:
        try:
            return desired_type(input(prompt))
        except ValueError:
            print(f"Invalid input. Expected a {desired_type.__name__}. Using default value: {default_value}")
            return desired_type(default_value)

# Function to predict if the traffic is a DDoS attack or not
def predict_ddos():
    # Ask the user for input with safe type conversion
    print("Please provide the following details:")
    dt = safe_input("dt (integer): ", int, 0)
    switch = safe_input("switch (integer): ", int, 1)
    src = input("Source IP (e.g., 10.0.0.1): ") or "10.0.0.1"
    dst = input("Destination IP (e.g., 10.0.0.8): ") or "10.0.0.8"
    pktcount = safe_input("Packet count (integer): ", int, 0)
    bytecount = safe_input("Byte count (integer): ", int, 0)
    dur = safe_input("Duration (integer): ", int, 0)
    dur_nsec = safe_input("Duration in nanoseconds (integer): ", int, 0)
    tot_dur = safe_input("Total duration (float, e.g., 1.01E+11): ", float, 0.0)
    flows = safe_input("Flows (integer): ", int, 1)
    packetins = safe_input("Packet ins (integer): ", int, 0)
    pktperflow = safe_input("Packets per flow (integer): ", int, 0)
    byteperflow = safe_input("Bytes per flow (integer): ", int, 0)
    pktrate = safe_input("Packet rate (integer): ", int, 0)
    Pairflow = safe_input("Pairflow (integer): ", int, 0)
    Protocol = input("Protocol (e.g., UDP): ") or "UDP"
    port_no = safe_input("Port number (integer): ", int, 0)
    tx_bytes = safe_input("Tx bytes (integer): ", int, 0)
    rx_bytes = safe_input("Rx bytes (integer): ", int, 0)
    tx_kbps = safe_input("Tx Kbps (integer): ", int, 0)
    rx_kbps = safe_input("Rx Kbps (float): ", float, 0.0)
    tot_kbps = safe_input("Total Kbps (float): ", float, 0.0)

    # Create a dictionary with user input
    user_input = pd.DataFrame({
        'dt': [dt],
        'switch': [switch],
        'src': [src],
        'dst': [dst],
        'pktcount': [pktcount],
        'bytecount': [bytecount],
        'dur': [dur],
        'dur_nsec': [dur_nsec],
        'tot_dur': [tot_dur],
        'flows': [flows],
        'packetins': [packetins],
        'pktperflow': [pktperflow],
        'byteperflow': [byteperflow],
        'pktrate': [pktrate],
        'Pairflow': [Pairflow],
        'Protocol': [Protocol],
        'port_no': [port_no],
        'tx_bytes': [tx_bytes],
        'rx_bytes': [rx_bytes],
        'tx_kbps': [tx_kbps],
        'rx_kbps': [rx_kbps],
        'tot_kbps': [tot_kbps]
    })

    # Convert the user input for categorical features
    for col in categorical_features:
        user_input[col] = user_input[col].astype('category')

    # Make a prediction
    prediction = model.predict(user_input)[0]

    # Output the result
    if prediction == 1:
        print("This traffic is predicted to be a DDoS attack (malicious).")
    else:
        print("This traffic is predicted to be unharmful (normal).")

# Call the function
predict_ddos()


Please provide the following details:
dt (integer): 14425
switch (integer): 1
Source IP (e.g., 10.0.0.1): 10.0.0.2
Destination IP (e.g., 10.0.0.8): 10.0.0.8
Packet count (integer): 90333
Byte count (integer): 96294978
Duration (integer): 200
Duration in nanoseconds (integer): 744000000
Total duration (float, e.g., 1.01E+11): 2.01E+11
Flows (integer): 3
Packet ins (integer): 1943
Packets per flow (integer): 13534
Bytes per flow (integer): 14427244
Packet rate (integer): 451
Pairflow (integer): 0
Protocol (e.g., UDP): UDP
Port number (integer): 1
Tx bytes (integer): 3885
Rx bytes (integer): 0
Tx Kbps (integer): 0
Rx Kbps (float): 0
Total Kbps (float): 0
This traffic is predicted to be unharmful (normal).
