In [1]:
import pandas as pd

# Column names (as per NSL-KDD documentation). The files are read with
# header=None, so these names are attached to each frame right after loading.
columns = [
    'duration', 'protocol_type', 'service', 'flag',
    'src_bytes', 'dst_bytes', 'land', 'wrong_fragment',
    'urgent', 'hot', 'num_failed_logins', 'logged_in',
    'num_compromised', 'root_shell', 'su_attempted', 'num_root',
    'num_file_creations', 'num_shells', 'num_access_files', 'num_outbound_cmds',
    'is_host_login', 'is_guest_login', 'count', 'srv_count',
    'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
    'dst_host_srv_count', 'dst_host_same_srv_rate',
    'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate',
    'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
    'dst_host_srv_serror_rate', 'dst_host_rerror_rate',
    'dst_host_srv_rerror_rate', 'label', 'difficulty',
]

# Load the dataset (assuming you have the CSV file downloaded) and label the
# columns in the same step; set_axis raises if the column count does not match.
train_data = pd.read_csv('KDDTrain+.txt', header=None).set_axis(columns, axis=1)
test_data = pd.read_csv('KDDTest+.txt', header=None).set_axis(columns, axis=1)

# Check the data
train_data.head()


Unnamed: 0,duration,protocol_type,service,flag,src_bytes,dst_bytes,land,wrong_fragment,urgent,hot,...,dst_host_same_srv_rate,dst_host_diff_srv_rate,dst_host_same_src_port_rate,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate,label,difficulty
0,0,tcp,ftp_data,SF,491,0,0,0,0,0,...,0.17,0.03,0.17,0.0,0.0,0.0,0.05,0.0,normal,20
1,0,udp,other,SF,146,0,0,0,0,0,...,0.0,0.6,0.88,0.0,0.0,0.0,0.0,0.0,normal,15
2,0,tcp,private,S0,0,0,0,0,0,0,...,0.1,0.05,0.0,0.0,1.0,1.0,0.0,0.0,neptune,19
3,0,tcp,http,SF,232,8153,0,0,0,0,...,1.0,0.0,0.03,0.04,0.03,0.01,0.0,0.01,normal,21
4,0,tcp,http,SF,199,420,0,0,0,0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,normal,21


In [2]:
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, StandardScaler

# Separate features and labels. `difficulty` is dropped together with `label`
# so that it is not fed to the model as an input feature.
X_train = train_data.drop(['label', 'difficulty'], axis=1)
y_train = train_data['label']

X_test = test_data.drop(['label', 'difficulty'], axis=1)
y_test = test_data['label']

# Encode categorical features (e.g., protocol_type, service, flag)
categorical_columns = ['protocol_type', 'service', 'flag']

# OrdinalEncoder is scikit-learn's encoder for input features (LabelEncoder is
# documented as intended for the target y only). It assigns the same sorted
# integer codes to the training categories, but a category that occurs only in
# the test split is mapped to -1 instead of raising ValueError in transform().
encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
X_train[categorical_columns] = encoder.fit_transform(X_train[categorical_columns])
X_test[categorical_columns] = encoder.transform(X_test[categorical_columns])

# Scale the data: the scaler is fitted on the training split only and the same
# statistics are reused for the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Train a Random Forest Classifier (random_state is fixed so reruns match)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

# Predict on the test set
y_pred = rf_model.predict(X_test_scaled)

# Evaluate the model.
# A label that appears in y_test but is never predicted has undefined
# precision; zero_division=0 reports that score as 0.0 explicitly instead of
# emitting an UndefinedMetricWarning for each affected metric.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=0))


Accuracy: 0.7221433640880057

Classification Report:
                  precision    recall  f1-score   support

        apache2       0.00      0.00      0.00       737
           back       0.93      0.96      0.94       359
buffer_overflow       0.00      0.00      0.00        20
      ftp_write       0.00      0.00      0.00         3
   guess_passwd       0.00      0.00      0.00      1231
     httptunnel       0.00      0.00      0.00       133
           imap       0.00      0.00      0.00         1
        ipsweep       0.58      0.98      0.73       141
           land       1.00      0.14      0.25         7
     loadmodule       0.00      0.00      0.00         2
       mailbomb       0.00      0.00      0.00       293
          mscan       0.00      0.00      0.00       996
       multihop       0.00      0.00      0.00        18
          named       0.00      0.00      0.00        17
        neptune       0.96      1.00      0.98      4657
           nmap       0.99      1

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [4]:
# Define a simple automated response system based on detected attacks
def automated_response(prediction):
    """Return the canned response message for a predicted traffic label.

    Args:
        prediction: Label to look up (e.g. 'normal', 'neptune').

    Returns:
        The message registered for that label, or a generic
        "Unknown threat" message when the label has no entry.
    """
    fallback = "Unknown threat, further investigation required."
    # NOTE(review): in the KDD'99 attack taxonomy `back` is a denial-of-service
    # attack, so its "Unauthorized access attempt" wording may be mislabeled - confirm.
    playbook = dict(
        normal="No action needed, traffic is safe.",
        neptune="DDoS attack detected! Initiating IP block...",
        smurf="DDoS attack detected! Rate-limiting network traffic...",
        back="Unauthorized access attempt! Alerting security team...",
        satan="Port scanning detected! Locking down firewall rules...",
        warezclient="Suspicious file transfer! Isolating affected machine...",
        # Add more responses for different attack types
    )
    if prediction in playbook:
        return playbook[prediction]
    return fallback

# Example simulation: print the response for the first 5 test predictions.
# Iterating a slice (instead of indexing with range(5)) avoids an IndexError
# when fewer than five predictions are available.
for attack_type in y_pred[:5]:
    response = automated_response(attack_type)
    print(f"Detected: {attack_type} --> Response: {response}")


Detected: neptune --> Response: DDoS attack detected! Initiating IP block...
Detected: neptune --> Response: DDoS attack detected! Initiating IP block...
Detected: normal --> Response: No action needed, traffic is safe.
Detected: ipsweep --> Response: Unknown threat, further investigation required.
Detected: normal --> Response: No action needed, traffic is safe.


In [5]:
# NOTE(review): this definition repeats the `automated_response` defined in the
# previous cell and silently shadows it; keep a single definition.
def automated_response(prediction):
    """Look up the response message registered for a predicted label.

    Args:
        prediction: Label to look up (e.g. 'normal', 'neptune').

    Returns:
        The matching message, or the generic "Unknown threat" message when
        the label is not in the table.
    """
    known_responses = {
        'normal': "No action needed, traffic is safe.",
        'neptune': "DDoS attack detected! Initiating IP block...",
        'smurf': "DDoS attack detected! Rate-limiting network traffic...",
        'back': "Unauthorized access attempt! Alerting security team...",
        'satan': "Port scanning detected! Locking down firewall rules...",
        'warezclient': "Suspicious file transfer! Isolating affected machine...",
        # Add more responses for different attack types
    }
    try:
        return known_responses[prediction]
    except KeyError:
        # Label without a registered response
        return "Unknown threat, further investigation required."

# Testing with the first 5 predictions.
# Iterating a slice (instead of indexing with range(5)) avoids an IndexError
# when fewer than five predictions are available.
for attack_type in y_pred[:5]:
    response = automated_response(attack_type)
    print(f"Detected: {attack_type} --> Response: {response}")


Detected: neptune --> Response: DDoS attack detected! Initiating IP block...
Detected: neptune --> Response: DDoS attack detected! Initiating IP block...
Detected: normal --> Response: No action needed, traffic is safe.
Detected: ipsweep --> Response: Unknown threat, further investigation required.
Detected: normal --> Response: No action needed, traffic is safe.
