In [2]:
# Latest Working Model

import lime.lime_tabular
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_curve

# ---- Data Loading and Preprocessing ---- #

# Columns that hold string categories and therefore need one-hot encoding
categorical_columns = ['proto', 'state', 'service']

# Training split: read the CSV, keep the multi-class target ('attack_cat'
# rather than the binary 'label'), then remove both target columns and
# one-hot encode the categorical features.
# NOTE(review): any row-identifier column present in the CSV is kept as a
# feature here -- confirm that is intended.
X_train_unscaled = pd.read_csv('C:\\UNSW_NB15_training-set.csv')
y_train = X_train_unscaled['attack_cat']
X_train_unscaled = pd.get_dummies(
    X_train_unscaled.drop(columns=['attack_cat', 'label']),
    columns=categorical_columns,
)

# Testing split: identical treatment
X_test_unscaled = pd.read_csv('C:\\UNSW_NB15_testing-set.csv')
y_test = X_test_unscaled['attack_cat']
X_test_unscaled = pd.get_dummies(
    X_test_unscaled.drop(columns=['attack_cat', 'label']),
    columns=categorical_columns,
)

# Keep only the columns present in BOTH splits (inner join on the column axis),
# so dummy columns that exist in just one split are discarded
X_train_unscaled, X_test_unscaled = X_train_unscaled.align(
    X_test_unscaled, join='inner', axis=1
)

# Oversample the training split with SMOTE (multi-class); the test split is left untouched
sm = SMOTE(random_state=42)
X_train_smote, y_train_smote = sm.fit_resample(X_train_unscaled, y_train)

In [None]:
# ---- Model Training and Hyperparameter Tuning ---- #

# Hyperparameter grid for the random forest. Every entry currently lists a
# single value, so the search evaluates exactly one configuration.
param_grid = dict(
    n_estimators=[200],           # Number of trees
    max_depth=[10],               # Maximum tree depth
    min_samples_split=[20],       # Minimum samples required to split
    class_weight=['balanced'],    # Apply class balancing for multi-class
)

# 3-fold cross-validated grid search over a seeded random forest, using all cores
rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(rf, param_grid, cv=3, n_jobs=-1, verbose=2)

# Fit on the SMOTE-resampled training data
grid_search.fit(X_train_smote, y_train_smote)

# Per-class probabilities for the (non-resampled) test set
y_pred_prob = grid_search.best_estimator_.predict_proba(X_test_unscaled)


In [None]:
# ---- Model Evaluation ---- #

# Hard class predictions from the best estimator found by the grid search
y_pred_tuned = grid_search.best_estimator_.predict(X_test_unscaled)

# Print each multi-class report under its own heading: first the confusion
# matrix, then the per-class classification report
for report_title, report_fn in (
    ("Confusion Matrix", confusion_matrix),
    ("Classification Report", classification_report),
):
    print(f"\n{report_title} (Multi-Class Classification):")
    print(report_fn(y_test, y_pred_tuned))

In [None]:
# ---- Predefined Regime for Handling Attacks ---- #

# Predefined actions based on attack categories
# Canned responses keyed by attack category. Built once at definition time
# instead of being re-created on every handle_attack() call.
_ATTACK_RESPONSES = {
    'DoS': "Alert the network administrator to mitigate bandwidth.",
    'Backdoor': "Quarantine affected systems and conduct a security audit.",
    'Exploits': "Patch the system and review user access privileges.",
    'Fuzzers': "Monitor network traffic and implement protocol hardening.",
    'Generic': "Review system configurations and apply security patches.",
    'Reconnaissance': "Monitor suspicious activities and restrict external access.",
    'Shellcode': "Run malware detection tools and isolate the affected systems.",
    'Worms': "Perform a full network scan and isolate infected devices.",
    'Analysis': "Conduct detailed forensic analysis on the traffic data.",
    'Normal': "No action required, system functioning normally."
}
_UNKNOWN_ATTACK_RESPONSE = "Unknown attack type. Further investigation needed."


def handle_attack(attack_type):
    """Return the suggested response for an attack category.

    Args:
        attack_type: Attack category name (e.g. 'DoS', 'Normal').

    Returns:
        The predefined response string for ``attack_type``, or a generic
        "unknown attack type" message when the category is not recognised.
    """
    return _ATTACK_RESPONSES.get(attack_type, _UNKNOWN_ATTACK_RESPONSE)

# Handling attack after prediction
def handle_predicted_attack(y_pred_tuned, y_true):
    """Print the suggested action for every predicted attack category.

    The response is chosen from the model's *prediction* for each instance.
    (Previously the ground-truth label was looked up and printed under the
    "Predicted Attack Type" heading, so the model output was never used.)

    Args:
        y_pred_tuned: Iterable of predicted attack category names, one per instance.
        y_true: Ground-truth labels. Not used to select the response; the
            parameter is retained so existing callers keep working.

    Returns:
        None. One line per instance is written to stdout.
    """
    for idx, pred in enumerate(y_pred_tuned):
        response = handle_attack(pred)
        print(f"Instance {idx+1}: Predicted Attack Type: {pred} | Suggested Action: {response}")

# Executing predefined regime on predicted attacks.
# handle_predicted_attack prints one line per element of y_pred_tuned, so the
# cell output grows with the size of the test set.
handle_predicted_attack(y_pred_tuned, y_test)

In [None]:
# ---- LIME Implementation for Multi-Class ---- #

# Using the column names from X_train_unscaled to ensure feature names match
# the columns of the matrix passed to the explainer below
feature_names = X_train_unscaled.columns

# Initializing LIME explainer for multi-class classification.
# The explainer is built from the aligned, non-resampled training features
# (X_train_unscaled), not from the SMOTE-resampled data the model was fit on.
explainer = lime.lime_tabular.LimeTabularExplainer(
    training_data=X_train_unscaled.values,   # Training features as a plain array
    mode='classification',
    feature_names=feature_names,  # Feature names from the dataset
    class_names=grid_search.best_estimator_.classes_,  # Class names from the model (attack categories)
    discretize_continuous=False  # Set to False to avoid discretizing continuous features
)

# Explain the first five rows of the test set using LIME
for idx in range(5):  # Positional row indices 0..4 of X_test_unscaled
    instance = X_test_unscaled.iloc[idx]
    exp = explainer.explain_instance(
        data_row=instance.values,
        predict_fn=grid_search.best_estimator_.predict_proba,
        num_features=10,  # Show top 10 features
        top_labels=5  # Show explanations for the top 5 predicted classes
    )
    # Render the explanation inline in the notebook
    exp.show_in_notebook()

In [None]:
# ---- Global Feature Importance Plot ---- #

# Importance score per feature from the best estimator, and the feature
# positions ordered from most to least important
importances = grid_search.best_estimator_.feature_importances_
indices = importances.argsort()[::-1]

# Bar chart of the 20 highest-ranked features, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(12, 8))
ax.set_title("Global Feature Importances")
ax.bar(range(20), importances[indices][:20], align="center")
ax.set_xticks(range(20))
ax.set_xticklabels(X_train_unscaled.columns[indices][:20], rotation=90)
ax.set_xlabel("Features")  # X-Axis Label
ax.set_ylabel("Importance Score")
fig.tight_layout()
plt.show()

In [None]:
# ---- Surrogate Model to Simplify ---- #

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# The surrogate is trained to imitate the random forest, so its targets are
# the forest's own predictions rather than the ground-truth labels.
rf_train_pred = grid_search.best_estimator_.predict(X_train_unscaled)
rf_test_pred = grid_search.best_estimator_.predict(X_test_unscaled)

# Train a shallow surrogate decision tree for interpretability.
# random_state is fixed (same seed as the other estimators in this notebook)
# so the fitted tree, and the scores below, are reproducible across runs.
surrogate = DecisionTreeClassifier(max_depth=5, random_state=42)
surrogate.fit(X_train_unscaled, rf_train_pred)

# Predict using the surrogate model
y_surrogate_pred = surrogate.predict(X_test_unscaled)

# Accuracy against the true test labels (how good the simple tree is on its own)
print(f"Surrogate Model Accuracy: {accuracy_score(y_test, y_surrogate_pred)}")

# Fidelity: agreement with the random forest on the test set, i.e. how
# faithfully the surrogate reproduces the model it is meant to explain
print(f"Surrogate Fidelity to Random Forest: {accuracy_score(rf_test_pred, y_surrogate_pred)}")




In [9]:
# import smtplib
# from email.mime.text import MIMEText
# import subprocess
# import logging
# import os
# import pandas as pd
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.model_selection import GridSearchCV
# from imblearn.over_sampling import SMOTE
# from sklearn.metrics import classification_report, confusion_matrix

# # ---- Predefined Regime for Handling Attacks with Automated Actions ---- #

# # Setting up logging for recording actions
# logging.basicConfig(filename='intrusion_detection.log', level=logging.INFO, 
#                     format='%(asctime)s - %(message)s')

# # Send an email alert
# def send_email_alert(attack_type, instance_idx):
#     sender = 'your_email@example.com'
#     recipients = ['admin@example.com']  # List of admin emails
#     subject = f"Intrusion Detected: {attack_type} at instance {instance_idx+1}"
#     body = f"Intrusion Detected: {attack_type}. Immediate action required."

#     msg = MIMEText(body)
#     msg['Subject'] = subject
#     msg['From'] = sender
#     msg['To'] = ', '.join(recipients)

#     try:
#         server = smtplib.SMTP('smtp.example.com', 587)
#         server.starttls()
#         server.login(sender, 'your_email_password')
#         server.sendmail(sender, recipients, msg.as_string())
#         server.quit()
#         print(f"Email alert sent for {attack_type}")
#     except Exception as e:
#         print(f"Failed to send email: {str(e)}")

# # Update firewall rules (Linux iptables example)
# def block_ip(ip_address):
#     try:
#         subprocess.run(['sudo', 'iptables', '-A', 'INPUT', '-s', ip_address, '-j', 'DROP'])
#         logging.info(f"Blocked IP {ip_address} via iptables")
#     except Exception as e:
#         logging.error(f"Failed to block IP {ip_address}: {str(e)}")

# # Quarantine device (example: shutdown device)
# def quarantine_device(device_name):
#     try:
#         subprocess.run(['sudo', 'ifconfig', device_name, 'down'])
#         logging.info(f"Quarantined device {device_name}")
#     except Exception as e:
#         logging.error(f"Failed to quarantine device {device_name}: {str(e)}")

# # Log the attack in a security log
# def log_attack(attack_type, instance_idx):
#     logging.info(f"Attack detected: {attack_type} at instance {instance_idx+1}")

# # Predefined actions based on attack categories
# def handle_attack(attack_type, instance_idx):
#     response_dict = {
#         'DoS': "Alert the network administrator to mitigate bandwidth.",
#         'Backdoor': "Quarantine affected systems and conduct a security audit.",
#         'Exploits': "Patch the system and review user access privileges.",
#         'Fuzzers': "Monitor network traffic and implement protocol hardening.",
#         'Generic': "Review system configurations and apply security patches.",
#         'Reconnaissance': "Monitor suspicious activities and restrict external access.",
#         'Shellcode': "Run malware detection tools and isolate the affected systems.",
#         'Worms': "Perform a full network scan and isolate infected devices.",
#         'Analysis': "Conduct detailed forensic analysis on the traffic data.",
#         'Normal': "No action required, system functioning normally."
#     }
#     action = response_dict.get(attack_type, "Unknown attack type. Further investigation needed.")
    
#     # Take automated action based on attack type
#     if attack_type == 'DoS':
#         send_email_alert(attack_type, instance_idx)
#     elif attack_type == 'Backdoor':
#         quarantine_device('eth0')  # Example: Quarantine network interface eth0
#     elif attack_type == 'Exploits':
#         block_ip('192.168.1.100')  # Example: Block suspicious IP (replace with actual)
    
#     # Log the action
#     log_attack(attack_type, instance_idx)
    
#     return action

# # Example: Handling attack after prediction
# def handle_predicted_attack(y_pred, y_true):
#     for idx, pred in enumerate(y_pred):
#         attack_type = y_true.iloc[idx]  # Actual attack type from the test set
#         response = handle_attack(attack_type, idx)
#         print(f"Instance {idx+1}: Predicted Attack Type: {attack_type} | Suggested Action: {response}")

# # ---- Example: Run the Model and Trigger Actions ---- #
# # Train and run the model (this part assumes the code above already trains the model)
# # y_pred and y_test should come from the model's prediction and actual data

# handle_predicted_attack(y_pred, y_test)


In [10]:
# # Import necessary libraries
# import h2o
# from h2o.estimators import H2ORandomForestEstimator
# import lime.lime_tabular
# import matplotlib.pyplot as plt

# # Initialize H2O cluster
# h2o.init()

# # Load your dataset into H2O frames
# h2o_train = h2o.import_file('C:\\Users\\owner\\Desktop\\CC\\Data_unscaled_raw\\UNSW_NB15_training-set.csv')  # Replace with actual path
# h2o_test = h2o.import_file('C:\\Users\\owner\\Desktop\\CC\\Data_unscaled_raw\\UNSW_NB15_testing-set.csv')  # Replace with actual path

# # Define predictors and response variable
# predictors = h2o_train.columns[:-1]  # All columns except the last one
# response = h2o_train.columns[-1]     # The last column is the target

# # Train a single DRF model with optimized parameters
# drf_model = H2ORandomForestEstimator(
#     ntrees=50,         # Reduce number of trees
#     max_depth=5,       # Shallow trees
#     sample_rate=0.8,   # Use 80% of rows
#     seed=42            # For reproducibility
# )

# # Train the DRF model
# drf_model.train(x=predictors, y=response, training_frame=h2o_train)

# # Evaluate DRF on the test set
# perf = drf_model.model_performance(h2o_test)

# # Print metrics
# print(perf)

# # Print confusion matrix
# print("Confusion Matrix:")
# conf_matrix = perf.confusion_matrix()
# print(conf_matrix)

# # If required, save as Pandas DataFrame
# conf_matrix_df = h2o.as_list(conf_matrix.table)
# print("Confusion Matrix as DataFrame:")
# print(conf_matrix_df)

# # Plot ROC curve for binary classification
# if perf._metric_json.get('thresholds_and_metric_scores', None):
#     print("Plotting ROC curve...")
#     perf.plot(metric="roc")
# else:
#     print("ROC curve plotting not supported for multiclass classification.")



# # Convert test data to Pandas DataFrame for LIME
# X_test = h2o_test[predictors].as_data_frame()
# y_test = h2o_test[response].as_data_frame()

# # Initialize LIME explainer
# explainer = lime.lime_tabular.LimeTabularExplainer(
#     training_data=h2o_train[predictors].as_data_frame().values,
#     mode="classification",
#     feature_names=predictors,
#     class_names=drf_model.levels(response),
#     discretize_continuous=True
# )

# # Function to adapt DRF predictions for LIME
# def drf_predict_fn(input_data):
#     h2o_frame = h2o.H2OFrame(input_data)
#     predictions = drf_model.predict(h2o_frame)
#     return predictions.as_data_frame().iloc[:, 1:].values  # Return probabilities

# # Explain one instance from the test set
# instance_idx = 0  # Change index to explain different instances
# test_instance = X_test.iloc[instance_idx].values
# exp = explainer.explain_instance(
#     data_row=test_instance,
#     predict_fn=drf_predict_fn,
#     num_features=10  # Show top 10 features
# )

# # Display explanation
# exp.show_in_notebook()

# # Save explanation as HTML for further review
# exp.save_to_file('lime_drf_explanation.html')

# # Shutdown H2O cluster when done
# h2o.shutdown(prompt=False)


In [None]:
# ---- Shut Down H2O ---- #
import sys

# The H2O experiment cell (including its `import h2o`) is commented out, so the
# name `h2o` is normally undefined here and an unconditional call would raise
# NameError under Restart & Run All. Only shut the cluster down when the h2o
# module has actually been imported in this kernel.
h2o_module = sys.modules.get("h2o")
if h2o_module is not None:
    h2o_module.shutdown(prompt=False)
