In [1]:
import tkinter as tk
from tkinter import ttk
from tkinter import messagebox
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from PIL import Image, ImageTk
import joblib

In [2]:
# Integer codes used to encode the categorical `protocol_type` column.
pmap = {protocol: code for code, protocol in enumerate(('icmp', 'tcp', 'udp'))}

# Integer codes used to encode the categorical `flag` column.
fmap = {
    flag: code
    for code, flag in enumerate(
        ('SF', 'S0', 'REJ', 'RSTR', 'RSTO', 'SH', 'S1', 'S2', 'RSTOS0', 'S3', 'OTH')
    )
}

# Raw attack label -> attack category ('normal', 'dos', 'u2r', 'r2l' or 'probe').
attacks_types = {
    'normal': 'normal',
    'back': 'dos',
    'buffer_overflow': 'u2r',
    'ftp_write': 'r2l',
    'guess_passwd': 'r2l',
    'imap': 'r2l',
    'ipsweep': 'probe',
    'land': 'dos',
    'loadmodule': 'u2r',
    'multihop': 'r2l',
    'neptune': 'dos',
    'nmap': 'probe',
    'perl': 'u2r',
    'phf': 'r2l',
    'pod': 'dos',
    'portsweep': 'probe',
    'rootkit': 'u2r',
    'satan': 'probe',
    'smurf': 'dos',
    'spy': 'r2l',
    'teardrop': 'dos',
    'warezclient': 'r2l',
    'warezmaster': 'r2l',
}

In [3]:
cols = """duration,
protocol_type,
service,
flag,
src_bytes,
dst_bytes,
land,
wrong_fragment,
urgent,
hot,
num_failed_logins,
logged_in,
num_compromised,
root_shell,
su_attempted,
num_root,
num_file_creations,
num_shells,
num_access_files,
num_outbound_cmds,
is_host_login,
is_guest_login,
count,
srv_count,
serror_rate,
srv_serror_rate,
rerror_rate,
srv_rerror_rate,
same_srv_rate,
diff_srv_rate,
srv_diff_host_rate,
dst_host_count,
dst_host_srv_count,
dst_host_same_srv_rate,
dst_host_diff_srv_rate,
dst_host_same_src_port_rate,
dst_host_srv_diff_host_rate,
dst_host_serror_rate,
dst_host_srv_serror_rate,
dst_host_rerror_rate,
dst_host_srv_rerror_rate"""

# Split the comma-separated listing into clean feature names (blank pieces are
# skipped), then add the label column as the final entry.
columns = [name.strip() for name in cols.split(',') if name.strip()]
columns.append('target')

# Show the resulting column names and how many there are
print(columns)
print(len(columns))


['duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land', 'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised', 'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells', 'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count', 'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate', 'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate', 'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate', 'target']
42


In [4]:
def preprocess_dataset(dataset_path):
    """Load the dataset CSV at ``dataset_path`` and return it cleaned.

    The file is read without a header row, using the module-level ``columns``
    list as column names. The returned frame still contains the raw ``target``
    label and the derived ``Attack Type`` category; scaling and the
    train/test split are left to the caller.

    Args:
        dataset_path: Path (or any source accepted by ``pandas.read_csv``) of
            the comma-separated dataset.

    Returns:
        pandas.DataFrame with:
          * an added ``Attack Type`` column (category looked up in
            ``attacks_types``),
          * columns containing NaN removed,
          * columns with a single unique value removed,
          * a fixed set of highly correlated features and ``service`` removed,
          * ``protocol_type`` and ``flag`` encoded as integers via ``pmap`` /
            ``fmap``.

    Raises:
        KeyError: if a label is missing from ``attacks_types`` or an expected
            column is absent when it is dropped.
    """
    df = pd.read_csv(dataset_path, names=columns)

    # The last character of every raw label is dropped before the lookup
    # (presumably a trailing '.' in the data file -- confirm against the dataset).
    df['Attack Type'] = df['target'].apply(lambda label: attacks_types[label[:-1]])

    # Keep only columns without NaN values that carry more than one unique value
    df = df.dropna(axis='columns')
    df = df[[col for col in df if df[col].nunique() > 1]]

    # Remove highly correlated features and the 'service' column.
    # A non-inplace drop yields a fresh frame, so the assignments below do not
    # write into a slice of the filtered frame above.
    df = df.drop(columns=[
        'num_root', 'srv_serror_rate', 'srv_rerror_rate', 'dst_host_srv_serror_rate',
        'dst_host_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
        'dst_host_same_srv_rate', 'service',
    ])

    # Feature mapping for categorical variables
    df['protocol_type'] = df['protocol_type'].map(pmap)
    df['flag'] = df['flag'].map(fmap)

    return df

In [5]:
def train_model(model_name, X_train, Y_train):
    """Build the classifier called ``model_name`` and fit it on the given data.

    Args:
        model_name: One of 'Naive Bayes', 'Decision Tree', 'Random Forest',
            'SVM', 'Logistic Regression' or 'Gradient Boosting'.
        X_train: Training feature matrix handed to ``fit``.
        Y_train: Training labels handed to ``fit``.

    Returns:
        The fitted estimator, or ``None`` when ``model_name`` is not recognised.
    """
    # Factories are lazy so that only the requested estimator is constructed.
    estimator_factories = {
        'Naive Bayes': lambda: GaussianNB(),
        'Decision Tree': lambda: DecisionTreeClassifier(criterion="entropy", max_depth=4),
        'Random Forest': lambda: RandomForestClassifier(n_estimators=30),
        'SVM': lambda: SVC(gamma='scale'),
        'Logistic Regression': lambda: LogisticRegression(max_iter=1200000),
        'Gradient Boosting': lambda: GradientBoostingClassifier(random_state=0),
    }

    build_estimator = estimator_factories.get(model_name)
    if build_estimator is None:
        return None

    estimator = build_estimator()
    estimator.fit(X_train, Y_train)
    return estimator

In [6]:
def predict_output(model, input_data):
    """Return the model's prediction for one sample.

    ``input_data`` is converted to an array and reshaped into a single row
    before being passed to ``model.predict``. If anything fails, an error
    dialog is shown and ``None`` is returned.
    """
    try:
        single_row = np.array(input_data).reshape(1, -1)
        prediction = model.predict(single_row)[0]
    except Exception as error:
        messagebox.showerror("Error", f"An error occurred during prediction: {str(error)}")
        return None
    return prediction

In [7]:
def display_selected_plot(plot_type, plot_mapping=None):
    """Show a bar chart for ``plot_type`` in a new window.

    Args:
        plot_type: Key identifying which chart to draw.
        plot_mapping: Optional dict mapping each plot type to a
            ``(title, names, values)`` tuple. No chart data is defined in this
            notebook, so when the mapping is omitted (or does not contain
            ``plot_type``) the function does nothing.

    Returns:
        None.
    """
    if not plot_mapping or plot_type not in plot_mapping:
        return

    title, names, values = plot_mapping[plot_type]
    # Draw on an explicit Axes rather than pyplot's implicit "current figure"
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.bar(names, values)
    ax.set_title(title)
    show_plot(fig, title)

In [8]:
def show_plot(fig, title):
    """Open a new top-level window titled ``title`` and embed ``fig`` in it."""
    window = tk.Toplevel(root)
    window.title(title)

    # Render the figure onto a Tk canvas and let it fill the whole window
    figure_canvas = FigureCanvasTkAgg(fig, master=window)
    figure_canvas.draw()
    figure_widget = figure_canvas.get_tk_widget()
    figure_widget.pack(fill=tk.BOTH, expand=True)

In [9]:
def collect_analysis_result():
    """Display the saved plot image for the selected analysis feature.

    Reads the selection from ``selected_analysis_feature``, looks up the
    matching PNG file name and shows the image (resized to 400x300) in
    ``analysis_result_label``. Problems are reported through message boxes so
    they are visible in the GUI rather than only on the console.
    """
    selected_feature = selected_analysis_feature.get()
    if not selected_feature:
        messagebox.showwarning("Warning", "Please select an analysis feature.")
        return

    # Map selected feature to corresponding plot file name.
    # The names are relative, so they resolve against the current working directory.
    plot_mapping = {
        "Training Accuracy": "training_accuracy_plot.png",
        "Testing Accuracy": "test_accuracy_plot.png",
        "Training Time": "train_time_plot.png",
        "Testing Time": "test_time_plot.png"
    }

    plot_filename = plot_mapping.get(selected_feature)
    if not plot_filename:
        messagebox.showwarning("Warning", f"No plot available for the selected feature: {selected_feature}")
        return

    # isfile (not exists) so that a directory with the same name is rejected too
    if not os.path.isfile(plot_filename):
        messagebox.showerror("Error", f"Plot file '{plot_filename}' not found.")
        return

    try:
        # The context manager closes the file handle once the resized copy exists
        with Image.open(plot_filename) as image:
            resized = image.resize((400, 300), Image.LANCZOS)  # Resize image for display
    except OSError as e:
        messagebox.showerror("Error", f"Could not load plot file '{plot_filename}': {str(e)}")
        return

    photo = ImageTk.PhotoImage(resized)

    # Update the image in the analysis result label
    analysis_result_label.configure(image=photo)
    analysis_result_label.image = photo  # Keep reference to avoid garbage collection

In [10]:
import os
import joblib

def submit_model_training():
    """Train the selected model on the dataset and save it to disk.

    Reads the model and dataset selections from the 'Model Training' tab,
    preprocesses and min-max scales the data, fits the model on the training
    part of a 67/33 split (``random_state=42``) and writes it to
    ``trained_model.pkl``. On success the 'Test Model' button is enabled and
    the training dataset dropdown becomes selectable. Any failure is reported
    in an error dialog instead of escaping the Tk callback.
    """
    selected_model = selected_training_model.get()
    selected_dataset = selected_training_dataset.get()

    if not (selected_model and selected_dataset):
        messagebox.showwarning("Warning", "Please select dataset and model for training.")
        return

    # NOTE: both locations are machine-specific absolute paths, and the same
    # dataset file is used whatever the dataset dropdown says.
    dataset_path = "C:\\Users\\Harmehar\\Desktop\\Intrusion-Detection-System\\dataset\\kddcup.data_10_percent.gz"
    trained_model_dir = "C:\\Users\\Harmehar\\Desktop\\Intrusion-Detection-System\\trained-model"
    trained_model_path = os.path.join(trained_model_dir, "trained_model.pkl")

    try:
        df = preprocess_dataset(dataset_path)

        Y = df[['Attack Type']]
        X = df.drop(['Attack Type', 'target'], axis=1)

        sc = MinMaxScaler()
        X = sc.fit_transform(X)

        # Only the training part is needed here; the held-out part is discarded
        X_train, _, Y_train, _ = train_test_split(X, Y, test_size=0.33, random_state=42)

        trained_model = train_model(selected_model, X_train, Y_train.values.ravel())
        # Compare with None explicitly: train_model returns None for an unknown
        # name, and an estimator's truth value is not a reliable success flag.
        if trained_model is None:
            messagebox.showwarning("Warning", f"Unknown model selected: {selected_model}")
            return

        # Create the directory if it doesn't exist, then save the trained model
        os.makedirs(trained_model_dir, exist_ok=True)
        joblib.dump(trained_model, trained_model_path)
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred during training: {str(e)}")
        return

    messagebox.showinfo("Model Training", f"Training completed using {selected_model} on {selected_dataset}.")

    # Enable test button and make the training dataset dropdown selectable
    button_test_model.config(state=tk.NORMAL)
    dataset_dropdown.config(state="readonly")

In [11]:
def perform_model_testing(model, input_data):
    """Return the model's prediction for a single sample.

    Args:
        model: Object exposing ``predict``.
        input_data: Feature values of one sample; any array-like (list, tuple,
            NumPy array, pandas Series) is accepted and flattened into a
            single row.

    Returns:
        The first element of the prediction, or ``None`` if prediction failed
        (an error dialog is shown in that case).
    """
    try:
        # np.asarray lets inputs without a .reshape method (lists, Series, ...)
        # work, while an ndarray is passed through unchanged.
        X = np.asarray(input_data).reshape(1, -1)
        predicted_output = model.predict(X)
        return predicted_output[0]
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred during prediction: {str(e)}")
        return None

In [12]:
def test_trained_model():
    """Evaluate the saved model on the held-out part of the dataset.

    Loads ``trained_model.pkl``, rebuilds the features exactly as
    ``submit_model_training`` does (preprocess, min-max scale, 67/33 split
    with ``random_state=42``) and reports the accuracy on the 33% that was not
    used for training. Failures are reported in an error dialog.
    """
    selected_dataset = selected_training_dataset.get()
    if not selected_dataset:
        messagebox.showwarning("Warning", "Please select a dataset for testing.")
        return

    # NOTE: machine-specific absolute paths; the same dataset file is used
    # whatever the dataset dropdown says.
    trained_model_path = "C:\\Users\\Harmehar\\Desktop\\Intrusion-Detection-System\\trained-model\\trained_model.pkl"
    dataset_path = "C:\\Users\\Harmehar\\Desktop\\Intrusion-Detection-System\\dataset\\kddcup.data_10_percent.gz"

    try:
        try:
            # joblib.load unpickles the file, which can execute arbitrary code:
            # only load model files written by this application.
            trained_model = joblib.load(trained_model_path)
        except FileNotFoundError:
            # Handled separately so a missing dataset is not misreported as a
            # missing model file.
            messagebox.showerror("Error", "Trained model file not found.")
            return

        df = preprocess_dataset(dataset_path)

        Y = df['Attack Type'].to_numpy()
        X = df.drop(['Attack Type', 'target'], axis=1)
        X = MinMaxScaler().fit_transform(X)

        # Same test_size and random_state as training, so this selects the
        # rows the model has not been fitted on.
        _, X_test, _, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

        accuracy = trained_model.score(X_test, Y_test)
        messagebox.showinfo(
            "Test Model",
            f"Tested the trained model on {len(Y_test)} held-out samples.\nAccuracy: {accuracy:.4f}"
        )
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred during testing: {str(e)}")

In [13]:
def _parse_feature_value(feature_name, text):
    """Convert the text typed for ``feature_name`` into a float (0.0 if unusable)."""
    # protocol_type / flag may be typed as their textual value ('tcp', 'SF', ...)
    categorical_codes = {'protocol_type': pmap, 'flag': fmap}.get(feature_name)
    if categorical_codes is not None:
        for candidate in (text, text.lower(), text.upper()):
            if candidate in categorical_codes:
                return float(categorical_codes[candidate])
    try:
        value = float(text)
    except ValueError:
        # Blank or non-numeric input falls back to 0.0
        return 0.0
    return value if np.isfinite(value) else 0.0


def perform_prediction_test(X_train=None, Y_train=None):
    """Predict the attack category for the values typed into 'Test Yourself'.

    The function is used directly as a button command, so it must be callable
    without arguments. In that case the training data is built from the
    dataset with the same preprocessing and min-max scaling used for model
    training, and the typed values are reduced to the features that survive
    preprocessing and scaled with the same scaler before prediction.

    Note that the selected model is re-trained on every call, which can take a
    long time and blocks the GUI while it runs.

    Args:
        X_train: Optional training feature matrix. When given together with
            ``Y_train``, it is used as-is and every entry value is passed to
            the model in on-screen order, without scaling.
        Y_train: Optional training labels matching ``X_train``.
    """
    try:
        selected_model = selected_model_test.get()
        if selected_model not in model_options:
            messagebox.showwarning("Warning", "Please select a model for prediction.")
            return

        # Every entry is created right after the label naming its feature, so
        # walking the children in order pairs each value with its feature name.
        raw_values = {}
        for frame in (basic_features_frame, network_features_frame):
            feature_name = None
            for widget in frame.winfo_children():
                if isinstance(widget, ttk.Entry):
                    raw_values[feature_name] = widget.get().strip()
                elif isinstance(widget, ttk.Label):
                    feature_name = str(widget.cget('text'))

        if X_train is None or Y_train is None:
            # NOTE: machine-specific absolute path, as used elsewhere in this notebook
            dataset_path = "C:\\Users\\Harmehar\\Desktop\\Intrusion-Detection-System\\dataset\\kddcup.data_10_percent.gz"
            df = preprocess_dataset(dataset_path)
            features = df.drop(['Attack Type', 'target'], axis=1)

            scaler = MinMaxScaler()
            X_train = scaler.fit_transform(features.to_numpy())
            Y_train = df['Attack Type'].to_numpy()

            # Keep only the features the model is trained on, in training order
            sample = [_parse_feature_value(name, raw_values.get(name, '')) for name in features.columns]
            input_data = scaler.transform([sample])[0]
        else:
            input_data = [_parse_feature_value(name, text) for name, text in raw_values.items()]

        model = train_model(selected_model, X_train, np.asarray(Y_train).ravel())
        if model is None:
            messagebox.showwarning("Warning", f"Unknown model selected: {selected_model}")
            return

        predicted_output = predict_output(model, input_data)
        # predict_output already shows its own error dialog and returns None on failure
        if predicted_output is not None:
            messagebox.showinfo("Prediction Result", f"Predicted Output (Y): {predicted_output}")
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred during prediction: {str(e)}")

In [14]:
# Application main window
root = tk.Tk()
root.title("Intrusion Detection System Analysis")

# Tabbed container (notebook) that hosts the pages of the application
notebook = ttk.Notebook(master=root)
notebook.pack(fill='both', expand=True, pady=10)

In [15]:
# "Model Training" tab
model_training_frame = ttk.Frame(notebook)
notebook.add(model_training_frame, text='Model Training')

# Create frames for training and testing
training_frame = ttk.LabelFrame(model_training_frame, text='Train Model')
training_frame.grid(row=0, column=0, padx=10, pady=10, sticky='nsew')

testing_frame = ttk.LabelFrame(model_training_frame, text='Test Model')
testing_frame.grid(row=0, column=1, padx=10, pady=10, sticky='nsew')

# Dropdown for selecting model (under training frame)
selected_training_model = tk.StringVar()
model_training_options = ["Naive Bayes", "Decision Tree", "Random Forest", "SVM", "Logistic Regression", "Gradient Boosting"]
# ttk.OptionMenu's third positional argument is the *default* value, not a menu
# entry. Pass the first option explicitly as the default and then the full
# list, otherwise "Naive Bayes" would be missing from the menu.
model_training_dropdown = ttk.OptionMenu(
    training_frame, selected_training_model, model_training_options[0], *model_training_options
)
model_training_dropdown.grid(row=0, column=0, padx=10, pady=10)

# Dropdown for selecting dataset (under training frame).
# It starts disabled; submit_model_training switches it to "readonly" after a
# successful training run.
selected_training_dataset = tk.StringVar(value="kdd")  # Default dataset selection
dataset_options = ["kdd", "other_dataset_1", "other_dataset_2"]  # Add more dataset options
dataset_dropdown = ttk.Combobox(training_frame, textvariable=selected_training_dataset, values=dataset_options, state="disabled")
dataset_dropdown.grid(row=0, column=1, padx=10, pady=10)

# Button to submit model training (under training frame)
train_button = ttk.Button(training_frame, text="Train Model", command=submit_model_training)
train_button.grid(row=1, column=0, columnspan=2, padx=10, pady=10)

# Dataset selection for testing (under testing frame).
# It shares `selected_training_dataset` with the training dropdown, so both
# always show the same value.
label = ttk.Label(testing_frame, text="Select Dataset:")
label.grid(row=0, column=0, padx=10, pady=5, sticky='w')
dataset_dropdown_test = ttk.Combobox(testing_frame, textvariable=selected_training_dataset, values=dataset_options, state="readonly")
dataset_dropdown_test.grid(row=0, column=1, padx=10, pady=5)

# Button to test the trained model (under testing frame); enabled after training
button_test_model = ttk.Button(testing_frame, text="Test Model", command=test_trained_model, state=tk.DISABLED)
button_test_model.grid(row=1, column=0, columnspan=2, pady=10)

In [16]:

# "Test Yourself" tab
test_yourself_frame = ttk.Frame(notebook)
notebook.add(test_yourself_frame, text='Test Yourself')

# Create frames for basic features, network features, and model selection
basic_features_frame = ttk.LabelFrame(test_yourself_frame, text='Basic Features')
basic_features_frame.grid(row=0, column=0, padx=10, pady=10, sticky='nsew')

network_features_frame = ttk.LabelFrame(test_yourself_frame, text='Network Features')
network_features_frame.grid(row=0, column=1, padx=10, pady=10, sticky='nsew')

model_selection_frame = ttk.LabelFrame(test_yourself_frame, text='Model Selection')
model_selection_frame.grid(row=0, column=2, padx=10, pady=10, sticky='nsew')

# Basic features: one label + entry per row
basic_features = ["duration", "protocol_type", "service", "flag", "src_bytes", "dst_bytes", "land", "wrong_fragment",
                   "urgent", "hot", "num_failed_logins", "logged_in", "num_compromised", "root_shell", "su_attempted"]

for idx, feature in enumerate(basic_features):
    label = ttk.Label(basic_features_frame, text=feature)
    label.grid(row=idx, column=0, padx=5, pady=2, sticky='w')
    entry = ttk.Entry(basic_features_frame, width=10)
    entry.grid(row=idx, column=1, padx=5, pady=2)

# Network features: label + entry pairs laid out two per row
network_features = ["num_root", "num_file_creations", "num_shells", "num_access_files", "num_outbound_cmds",
                    "is_host_login", "is_guest_login", "count", "srv_count", "serror_rate", "srv_serror_rate",
                    "rerror_rate", "srv_rerror_rate", "same_srv_rate", "diff_srv_rate", "srv_diff_host_rate",
                    "dst_host_count", "dst_host_srv_count", "dst_host_same_srv_rate", "dst_host_diff_srv_rate",
                    "dst_host_same_src_port_rate", "dst_host_srv_diff_host_rate", "dst_host_serror_rate",
                    "dst_host_srv_serror_rate", "dst_host_rerror_rate", "dst_host_srv_rerror_rate"]

# Number of grid rows needed for two features per row (not used by the loop below)
num_rows = (len(network_features) + 1) // 2
for idx, feature in enumerate(network_features):
    label = ttk.Label(network_features_frame, text=feature)
    col = idx % 2
    row = idx // 2
    label.grid(row=row, column=col * 2, padx=5, pady=2, sticky='w')
    entry = ttk.Entry(network_features_frame, width=10)
    entry.grid(row=row, column=col * 2 + 1, padx=5, pady=2)

# Model selection dropdown and Predict button within model selection frame
model_options = ["Naive Bayes", "Decision Tree", "Random Forest", "SVM", "Logistic Regression", "Gradient Boosting"]
selected_model_test = tk.StringVar(model_selection_frame)
selected_model_test.set("Select Model")

# ttk.OptionMenu's third positional argument is the *default* value, not a menu
# entry. Passing the placeholder explicitly keeps "Select Model" as the initial
# text and keeps every model (including "Naive Bayes") in the menu.
model_menu = ttk.OptionMenu(model_selection_frame, selected_model_test, "Select Model", *model_options)
model_menu.grid(row=0, column=0, padx=5, pady=5)

button_predict_test = ttk.Button(model_selection_frame, text="Predict Output (Y)", command=perform_prediction_test)
button_predict_test.grid(row=1, column=0, padx=5, pady=5)

# Configure weights for row and column resizing within the test_yourself_frame
test_yourself_frame.columnconfigure((0, 1, 2), weight=1)
test_yourself_frame.rowconfigure(0, weight=1)

In [17]:
# "Analysis Results" tab
analysis_results_frame = ttk.Frame(notebook)
notebook.add(analysis_results_frame, text='Analysis Results')

# Dropdown for selecting analysis feature
selected_analysis_feature = tk.StringVar()
analysis_feature_options = ["Training Accuracy", "Testing Accuracy", "Training Time", "Testing Time"]
# ttk.OptionMenu's third positional argument is the *default* value, not a menu
# entry. Pass the first option explicitly as the default and then the full
# list, otherwise "Training Accuracy" would be missing from the menu.
analysis_dropdown = ttk.OptionMenu(
    analysis_results_frame, selected_analysis_feature, analysis_feature_options[0], *analysis_feature_options
)
analysis_dropdown.pack(padx=10, pady=10)

# Button to display selected analysis feature
analysis_button = ttk.Button(analysis_results_frame, text="Show Analysis", command=collect_analysis_result)
analysis_button.pack(padx=10, pady=10)

# Label to display analysis result (image)
analysis_result_label = ttk.Label(analysis_results_frame)
analysis_result_label.pack(padx=10, pady=10)

In [18]:
# Run the Tkinter main loop. This call blocks until the main window is closed,
# so it must stay the last statement of the notebook.
root.mainloop()