In [9]:
import tkinter as tk
from tkinter import ttk
from tkinter import messagebox
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import numpy as np

def display_selected_plot(plot_type):
    plot_mapping = {
        "Training Accuracy": ("Training Accuracy", ['NB', 'DT', 'RF', 'SVM', 'LR', 'GB'], [87.951, 99.058, 99.997, 99.875, 99.352, 99.793]),
        "Testing Accuracy": ("Testing Accuracy", ['NB', 'DT', 'RF', 'SVM', 'LR', 'GB'], [87.903, 99.052, 99.969, 99.879, 99.352, 99.771]),
        "Training Time": ("Training Time (seconds)", ['NB', 'DT', 'RF', 'SVM', 'LR', 'GB'], [1.04721, 1.50483, 11.45332, 126.96016, 56.67286, 446.69099]),
        "Testing Time": ("Testing Time (seconds)", ['NB', 'DT', 'RF', 'SVM', 'LR', 'GB'], [0.79089, 0.10471, 0.60961, 32.72654, 0.02198, 1.41416])
    }

    if plot_type in plot_mapping:
        title, names, values = plot_mapping[plot_type]
        fig = plt.figure(figsize=(6, 4))
        plt.bar(names, values)
        plt.title(title)
        show_plot(fig, title)

def predict_output(model, input_data):
    try:
        X = np.array(input_data).reshape(1, -1)
        Y_output = model.predict(X)
        return Y_output[0]
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred during prediction: {str(e)}")

def show_plot(fig, title):
    plot_window = tk.Toplevel(root)
    plot_window.title(title)
    canvas = FigureCanvasTkAgg(fig, master=plot_window)
    canvas.draw()
    canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)

def collect_analysis_result():
    selected_feature = selected_analysis_feature.get()
    if selected_feature:
        display_selected_plot(selected_feature)
    else:
        messagebox.showwarning("Warning", "Please select an analysis feature.")

def submit_model_training():
    selected_dataset = selected_training_dataset.get()
    selected_model = selected_training_model.get()
    
    if selected_dataset and selected_model:
        # Perform model training based on selected dataset and model
        messagebox.showinfo("Model Training", f"Training model using {selected_model} on {selected_dataset}.")
    else:
        messagebox.showwarning("Warning", "Please select dataset and model for training.")

root = tk.Tk()
root.title("Intrusion Detection System Analysis")

# Create a notebook (tabbed interface)
notebook = ttk.Notebook(root)
notebook.pack(pady=10, fill='both', expand=True)

# First tab: "Test Yourself"
test_yourself_frame = ttk.Frame(notebook)
notebook.add(test_yourself_frame, text='Test Yourself')

# Create frames for basic features, network features, and model selection
basic_features_frame = ttk.LabelFrame(test_yourself_frame, text='Basic Features')
basic_features_frame.grid(row=0, column=0, padx=10, pady=10, sticky='nsew')

network_features_frame = ttk.LabelFrame(test_yourself_frame, text='Network Features')
network_features_frame.grid(row=0, column=1, padx=10, pady=10, sticky='nsew')

model_selection_frame = ttk.LabelFrame(test_yourself_frame, text='Model Selection')
model_selection_frame.grid(row=0, column=2, padx=10, pady=10, sticky='nsew')

# Populate basic features frame (example)
basic_features = [
    "duration", "protocol_type", "service", "flag", "src_bytes", "dst_bytes", "land", "wrong_fragment",
    "urgent", "hot", "num_failed_logins", "logged_in", "num_compromised", "root_shell", "su_attempted"
]

for idx, feature in enumerate(basic_features):
    label = ttk.Label(basic_features_frame, text=feature)
    label.grid(row=idx, column=0, padx=5, pady=2, sticky='w')
    entry = ttk.Entry(basic_features_frame, width=10)
    entry.grid(row=idx, column=1, padx=5, pady=2)

# Populate network features frame (example)
network_features = [
    "num_root", "num_file_creations", "num_shells", "num_access_files", "num_outbound_cmds", "is_host_login",
    "is_guest_login", "count", "srv_count", "serror_rate", "srv_serror_rate", "rerror_rate", "srv_rerror_rate",
    "same_srv_rate", "diff_srv_rate", "srv_diff_host_rate", "dst_host_count", "dst_host_srv_count",
    "dst_host_same_srv_rate", "dst_host_diff_srv_rate", "dst_host_same_src_port_rate", "dst_host_srv_diff_host_rate",
    "dst_host_serror_rate", "dst_host_srv_serror_rate", "dst_host_rerror_rate", "dst_host_srv_rerror_rate"
]

for idx, feature in enumerate(network_features):
    label = ttk.Label(network_features_frame, text=feature)
    label.grid(row=idx, column=0, padx=5, pady=2, sticky='w')
    entry = ttk.Entry(network_features_frame, width=10)
    entry.grid(row=idx, column=1, padx=5, pady=2)

# Model selection dropdown and Predict button within model selection frame
model_options = ["Decision Tree", "Random Forest", "SVM", "Logistic Regression", "Gradient Boosting"]
selected_model_test = tk.StringVar(model_selection_frame)
selected_model_test.set("Select Model")  

model_menu = ttk.OptionMenu(model_selection_frame, selected_model_test, *model_options)
model_menu.grid(row=0, column=0, padx=5, pady=5)

def perform_prediction_test():
    try:
        input_data = []
        all_entries = basic_features_frame.winfo_children() + network_features_frame.winfo_children()
        for widget in all_entries:
            if isinstance(widget, ttk.Entry):
                value = float(widget.get()) if widget.get().strip().replace('.', '', 1).isdigit() else 0.0
                input_data.append(value)

        selected_model = selected_model_test.get()
        
        model_map = {
            "Decision Tree": model2,
            "Random Forest": model3,
            "SVM": model4,
            "Logistic Regression": model5,
            "Gradient Boosting": model6
        }

        if selected_model in model_map:
            model = model_map[selected_model]
            predicted_output = predict_output(model, input_data)
            messagebox.showinfo("Prediction Result", f"Predicted Output (Y): {predicted_output}")
        else:
            messagebox.showwarning("Warning", "Please select a valid model for prediction.")
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred during prediction: {str(e)}")

button_predict_test = ttk.Button(model_selection_frame, text="Predict Output (Y)", command=perform_prediction_test)
button_predict_test.grid(row=1, column=0, padx=5, pady=5)

# Configure column weights to evenly distribute the width
test_yourself_frame.grid_columnconfigure(0, weight=1)
test_yourself_frame.grid_columnconfigure(1, weight=1)
test_yourself_frame.grid_columnconfigure(2, weight=1)

# Second tab: "Analysis Results"
analysis_results_frame = ttk.Frame(notebook)
notebook.add(analysis_results_frame, text='Analysis Results')

# Dropdown for selecting analysis feature
selected_analysis_feature = tk.StringVar()
analysis_feature_options = ["Training Accuracy", "Testing Accuracy", "Training Time", "Testing Time"]
analysis_dropdown = ttk.OptionMenu(analysis_results_frame, selected_analysis_feature, *analysis_feature_options)
analysis_dropdown.pack(padx=10, pady=10)

# Button to display selected analysis feature
analysis_button = ttk.Button(analysis_results_frame, text="Show Analysis", command=collect_analysis_result)
analysis_button.pack(padx=10, pady=10)

# Third tab: "Model Training"
model_training_frame = ttk.Frame(notebook)
notebook.add(model_training_frame, text='Model Training')

# Dropdowns for selecting dataset and model
selected_training_dataset = tk.StringVar()
training_dataset_options = ["KDD Cup"]
dataset_dropdown = ttk.OptionMenu(model_training_frame, selected_training_dataset, *training_dataset_options)
dataset_dropdown.pack(padx=10, pady=10)

selected_training_model = tk.StringVar()
model_training_options = ["Decision Tree", "Random Forest", "SVM", "Logistic Regression", "Gradient Boosting"]
model_training_dropdown = ttk.OptionMenu(model_training_frame, selected_training_model, *model_training_options)
model_training_dropdown.pack(padx=10, pady=10)

# Button to submit model training
model_training_button = ttk.Button(model_training_frame, text="Train Model", command=submit_model_training)
model_training_button.pack(padx=10, pady=10)

# Run the Tkinter main loop
root.mainloop()


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier

def load_data(file_path):
    columns = ['duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land', 'wrong_fragment',
               'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised', 'root_shell', 'su_attempted',
               'num_file_creations', 'num_shells', 'num_access_files', 'num_outbound_cmds', 'is_host_login',
               'is_guest_login', 'count', 'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate',
               'srv_rerror_rate', 'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
               'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
               'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
               'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate', 'target']
    
    df = pd.read_csv(file_path, names=columns)
    
    attacks_types = {
        'normal': 'normal',
        'back': 'dos',
        'buffer_overflow': 'u2r',
        'ftp_write': 'r2l',
        'guess_passwd': 'r2l',
        'imap': 'r2l',
        'ipsweep': 'probe',
        'land': 'dos',
        'loadmodule': 'u2r',
        'multihop': 'r2l',
        'neptune': 'dos',
        'nmap': 'probe',
        'perl': 'u2r',
        'phf': 'r2l',
        'pod': 'dos',
        'portsweep': 'probe',
        'rootkit': 'u2r',
        'satan': 'probe',
        'smurf': 'dos',
        'spy': 'r2l',
        'teardrop': 'dos',
        'warezclient': 'r2l',
        'warezmaster': 'r2l'
    }
    
    df['Attack Type'] = df['target'].apply(lambda r: attacks_types[r[:-1]])
    
    return df

def preprocess_data(df):
    df = df.dropna(axis='columns')
    
    # Dropping highly correlated columns
    df.drop(['num_root', 'srv_serror_rate', 'srv_rerror_rate', 'dst_host_srv_serror_rate',
             'dst_host_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
             'dst_host_same_srv_rate'], axis=1, inplace=True)
    
    # Feature mapping
    df['protocol_type'] = df['protocol_type'].map({'icmp': 0, 'tcp': 1, 'udp': 2})
    df['flag'] = df['flag'].map({'SF': 0, 'S0': 1, 'REJ': 2, 'RSTR': 3, 'RSTO': 4, 'SH': 5,
                                 'S1': 6, 'S2': 7, 'RSTOS0': 8, 'S3': 9, 'OTH': 10})
    
    df.drop('service', axis=1, inplace=True)
    
    return df

def train_and_evaluate_model(model, X_train, X_test, Y_train, Y_test):
    start_time = time.time()
    model.fit(X_train, Y_train.values.ravel())
    end_time = time.time()
    
    train_time = end_time - start_time
    
    start_time = time.time()
    Y_test_pred = model.predict(X_test)
    end_time = time.time()
    
    test_time = end_time - start_time
    
    train_score = model.score(X_train, Y_train)
    test_score = model.score(X_test, Y_test)
    
    return train_score, test_score, train_time, test_time

def main():
    file_path = "C:\\Users\\Harmehar\\Desktop\\Intrusion-Detection-System\\dataset\\kddcup.data_10_percent.gz"
    df = load_data(file_path)
    df = preprocess_data(df)
    
    Y = df[['Attack Type']]
    X = df.drop(['Attack Type'], axis=1)
    
    sc = MinMaxScaler()
    X = sc.fit_transform(X)
    
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42)
    
    models = {
        'Naive Bayes': GaussianNB(),
        'Decision Tree': DecisionTreeClassifier(criterion="entropy", max_depth=4),
        'Random Forest': RandomForestClassifier(n_estimators=30),
        'SVM': SVC(gamma='scale'),
        'Logistic Regression': LogisticRegression(max_iter=1200000),
        'Gradient Boosting': GradientBoostingClassifier(random_state=0)
    }
    
    results = {}
    
    for name, model in models.items():
        train_score, test_score, train_time, test_time = train_and_evaluate_model(model, X_train, X_test, Y_train, Y_test)
        results[name] = {
            'Train Score': train_score,
            'Test Score': test_score,
            'Train Time': train_time,
            'Test Time': test_time
        }
    
    # Display results or save them as needed
    for name, metrics in results.items():
        print(f"{name}:")
        print(f"  Train Score: {metrics['Train Score']:.3f}")
        print(f"  Test Score: {metrics['Test Score']:.3f}")
        print(f"  Train Time: {metrics['Train Time']:.3f} seconds")
        print(f"  Test Time: {metrics['Test Time']:.3f} seconds")
        print()

if __name__ == "__main__":
    main()


In [18]:
import tkinter as tk
from tkinter import ttk
from tkinter import messagebox
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier

pmap = {'icmp': 0, 'tcp': 1, 'udp': 2}
fmap = {'SF': 0, 'S0': 1, 'REJ': 2, 'RSTR': 3, 'RSTO': 4, 'SH': 5, 'S1': 6, 'S2': 7, 'RSTOS0': 8, 'S3': 9, 'OTH': 10}
attacks_types = {
    'normal': 'normal', 'back': 'dos', 'buffer_overflow': 'u2r', 'ftp_write': 'r2l',
    'guess_passwd': 'r2l', 'imap': 'r2l', 'ipsweep': 'probe', 'land': 'dos',
    'loadmodule': 'u2r', 'multihop': 'r2l', 'neptune': 'dos', 'nmap': 'probe',
    'perl': 'u2r', 'phf': 'r2l', 'pod': 'dos', 'portsweep': 'probe',
    'rootkit': 'u2r', 'satan': 'probe', 'smurf': 'dos', 'spy': 'r2l',
    'teardrop': 'dos', 'warezclient': 'r2l', 'warezmaster': 'r2l'
}

# Read dataset and perform preprocessing
def preprocess_dataset(dataset_path):
    cols = """duration,
    protocol_type,
    service,
    flag,
    src_bytes,
    dst_bytes,
    land,
    wrong_fragment,
    urgent,
    hot,
    num_failed_logins,
    logged_in,
    num_compromised,
    root_shell,
    su_attempted,
    num_root,
    num_file_creations,
    num_shells,
    num_access_files,
    num_outbound_cmds,
    is_host_login,
    is_guest_login,
    count,
    srv_count,
    serror_rate,
    srv_serror_rate,
    rerror_rate,
    srv_rerror_rate,
    same_srv_rate,
    diff_srv_rate,
    srv_diff_host_rate,
    dst_host_count,
    dst_host_srv_count,
    dst_host_same_srv_rate,
    dst_host_diff_srv_rate,
    dst_host_same_src_port_rate,
    dst_host_srv_diff_host_rate,
    dst_host_serror_rate,
    dst_host_srv_serror_rate,
    dst_host_rerror_rate,
    dst_host_srv_rerror_rate"""

    # Parse the columns string to extract valid column names
    columns = []
    for c in cols.split(','):
        if c.strip():
            columns.append(c.strip())

    # Append the target column name to the list of columns
    columns.append('target')

    # Print the list of columns and its length
    print(columns)
    print(len(columns))

    attacks_types = {
        'normal': 'normal',
        'back': 'dos',
        'buffer_overflow': 'u2r',
        'ftp_write': 'r2l',
        'guess_passwd': 'r2l',
        'imap': 'r2l',
        'ipsweep': 'probe',
        'land': 'dos',
        'loadmodule': 'u2r',
        'multihop': 'r2l',
        'neptune': 'dos',
        'nmap': 'probe',
        'perl': 'u2r',
        'phf': 'r2l',
        'pod': 'dos',
        'portsweep': 'probe',
        'rootkit': 'u2r',
        'satan': 'probe',
        'smurf': 'dos',
        'spy': 'r2l',
        'teardrop': 'dos',
        'warezclient': 'r2l',
        'warezmaster': 'r2l',
    }
    
    dataset_path = "C:\\Users\\Harmehar\\Desktop\\Intrusion-Detection-System\\dataset\\kddcup.data_10_percent.gz"
    df = pd.read_csv(dataset_path, names=columns)
    df['Attack Type'] = df.target.apply(lambda r: attacks_types[r[:-1]])
    
    # Adding Attack Type column based on the target column
    df['Attack Type'] = df.target.apply(lambda r: attacks_types[r[:-1]])

    # Drop columns with NaN values and keep columns with more than 1 unique value
    df = df.dropna(axis='columns')
    df = df[[col for col in df if df[col].nunique() > 1]]

    # Remove highly correlated features
    df.drop(['num_root', 'srv_serror_rate', 'srv_rerror_rate', 'dst_host_srv_serror_rate',
             'dst_host_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
             'dst_host_same_srv_rate'], axis=1, inplace=True)

    # Feature mapping for categorical variables
    df['protocol_type'] = df['protocol_type'].map(pmap)
    df['flag'] = df['flag'].map(fmap)

    # Drop 'service' column
    df.drop('service', axis=1, inplace=True)

    # Split data into features (X) and target (Y)
    Y = df[['Attack Type']]
    X = df.drop(['Attack Type', 'target'], axis=1)

    # Scale the features using Min-Max Scaling
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)

    # Split data into training and testing sets
    X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.33, random_state=42)

    # Return preprocessed data
    X_train, X_test, Y_train, Y_test
    
    return df

# Train the selected model on the dataset
def train_model(model_name, X_train, Y_train):
    if model_name == 'Naive Bayes':
        model = GaussianNB()
    elif model_name == 'Decision Tree':
        model = DecisionTreeClassifier(criterion="entropy", max_depth=4)
    elif model_name == 'Random Forest':
        model = RandomForestClassifier(n_estimators=30)
    elif model_name == 'SVM':
        model = SVC(gamma='scale')
    elif model_name == 'Logistic Regression':
        model = LogisticRegression(max_iter=1200000)
    elif model_name == 'Gradient Boosting':
        model = GradientBoostingClassifier(random_state=0)
    else:
        return None
    
    model.fit(X_train, Y_train)
    return model

# Predict output using the trained model
def predict_output(model, input_data):
    try:
        X = np.array(input_data).reshape(1, -1)
        Y_output = model.predict(X)
        return Y_output[0]
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred during prediction: {str(e)}")

# Display selected plot based on analysis feature
def display_selected_plot(plot_type):
    plot_mapping = {...}
    if plot_type in plot_mapping:
        title, names, values = plot_mapping[plot_type]
        fig = plt.figure(figsize=(6, 4))
        plt.bar(names, values)
        plt.title(title)
        show_plot(fig, title)

def show_plot(fig, title):
    plot_window = tk.Toplevel(root)
    plot_window.title(title)
    canvas = FigureCanvasTkAgg(fig, master=plot_window)
    canvas.draw()
    canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)

def collect_analysis_result():
    selected_feature = selected_analysis_feature.get()
    if selected_feature:
        display_selected_plot(selected_feature)
    else:
        messagebox.showwarning("Warning", "Please select an analysis feature.")

def submit_model_training():
    selected_dataset = selected_training_dataset.get()
    selected_model = selected_training_model.get()
    
    if selected_dataset and selected_model:
        dataset_path = f"path/to/datasets/{selected_dataset}.csv"  # Update with dataset path
        df = preprocess_dataset(dataset_path)
        
        Y = df[['Attack Type']]
        X = df.drop(['Attack Type', 'target'], axis=1)
        
        sc = MinMaxScaler()
        X = sc.fit_transform(X)
        
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42)
        
        trained_model = train_model(selected_model, X_train, Y_train.values.ravel())
        if trained_model:
            messagebox.showinfo("Model Training", f"Training completed using {selected_model} on {selected_dataset}.")
    else:
        messagebox.showwarning("Warning", "Please select dataset and model for training.")

root = tk.Tk()
root.title("Intrusion Detection System Analysis")

# Create a notebook (tabbed interface)
notebook = ttk.Notebook(root)
notebook.pack(pady=10, fill='both', expand=True)

# First tab: "Test Yourself"
test_yourself_frame = ttk.Frame(notebook)
notebook.add(test_yourself_frame, text='Test Yourself')

# Create frames for basic features, network features, and model selection
basic_features_frame = ttk.LabelFrame(test_yourself_frame, text='Basic Features')
basic_features_frame.grid(row=0, column=0, padx=10, pady=10, sticky='nsew')

network_features_frame = ttk.LabelFrame(test_yourself_frame, text='Network Features')
network_features_frame.grid(row=0, column=1, padx=10, pady=10, sticky='nsew')

model_selection_frame = ttk.LabelFrame(test_yourself_frame, text='Model Selection')
model_selection_frame.grid(row=0, column=2, padx=10, pady=10, sticky='nsew')

# Populate basic features frame (example)
basic_features = ["duration", "protocol_type", "service", "flag", "src_bytes", "dst_bytes", "land", "wrong_fragment",
                   "urgent", "hot", "num_failed_logins", "logged_in", "num_compromised", "root_shell", "su_attempted"]

for idx, feature in enumerate(basic_features):
    label = ttk.Label(basic_features_frame, text=feature)
    label.grid(row=idx, column=0, padx=5, pady=2, sticky='w')
    entry = ttk.Entry(basic_features_frame, width=10)
    entry.grid(row=idx, column=1, padx=5, pady=2)

# Populate network features frame (example)
network_features = ["num_root", "num_file_creations", "num_shells", "num_access_files", "num_outbound_cmds",
                    "is_host_login", "is_guest_login", "count", "srv_count", "serror_rate", "srv_serror_rate",
                    "rerror_rate", "srv_rerror_rate", "same_srv_rate", "diff_srv_rate", "srv_diff_host_rate",
                    "dst_host_count", "dst_host_srv_count", "dst_host_same_srv_rate", "dst_host_diff_srv_rate",
                    "dst_host_same_src_port_rate", "dst_host_srv_diff_host_rate", "dst_host_serror_rate",
                    "dst_host_srv_serror_rate", "dst_host_rerror_rate", "dst_host_srv_rerror_rate"]

for idx, feature in enumerate(network_features):
    label = ttk.Label(network_features_frame, text=feature)
    label.grid(row=idx, column=0, padx=5, pady=2, sticky='w')
    entry = ttk.Entry(network_features_frame, width=10)
    entry.grid(row=idx, column=1, padx=5, pady=2)

# Model selection dropdown and Predict button within model selection frame
model_options = ["Naive Bayes", "Decision Tree", "Random Forest", "SVM", "Logistic Regression", "Gradient Boosting"]
selected_model_test = tk.StringVar(model_selection_frame)
selected_model_test.set("Select Model")

model_menu = ttk.OptionMenu(model_selection_frame, selected_model_test, *model_options)
model_menu.grid(row=0, column=0, padx=5, pady=5)

def perform_prediction_test():
    try:
        input_data = []
        all_entries = basic_features_frame.winfo_children() + network_features_frame.winfo_children()
        for widget in all_entries:
            if isinstance(widget, ttk.Entry):
                value = float(widget.get()) if widget.get().strip().replace('.', '', 1).isdigit() else 0.0
                input_data.append(value)

        selected_model = selected_model_test.get()

        model = train_model(selected_model, X_train, Y_train.values.ravel())
        if model:
            predicted_output = predict_output(model, input_data)
            messagebox.showinfo("Prediction Result", f"Predicted Output (Y): {predicted_output}")
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred during prediction: {str(e)}")

button_predict_test = ttk.Button(model_selection_frame, text="Predict Output (Y)", command=perform_prediction_test)
button_predict_test.grid(row=1, column=0, padx=5, pady=5)

# Second tab: "Analysis Results"
analysis_results_frame = ttk.Frame(notebook)
notebook.add(analysis_results_frame, text='Analysis Results')

# Dropdown for selecting analysis feature
selected_analysis_feature = tk.StringVar()
analysis_feature_options = ["Training Accuracy", "Testing Accuracy", "Training Time", "Testing Time"]
analysis_dropdown = ttk.OptionMenu(analysis_results_frame, selected_analysis_feature, *analysis_feature_options)
analysis_dropdown.pack(padx=10, pady=10)

# Button to display selected analysis feature
analysis_button = ttk.Button(analysis_results_frame, text="Show Analysis", command=collect_analysis_result)
analysis_button.pack(padx=10, pady=10)

# Third tab: "Model Training"
model_training_frame = ttk.Frame(notebook)
notebook.add(model_training_frame, text='Model Training')

# Dropdowns for selecting dataset and model
selected_training_dataset = tk.StringVar()
training_dataset_options = ["KDD Cup"]
dataset_dropdown = ttk.OptionMenu(model_training_frame, selected_training_dataset, *training_dataset_options)
dataset_dropdown.pack(padx=10, pady=10)

selected_training_model = tk.StringVar()
model_training_options = ["Naive Bayes", "Decision Tree", "Random Forest", "SVM", "Logistic Regression", "Gradient Boosting"]
model_training_dropdown = ttk.OptionMenu(model_training_frame, selected_training_model, *model_training_options)
model_training_dropdown.pack(padx=10, pady=10)

# Button to submit model training
model_training_button = ttk.Button(model_training_frame, text="Train Model", command=submit_model_training)
model_training_button.pack(padx=10, pady=10)

# Configure column weights to evenly distribute the width
test_yourself_frame.grid_columnconfigure(0, weight=1)
test_yourself_frame.grid_columnconfigure(1, weight=1)
test_yourself_frame.grid_columnconfigure(2, weight=1)

# Run the Tkinter main loop
root.mainloop()


In [None]:
import tkinter as tk
from tkinter import ttk, messagebox
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier

pmap = {'icmp': 0, 'tcp': 1, 'udp': 2}
fmap = {'SF': 0, 'S0': 1, 'REJ': 2, 'RSTR': 3, 'RSTO': 4, 'SH': 5, 'S1': 6, 'S2': 7, 'RSTOS0': 8, 'S3': 9, 'OTH': 10}

def preprocess_dataset(dataset_path):
    cols = """duration, protocol_type, service, flag, src_bytes, dst_bytes, land, wrong_fragment,
              urgent, hot, num_failed_logins, logged_in, num_compromised, root_shell, su_attempted,
              num_root, num_file_creations, num_shells, num_access_files, num_outbound_cmds,
              is_host_login, is_guest_login, count, srv_count, serror_rate, srv_serror_rate,
              rerror_rate, srv_rerror_rate, same_srv_rate, diff_srv_rate, srv_diff_host_rate,
              dst_host_count, dst_host_srv_count, dst_host_same_srv_rate, dst_host_diff_srv_rate,
              dst_host_same_src_port_rate, dst_host_srv_diff_host_rate, dst_host_serror_rate,
              dst_host_srv_serror_rate, dst_host_rerror_rate, dst_host_srv_rerror_rate"""

    columns = [c.strip() for c in cols.split(',')]

    df = pd.read_csv(dataset_path, names=columns)

    attacks_types = {
        'normal': 'normal', 'back': 'dos', 'buffer_overflow': 'u2r', 'ftp_write': 'r2l',
        'guess_passwd': 'r2l', 'imap': 'r2l', 'ipsweep': 'probe', 'land': 'dos',
        'loadmodule': 'u2r', 'multihop': 'r2l', 'neptune': 'dos', 'nmap': 'probe',
        'perl': 'u2r', 'phf': 'r2l', 'pod': 'dos', 'portsweep': 'probe',
        'rootkit': 'u2r', 'satan': 'probe', 'smurf': 'dos', 'spy': 'r2l',
        'teardrop': 'dos', 'warezclient': 'r2l', 'warezmaster': 'r2l'
    }

    df['Attack Type'] = df.target.apply(lambda r: attacks_types[r[:-1]])

    # Drop columns with NaN values and columns with only one unique value
    df.dropna(axis='columns', inplace=True)
    df = df[[col for col in df if df[col].nunique() > 1]]

    # Remove highly correlated features
    df.drop(['num_root', 'srv_serror_rate', 'srv_rerror_rate', 'dst_host_srv_serror_rate',
             'dst_host_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
             'dst_host_same_srv_rate'], axis=1, inplace=True)

    # Feature mapping for categorical variables
    df['protocol_type'] = df['protocol_type'].map(pmap)
    df['flag'] = df['flag'].map(fmap)

    # Drop 'service' column
    df.drop('service', axis=1, inplace=True)

    # Split data into features (X) and target (Y)
    Y = df[['Attack Type']]
    X = df.drop(['Attack Type', 'target'], axis=1)

    # Scale the features using Min-Max Scaling
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)

    return X_scaled, Y

def train_model(model_name, X_train, Y_train):
    if model_name == 'Naive Bayes':
        model = GaussianNB()
    elif model_name == 'Decision Tree':
        model = DecisionTreeClassifier(criterion="entropy", max_depth=4)
    elif model_name == 'Random Forest':
        model = RandomForestClassifier(n_estimators=30)
    elif model_name == 'SVM':
        model = SVC(gamma='scale')
    elif model_name == 'Logistic Regression':
        model = LogisticRegression(max_iter=1200000)
    elif model_name == 'Gradient Boosting':
        model = GradientBoostingClassifier(random_state=0)
    else:
        return None
    
    model.fit(X_train, Y_train)
    return model

def perform_model_testing(model, input_data):
    try:
        X = input_data.reshape(1, -1)
        predicted_output = model.predict(X)
        return predicted_output[0]
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred during prediction: {str(e)}")

def submit_model_training():
    dataset_path = "C:\\Users\\Harmehar\\Desktop\\Intrusion-Detection-System\\dataset\\kddcup.data_10_percent.gz"
    X_train, Y_train = preprocess_dataset(dataset_path)
    
    selected_model = selected_training_model.get()
    if selected_model:
        trained_model = train_model(selected_model, X_train, Y_train.values.ravel())
        if trained_model:
            messagebox.showinfo("Model Training", f"Training completed using {selected_model} on KDD Cup dataset.")

            # Enable the testing button after training
            button_test_model.config(state=tk.NORMAL)
        else:
            messagebox.showwarning("Warning", "Failed to train the model.")
    else:
        messagebox.showwarning("Warning", "Please select a model for training.")

def test_trained_model():
    selected_model = selected_training_model.get()
    if selected_model:
        try:
            # Retrieve input data for testing
            input_data = []
            for entry in testing_entries:
                value = float(entry.get()) if entry.get().strip().replace('.', '', 1).isdigit() else 0.0
                input_data.append(value)

            # Perform model testing
            predicted_output = perform_model_testing(trained_model, np.array(input_data))
            messagebox.showinfo("Model Testing", f"Predicted Output (Y): {predicted_output}")
        except Exception as e:
            messagebox.showerror("Error", f"An error occurred during model testing: {str(e)}")
    else:
        messagebox.showwarning("Warning", "Please select a model for testing.")

root = tk.Tk()
root.title("Intrusion Detection System Analysis")

# Model Training Frame
model_training_frame = ttk.Frame(root)
model_training_frame.pack(pady=10)

# Dropdown for selecting model
selected_training_model = tk.StringVar()
model_training_options = ["Naive Bayes", "Decision Tree", "Random Forest", "SVM", "Logistic Regression", "Gradient Boosting"]
model_training_dropdown = ttk.OptionMenu(model_training_frame, selected_training_model, *model_training_options)
model_training_dropdown.grid(row=0, column=0, padx=10, pady=10)

# Button to submit model training
train_button = ttk.Button(model_training_frame, text="Train Model", command=submit_model_training)
train_button.grid(row=0, column=1, padx=10, pady=10)

# Testing Frame
testing_frame = ttk.Frame(root)
testing_frame.pack(pady=10)

# Entries for testing input
testing_entries = []
for idx, feature in enumerate(["Duration", "Protocol Type", "Service", "Flag", "Src Bytes", "Dst Bytes"]):
    label = ttk.Label(testing_frame, text=feature)
    label.grid(row=idx, column=0, padx=10, pady=5, sticky='w')
    entry = ttk.Entry(testing_frame)
    entry.grid(row=idx, column=1, padx=10, pady=5)
    testing_entries.append(entry)

# Button to test the trained model
button_test_model = ttk.Button(testing_frame, text="Test Model", command=test_trained_model, state=tk.DISABLED)
button_test_model.grid(row=len(testing_entries), column=0, columnspan=2, pady=10)

root.mainloop()


Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Users\Harmehar\anaconda3\lib\tkinter\__init__.py", line 1921, in __call__
    return self.func(*args)
  File "C:\Users\Harmehar\AppData\Local\Temp\ipykernel_13392\1569582700.py", line 96, in submit_model_training
    X_train, Y_train = preprocess_dataset(dataset_path)
  File "C:\Users\Harmehar\AppData\Local\Temp\ipykernel_13392\1569582700.py", line 39, in preprocess_dataset
    df['Attack Type'] = df.target.apply(lambda r: attacks_types[r[:-1]])
  File "C:\Users\Harmehar\anaconda3\lib\site-packages\pandas\core\generic.py", line 5902, in __getattr__
    return object.__getattribute__(self, name)
AttributeError: 'DataFrame' object has no attribute 'target'
