In [None]:
import sys
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from PyQt5.QtWidgets import QApplication, QWidget, QPushButton, QVBoxLayout, QFileDialog, QMessageBox, QTextEdit, QLabel, QTableWidget, QTableWidgetItem, QHBoxLayout, QGroupBox, QComboBox  # Add QComboBox here
from PyQt5.QtGui import QIcon
from PyQt5.QtCore import Qt
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import arff

class DataAnalysisGUI(QWidget):
    """Main window of the "System Eye" intrusion-detection analysis tool.

    The window lets the user pick one or more dataset files (CSV, tab-separated
    text or ARFF), choose a preprocessing method and a target column, and then
    trains three scikit-learn classifiers per dataset.  Accuracies are written
    to a text pane, the (preprocessed) data to a table and the distribution of
    the target column to an embedded matplotlib chart.
    """

    # Stylesheets shared by several widgets so they stay visually consistent.
    _BUTTON_STYLE = "background-color: #f44336; color: white;"  # Red buttons
    _RESULT_STYLE = "background-color: rgb(255, 255, 255); color: rgb(0, 0, 0);"  # White background, black text

    def __init__(self):
        super().__init__()

        # Paths picked in the file dialog; replaced by upload_datasets().
        self.file_paths = []

        self.init_ui()

    def init_ui(self):
        """Create all widgets, arrange them in layouts and show the window."""
        # Set window title
        self.setWindowTitle("System Eye")

        # Set application icon
        self.setWindowIcon(QIcon('IDS/WhatsApp Image 2024-05-07 at 21.00.22_37f3adb5.jpg'))

        # Teal background with white text for the whole window
        self.setStyleSheet("background-color: rgb(0, 153, 153); color: rgb(255, 255, 255);")

        # Watermark / banner label.  Its geometry is controlled by the main
        # layout it is added to below, so no explicit geometry is set here.
        watermark_label = QLabel(self)
        watermark_label.setStyleSheet("font-size: 20px; color: rgb(255, 255, 255); border: 2px solid #f44336; padding: 5px;")
        watermark_label.setText(" (IDS) Intrusion Detection System")

        # Upload button
        self.upload_button = QPushButton("Upload Dataset(s)", self)
        self.upload_button.setStyleSheet(self._BUTTON_STYLE)
        self.upload_button.clicked.connect(self.upload_datasets)

        # Analyze button; stays disabled until at least one file was chosen
        self.analyze_button = QPushButton("Analyze", self)
        self.analyze_button.setStyleSheet(self._BUTTON_STYLE)
        self.analyze_button.clicked.connect(self.analyze_data)
        self.analyze_button.setEnabled(False)

        # Preprocessing method selection ("None" is the default entry)
        self.preprocessing_label = QLabel("Preprocessing Method:", self)
        self.preprocessing_combobox = QComboBox(self)
        self.preprocessing_combobox.addItems(["None", "Standardization", "Normalization"])
        self.preprocessing_combobox.setCurrentIndex(0)

        # Target column selection; filled once datasets have been uploaded
        self.target_column_label = QLabel("Select Target Column:", self)
        self.target_column_combobox = QComboBox(self)

        # Read-only text pane for the model accuracies
        self.results_textedit = QTextEdit(self)
        self.results_textedit.setReadOnly(True)
        self.results_textedit.setStyleSheet(self._RESULT_STYLE)

        # Table widget for displaying results; display_results() replaces
        # these initial headers with the dataset's own column names.
        self.results_table = QTableWidget(self)
        self.results_table.setColumnCount(4)
        self.results_table.setHorizontalHeaderLabels(['Attack', '# training packets', '# normal test packets', '# malicious test packets'])
        self.results_table.setStyleSheet(self._RESULT_STYLE)

        # Group box for text results
        text_results_groupbox = QGroupBox("Text Results", self)
        text_results_layout = QVBoxLayout()
        text_results_layout.addWidget(self.results_textedit)
        text_results_groupbox.setLayout(text_results_layout)

        # Group box for table results
        table_results_groupbox = QGroupBox("Table Results", self)
        table_results_layout = QVBoxLayout()
        table_results_layout.addWidget(self.results_table)
        table_results_groupbox.setLayout(table_results_layout)

        # Group box for charts
        charts_groupbox = QGroupBox("Charts", self)
        charts_layout = QVBoxLayout()
        self.charts_canvas = FigureCanvas(plt.figure(figsize=(10, 6)))
        charts_layout.addWidget(self.charts_canvas)
        charts_groupbox.setLayout(charts_layout)

        # Layout for buttons and selection widgets
        buttons_layout = QVBoxLayout()
        for widget in (
            self.upload_button,
            self.analyze_button,
            self.preprocessing_label,
            self.preprocessing_combobox,
            self.target_column_label,
            self.target_column_combobox,
        ):
            buttons_layout.addWidget(widget)

        # Text and table results side by side
        results_layout = QHBoxLayout()
        results_layout.addWidget(text_results_groupbox)
        results_layout.addWidget(table_results_groupbox)

        # Main layout
        main_layout = QVBoxLayout()
        main_layout.addWidget(watermark_label)
        main_layout.addLayout(buttons_layout)
        main_layout.addLayout(results_layout)
        main_layout.addWidget(charts_groupbox)
        self.setLayout(main_layout)

        self.resize(800, 600)
        self.show()

    def upload_datasets(self):
        """Ask the user for dataset files and offer their columns as targets."""
        file_paths, _ = QFileDialog.getOpenFileNames(self, "Open Dataset(s)", "", "CSV Files (*.csv);;Text Files (*.txt);;ARFF Files (*.arff);;All Files (*)")
        if not file_paths:
            return  # Dialog was cancelled

        print(f"Selected file(s): {file_paths}")
        self.file_paths = file_paths
        self.analyze_button.setEnabled(True)  # Enable analyze button after file upload

        # This method runs as a Qt slot, so a read/parse failure is reported
        # in a dialog instead of being allowed to propagate out of the slot.
        try:
            self.populate_target_column_combobox(file_paths)
        except Exception as e:
            QMessageBox.critical(self, "Error", f"Could not read the column names: {str(e)}")

    def _load_dataset(self, file_path, nrows=None):
        """Load one dataset file into a DataFrame.

        Args:
            file_path: Path of the file; the format is chosen from its
                extension (case-insensitive): ``.csv``, ``.txt`` (read as
                tab-separated values) or ``.arff``.
            nrows: Optional maximum number of data rows to keep, used when
                only the column names are needed.

        Returns:
            The loaded DataFrame with all column labels converted to ``str``,
            or ``None`` when the extension is not supported.
        """
        extension = file_path.lower()
        if extension.endswith('.csv'):
            # low_memory=False avoids pandas' DtypeWarning on mixed-type columns
            df = pd.read_csv(file_path, nrows=nrows, low_memory=False)
        elif extension.endswith('.txt'):
            df = pd.read_csv(file_path, sep='\t', nrows=nrows)  # Assuming tab-separated values (TSV) format
        elif extension.endswith('.arff'):
            with open(file_path, 'r') as f:
                arff_data = arff.load(f)
            rows = arff_data['data']
            if nrows is not None:
                rows = rows[:nrows]
            df = pd.DataFrame(rows, columns=[att[0] for att in arff_data['attributes']])
        else:
            return None

        # String labels keep the combobox text and the DataFrame columns comparable.
        df.columns = [str(column) for column in df.columns]
        return df

    def populate_target_column_combobox(self, file_paths):
        """Fill the target-column combobox with the columns of *file_paths*.

        Each column name is listed once, in first-seen order, even when it
        occurs in several files.  Files with an unsupported extension are
        ignored.
        """
        # Clear combobox before repopulating
        self.target_column_combobox.clear()

        seen = set()
        column_names = []
        for file_path in file_paths:
            df = self._load_dataset(file_path, nrows=1)  # One row is enough to learn the columns
            if df is None:
                continue
            for column in df.columns:
                if column not in seen:
                    seen.add(column)
                    column_names.append(column)

        self.target_column_combobox.addItems(column_names)

    def analyze_data(self):
        """Run preprocessing, training and visualisation for every selected file.

        Unsupported files and files lacking the selected target column are
        skipped with a warning.  Any other failure is shown in an error dialog
        and stops the run.  The completion message is only shown when at least
        one dataset was actually analysed.
        """
        target_column = self.target_column_combobox.currentText()
        analyzed_count = 0
        try:
            for file_path in self.file_paths:
                df = self._load_dataset(file_path)
                if df is None:
                    QMessageBox.warning(self, "Unsupported File", f"Unsupported file format for {file_path}. Skipping.")
                    continue  # Skip to the next file

                if target_column not in df.columns:
                    QMessageBox.warning(self, "Missing Target Column", f"Column '{target_column}' was not found in {file_path}. Skipping.")
                    continue

                # The target holds the class labels, so it is not rescaled.
                df = self.preprocess_data(df, exclude_columns=[target_column])

                # Train models and get accuracies
                dt_accuracy, rf_accuracy, nn_accuracy = self.train_models(df, target_column)

                # Label the output so results of several files can be told apart
                self.results_textedit.append(f"Results for {file_path}:")
                self.display_model_accuracies(dt_accuracy, rf_accuracy, nn_accuracy)

                # Display results in table
                self.display_results(df)

                # Generate and display charts based on data analysis
                self.generate_charts(df, target_column)
                analyzed_count += 1

            if analyzed_count:
                QMessageBox.information(self, "Analysis Completed", "Data analysis completed successfully.")
        except Exception as e:
            QMessageBox.critical(self, "Error", f"An error occurred during data analysis: {str(e)}")

    def preprocess_data(self, df, exclude_columns=None):
        """Apply the preprocessing method selected in the combobox.

        Args:
            df: Dataset to preprocess.
            exclude_columns: Optional iterable of column names that must be
                left untouched (for example the target column).

        Returns:
            The preprocessed DataFrame; *df* itself when the method is "None".
        """
        preprocessing_method = self.preprocessing_combobox.currentText()

        if preprocessing_method == "Standardization":
            return self.standardize_data(df, exclude_columns)
        if preprocessing_method == "Normalization":
            return self.normalize_data(df, exclude_columns)
        return df

    @staticmethod
    def _numeric_columns(df, exclude_columns=None):
        """Return the numeric column names of *df* that are not excluded."""
        excluded = set(exclude_columns or ())
        numeric = df.select_dtypes(include='number').columns
        return [column for column in numeric if column not in excluded]

    def standardize_data(self, df, exclude_columns=None):
        """Return a copy of *df* with numeric columns scaled to zero mean, unit variance.

        The population standard deviation (``ddof=0``) is used.  Constant
        columns have no spread and are only centred, which avoids a division
        by zero.  Non-numeric and excluded columns are returned unchanged.
        """
        columns = self._numeric_columns(df, exclude_columns)
        if not columns:
            return df

        scaled = df.copy()
        values = scaled[columns].astype(float)
        spread = values.std(ddof=0)
        spread = spread.where(spread > 0, 1.0)  # Also covers NaN spread of all-missing columns
        standardized = (values - values.mean()) / spread
        for column in columns:
            scaled[column] = standardized[column]
        return scaled

    def normalize_data(self, df, exclude_columns=None):
        """Return a copy of *df* with numeric columns min-max scaled to [0, 1].

        Constant columns have a zero range and are mapped to 0, which avoids a
        division by zero.  Non-numeric and excluded columns are returned
        unchanged.
        """
        columns = self._numeric_columns(df, exclude_columns)
        if not columns:
            return df

        scaled = df.copy()
        values = scaled[columns].astype(float)
        lowest = values.min()
        value_range = values.max() - lowest
        value_range = value_range.where(value_range > 0, 1.0)
        normalized = (values - lowest) / value_range
        for column in columns:
            scaled[column] = normalized[column]
        return scaled

    @staticmethod
    def _encode_features(features):
        """Return a copy of *features* that the classifiers can be fitted on.

        Non-numeric columns are replaced by integer codes (one code per
        distinct value, ``-1`` for missing values).  Missing values in numeric
        columns are filled with the column median, or 0 when the whole column
        is missing.
        """
        encoded = features.copy()
        for column in encoded.columns:
            series = encoded[column]
            if not pd.api.types.is_numeric_dtype(series):
                encoded[column] = pd.factorize(series)[0]
            elif series.isna().any():
                median = series.median()
                encoded[column] = series.fillna(0 if pd.isna(median) else median)
        return encoded

    def train_models(self, df, target_column):
        """Train a decision tree, a random forest and an MLP and score them.

        Rows without a target value are dropped, the features are encoded with
        ``_encode_features`` and the data is split 80/20 with a fixed seed.
        The classifiers use the same fixed seed so repeated runs give the same
        accuracies.

        Args:
            df: Dataset containing the features and the target column.
            target_column: Name of the column to predict.

        Returns:
            Tuple ``(dt_accuracy, rf_accuracy, nn_accuracy)`` with the test-set
            accuracy of each classifier.

        Raises:
            KeyError: If *target_column* is not a column of *df*.
        """
        labelled = df.dropna(subset=[target_column])

        # Split dataset into features and target
        X = self._encode_features(labelled.drop(columns=[target_column]))
        y = labelled[target_column]

        # Split dataset into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        classifiers = (
            DecisionTreeClassifier(random_state=42),
            RandomForestClassifier(random_state=42),
            MLPClassifier(random_state=42),
        )
        accuracies = []
        for classifier in classifiers:
            classifier.fit(X_train, y_train)
            accuracies.append(accuracy_score(y_test, classifier.predict(X_test)))

        dt_accuracy, rf_accuracy, nn_accuracy = accuracies
        return dt_accuracy, rf_accuracy, nn_accuracy

    def display_model_accuracies(self, dt_accuracy, rf_accuracy, nn_accuracy):
        """Append the three model accuracies to the text results pane."""
        self.results_textedit.append(f"Decision Tree Accuracy: {dt_accuracy}")
        self.results_textedit.append(f"Random Forest Accuracy: {rf_accuracy}")
        self.results_textedit.append(f"Neural Network Accuracy: {nn_accuracy}")

    def display_results(self, df):
        """Show *df* in the results table, one table row per DataFrame row."""
        # Set header labels based on the columns present in the DataFrame
        header_labels = [str(column) for column in df.columns]
        self.results_table.setColumnCount(len(header_labels))
        self.results_table.setHorizontalHeaderLabels(header_labels)

        self.results_table.setRowCount(len(df))
        # Rows are addressed by position, not by index label, so DataFrames
        # whose index is not 0..n-1 still land in the right table rows.
        for row_position, row in enumerate(df.itertuples(index=False, name=None)):
            for column_position, value in enumerate(row):
                self.results_table.setItem(row_position, column_position, QTableWidgetItem(str(value)))

    def generate_charts(self, df, target_column):
        """Draw the distribution of *target_column*.

        Numeric columns are drawn as a histogram, all other columns as a bar
        chart of value counts.  Nothing is drawn when the column is absent.
        """
        if target_column not in df.columns:
            return

        if pd.api.types.is_numeric_dtype(df[target_column]):
            self.generate_histogram(df, target_column)
        else:
            self.generate_bar_chart(df, target_column)

    def _fresh_axes(self):
        """Clear the embedded figure and return a single new axes on it."""
        figure = self.charts_canvas.figure
        figure.clear()  # Otherwise charts from earlier analyses pile up
        return figure.add_subplot(111)

    def generate_bar_chart(self, df, target_column):
        """Draw a bar chart with the number of occurrences of each category."""
        # Count occurrences of each category in the target column
        category_counts = df[target_column].value_counts()

        ax = self._fresh_axes()
        if not category_counts.empty:  # Nothing to plot for an empty column
            category_counts.plot(kind='bar', ax=ax)
        ax.set_xlabel(target_column)
        ax.set_ylabel('Count')
        ax.set_title(f'Bar Chart of {target_column}')
        self.charts_canvas.draw()

    def generate_histogram(self, df, target_column):
        """Draw a 20-bin histogram of a numeric target column."""
        # Missing values are dropped because the histogram range cannot be
        # derived from NaN; the float cast also covers boolean columns.
        values = df[target_column].dropna().astype(float)

        ax = self._fresh_axes()
        ax.hist(values, bins=20)
        ax.set_xlabel(target_column)
        ax.set_ylabel('Frequency')
        ax.set_title(f'Histogram of {target_column}')
        self.charts_canvas.draw()

if __name__ == "__main__":
    # Qt allows only one QApplication per process.  Reuse an existing one so
    # the script can be run again in the same interpreter (for example when a
    # notebook cell is re-executed) instead of creating a second instance.
    app = QApplication.instance() or QApplication(sys.argv)
    gui = DataAnalysisGUI()  # Keep a reference so the window is not garbage-collected
    sys.exit(app.exec_())


Selected file(s): ['C:/Users/omark/Desktop/IDS projec by team/IDS with GUI/Dataset Properties.csv']
Selected file(s): ['C:/Users/omark/Desktop/IDS projec by team/IDS with GUI/Dataset Properties.csv']
Selected file(s): ['C:/Users/omark/Desktop/kdd_afreradd_Attack.arff']
