# In [None]:
import sys
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from PyQt5.QtWidgets import (
    QApplication, QWidget, QPushButton, QVBoxLayout, QFileDialog, QMessageBox, QTextEdit, QLabel, 
    QTableWidget, QTableWidgetItem, QHBoxLayout, QGroupBox, QComboBox
)
from PyQt5.QtGui import QIcon
from PyQt5.QtCore import Qt
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.impute import KNNImputer
import arff

class DataAnalysisGUI(QWidget):
    """Main window of the intrusion-detection analysis tool.

    The user selects one or more dataset files (CSV, tab-separated text or
    ARFF), picks a preprocessing method and a target column, and the widget
    trains several scikit-learn classifiers, then shows their accuracies,
    a per-class count table and two summary charts.
    """

    def __init__(self):
        super().__init__()
        # State is created before init_ui() because init_ui() connects the
        # signals and shows the window; slots must never see missing attributes.
        self.file_paths = []
        self.charts_window = None
        self.init_ui()

    def init_ui(self):
        """Build all child widgets, lay them out and show the window."""
        # Set up the main window
        self.setWindowTitle("System Eye")
        self.setWindowIcon(QIcon('IDS/WhatsApp Image 2024-05-07 at 21.00.22_37f3adb5.jpg'))
        self.setStyleSheet("background-color: rgb(0, 153, 153); color: rgb(255, 255, 255);")

        # Add a watermark label. Its geometry is managed by main_layout below.
        # The stylesheet previously used rgba() with three components and had a
        # stray ':' before the border colour, which is not valid Qt style syntax.
        watermark_label = QLabel(self)
        watermark_label.setStyleSheet(
            "font-size: 20px; color: rgb(255, 255, 255); border: 2px solid #f44336; padding: 5px;"
        )
        watermark_label.setText(" (IDS) Intrusion Detection System")

        # Upload button
        self.upload_button = QPushButton("Upload Dataset(s)", self)
        self.upload_button.setStyleSheet("background-color: #f44336; color: white;")
        self.upload_button.clicked.connect(self.upload_datasets)

        # Analyze button (enabled only once at least one file has been chosen)
        self.analyze_button = QPushButton("Analyze", self)
        self.analyze_button.setStyleSheet("background-color: #f44336; color: white;")
        self.analyze_button.clicked.connect(self.analyze_data)
        self.analyze_button.setEnabled(False)

        # Show charts button
        self.show_charts_button = QPushButton("Show All Charts", self)
        self.show_charts_button.setStyleSheet("background-color: #f44336; color: white;")
        self.show_charts_button.clicked.connect(self.show_all_charts)

        # Preprocessing method selection
        self.preprocessing_label = QLabel("Preprocessing Method:", self)
        self.preprocessing_combobox = QComboBox(self)
        self.preprocessing_combobox.addItems(["None", "Standardization", "Normalization", "KNN Imputation"])
        self.preprocessing_combobox.setCurrentIndex(0)

        # Target column selection
        self.target_column_label = QLabel("Select Target Column:", self)
        self.target_column_combobox = QComboBox(self)

        # Results display area
        self.results_textedit = QTextEdit(self)
        self.results_textedit.setReadOnly(True)
        self.results_textedit.setStyleSheet("background-color: rgb(255, 255, 255); color: rgb(0, 0, 0);")

        # Table for showing specific results
        self.results_table = QTableWidget(self)
        self.results_table.setColumnCount(4)
        self.results_table.setHorizontalHeaderLabels(['Attack Type', '# Training Packets', '# Normal Test Packets', '# Malicious Test Packets'])
        self.results_table.setStyleSheet("background-color: rgb(255, 255, 255); color: rgb(0, 0, 0);")

        # Group boxes for organizing UI components
        text_results_groupbox = QGroupBox("Text Results", self)
        text_results_layout = QVBoxLayout()
        text_results_layout.addWidget(self.results_textedit)
        text_results_groupbox.setLayout(text_results_layout)

        table_results_groupbox = QGroupBox("Table Results", self)
        table_results_layout = QVBoxLayout()
        table_results_layout.addWidget(self.results_table)
        table_results_groupbox.setLayout(table_results_layout)

        charts_groupbox = QGroupBox("Charts", self)
        charts_layout = QVBoxLayout()
        self.charts_canvas = FigureCanvas(plt.figure(figsize=(10, 6)))
        charts_layout.addWidget(self.charts_canvas)
        charts_groupbox.setLayout(charts_layout)

        # Layout for buttons
        buttons_layout = QVBoxLayout()
        for control in (
            self.upload_button,
            self.analyze_button,
            self.show_charts_button,
            self.preprocessing_label,
            self.preprocessing_combobox,
            self.target_column_label,
            self.target_column_combobox,
        ):
            buttons_layout.addWidget(control)

        # Main layout
        results_layout = QHBoxLayout()
        results_layout.addWidget(text_results_groupbox)
        results_layout.addWidget(table_results_groupbox)

        main_layout = QVBoxLayout()
        main_layout.addWidget(watermark_label)
        main_layout.addLayout(buttons_layout)
        main_layout.addLayout(results_layout)
        main_layout.addWidget(charts_groupbox)
        self.setLayout(main_layout)

        self.resize(1200, 800)
        self.show()

    @staticmethod
    def _load_dataframe(file_path, nrows=None):
        """Load one dataset file into a DataFrame.

        The format is chosen from the (case-insensitive) file extension:
        ``.csv`` is comma-separated, ``.txt`` is tab-separated and ``.arff``
        is parsed with the ``arff`` module.

        Args:
            file_path: Path of the file to read.
            nrows: If given, return at most this many rows (used to peek at
                the column names without loading a whole CSV/text file).

        Returns:
            The loaded DataFrame, or ``None`` when the extension is not one
            of the supported formats.
        """
        lowered = file_path.lower()
        if lowered.endswith('.csv'):
            return pd.read_csv(file_path, low_memory=False, nrows=nrows)
        if lowered.endswith('.txt'):
            return pd.read_csv(file_path, sep='\t', nrows=nrows)
        if lowered.endswith('.arff'):
            with open(file_path, 'r') as f:
                arff_data = arff.load(f)
            column_names = [att[0] for att in arff_data['attributes']]
            df = pd.DataFrame(arff_data['data'], columns=column_names)
            return df if nrows is None else df.head(nrows)
        return None

    def upload_datasets(self):
        """Ask the user for dataset files and remember the selection."""
        # Open a file dialog to select datasets
        file_paths, _ = QFileDialog.getOpenFileNames(self, "Open Dataset(s)", "", "CSV Files (*.csv);;Text Files (*.txt);;ARFF Files (*.arff);;All Files (*)")
        if file_paths:
            self.file_paths = file_paths
            self.analyze_button.setEnabled(True)  # Enable analyze button after files are selected
            self.populate_target_column_combobox(file_paths)

    def populate_target_column_combobox(self, file_paths):
        """Fill the target-column combobox with the columns of the given files.

        Column names are collected across all readable files in order of first
        appearance; a name shared by several files is listed only once.
        Unsupported files are skipped, and a file that cannot be read produces
        a warning instead of aborting the slot.

        Args:
            file_paths: Paths of the selected dataset files.
        """
        self.target_column_combobox.clear()
        seen = set()
        column_names = []
        for file_path in file_paths:
            try:
                df = self._load_dataframe(file_path, nrows=1)
            except Exception as e:  # UI boundary: report and carry on with the other files
                QMessageBox.warning(self, "Unreadable File", f"Could not read columns from {file_path}: {str(e)}")
                continue
            if df is None:
                continue
            for column in df.columns:
                name = str(column)
                if name not in seen:
                    seen.add(name)
                    column_names.append(name)
        self.target_column_combobox.addItems(column_names)

    def analyze_data(self):
        """Run the full analysis pipeline on every selected file.

        For each supported file that contains the selected target column the
        data is preprocessed, the models are trained, and the text, table and
        chart outputs are refreshed. Files with an unsupported extension or
        without the target column are skipped with a warning. Any exception
        raised during the run is reported in an error dialog.
        """
        if not self.file_paths:
            QMessageBox.warning(self, "No Dataset", "Please upload at least one dataset before analyzing.")
            return

        target_column = self.target_column_combobox.currentText()
        if not target_column:
            QMessageBox.warning(self, "No Target Column", "Please select a target column before analyzing.")
            return

        analyzed_files = 0
        try:
            for file_path in self.file_paths:
                df = self._load_dataframe(file_path)
                if df is None:
                    QMessageBox.warning(self, "Unsupported File", f"Unsupported file format for {file_path}. Skipping.")
                    continue
                if target_column not in df.columns:
                    QMessageBox.warning(self, "Missing Target Column", f"Column '{target_column}' was not found in {file_path}. Skipping.")
                    continue

                # Preprocess the data (the target column is never scaled/imputed)
                df = self.preprocess_data(df, target_column)

                # Train models and get accuracies
                accuracies = self.train_models(df, target_column)
                self.display_model_accuracies(accuracies)

                # Display detailed results
                self.display_results(df, target_column)

                # Generate and display charts
                self.generate_charts(df, target_column)
                analyzed_files += 1
        except Exception as e:  # top-level UI boundary: surface the failure to the user
            QMessageBox.critical(self, "Error", f"An error occurred during data analysis: {str(e)}")
            return

        # Only claim success when at least one file actually went through the pipeline.
        if analyzed_files:
            QMessageBox.information(self, "Analysis Completed", "Data analysis completed successfully.")

    def preprocess_data(self, df, target_column=None):
        """Apply the selected preprocessing method and label-encode text columns.

        Args:
            df: Input data. It is not modified; a processed copy is returned.
            target_column: Optional name of the label column. When given, it is
                excluded from scaling/imputation so that class labels are not
                turned into continuous values. It is still label-encoded if it
                holds text.

        Returns:
            A new DataFrame with numeric feature columns transformed according
            to the combobox selection and object/string columns replaced by
            integer codes.
        """
        method = self.preprocessing_combobox.currentText()
        df = df.copy()

        numeric_features = [
            col for col in df.select_dtypes(include=['number']).columns
            if col != target_column
        ]
        non_numeric_columns = df.select_dtypes(exclude=['number']).columns

        if method == "Standardization":
            transformer = StandardScaler()
        elif method == "Normalization":
            transformer = MinMaxScaler()
        elif method == "KNN Imputation":
            transformer = KNNImputer(n_neighbors=5)
        else:
            transformer = None

        # The transformers raise on an empty column selection, so skip them then.
        if transformer is not None and numeric_features:
            df[numeric_features] = transformer.fit_transform(df[numeric_features])

        # Encode text columns as integer codes
        for col in non_numeric_columns:
            column = df[col]
            if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
                df[col] = LabelEncoder().fit_transform(column.astype(str))

        return df

    def train_models(self, df, target_column):
        """Train the classifiers on a 70/30 split and measure test accuracy.

        Args:
            df: Preprocessed data containing the features and the target.
            target_column: Name of the column to predict.

        Returns:
            A dict mapping each model's display name to its accuracy on the
            held-out 30% of the rows.
        """
        # Split the data into training and test sets
        X = df.drop(columns=[target_column])
        y = df[target_column]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

        # Initialize models
        models = {
            'Decision Tree': DecisionTreeClassifier(),
            'Random Forest': RandomForestClassifier(),
            'Neural Network': MLPClassifier(max_iter=300),
            'SVM': SVC(),
            'Naive Bayes': GaussianNB()
        }

        # Train models and calculate accuracies
        accuracies = {}
        for model_name, model in models.items():
            model.fit(X_train, y_train)
            accuracies[model_name] = accuracy_score(y_test, model.predict(X_test))

        return accuracies

    def display_model_accuracies(self, accuracies):
        """Replace the text panel content with one ``name: xx.xx%`` line per model.

        Args:
            accuracies: Mapping of model name to accuracy in the range 0..1.
        """
        self.results_textedit.clear()
        for model_name, accuracy in accuracies.items():
            self.results_textedit.append(f'{model_name}: {accuracy * 100:.2f}%')

    def display_results(self, df, target_column):
        """Fill the results table with one row per distinct target value.

        Args:
            df: Data whose target column is summarised.
            target_column: Name of the column to count values of.
        """
        target_counts = df[target_column].value_counts()
        self.results_table.setRowCount(len(target_counts))
        for row, (attack_type, count) in enumerate(target_counts.items()):
            # The value must be passed as text: after label encoding it is an
            # integer, and QTableWidgetItem(int) selects the "item type"
            # constructor overload instead of setting the cell text.
            self.results_table.setItem(row, 0, QTableWidgetItem(str(attack_type)))
            self.results_table.setItem(row, 1, QTableWidgetItem(str(count)))
            # Placeholder values for normal and malicious test packets
            half = str(count // 2)
            self.results_table.setItem(row, 2, QTableWidgetItem(half))
            self.results_table.setItem(row, 3, QTableWidgetItem(half))

    def generate_charts(self, df, target_column):
        """Redraw the embedded bar and pie charts of the target distribution.

        Args:
            df: Data whose target column is plotted.
            target_column: Name of the column to plot value counts of.
        """
        figure = self.charts_canvas.figure
        figure.clear()
        ax1 = figure.add_subplot(121)
        ax2 = figure.add_subplot(122)

        # Count once and reuse for both charts
        target_counts = df[target_column].value_counts()

        # Bar chart of target column value counts
        target_counts.plot(kind='bar', ax=ax1, title='Attack Type Counts')

        # Pie chart of target column value proportions
        target_counts.plot(kind='pie', ax=ax2, title='Attack Type Proportions', autopct='%1.1f%%')

        self.charts_canvas.draw()

    def show_all_charts(self):
        """Show the additional charts window, creating it on first use."""
        if self.charts_window is None:
            self.charts_window = ChartsWindow(self)
        self.charts_window.show()

class ChartsWindow(QWidget):
    """Secondary window that hosts a matplotlib canvas with example charts."""

    def __init__(self, parent=None):
        # Qt.Window makes this a real top-level window even when a parent is
        # given; without the flag a parented QWidget is drawn as a child
        # inside the parent's own area instead of opening separately.
        super().__init__(parent, Qt.Window)
        self.setWindowTitle("Charts")
        self.setGeometry(100, 100, 800, 600)
        layout = QVBoxLayout()
        self.setLayout(layout)

        self.figure = plt.figure(figsize=(8, 6))
        self.canvas = FigureCanvas(self.figure)
        layout.addWidget(self.canvas)

        self.plot_charts()

    def plot_charts(self):
        """Redraw the example chart on this window's canvas."""
        # Start from an empty figure so repeated calls do not stack axes.
        self.figure.clear()
        ax = self.figure.add_subplot(111)
        # Example chart: simple line plot
        ax.plot([0, 1, 2, 3], [0, 1, 4, 9])
        self.canvas.draw()

if __name__ == "__main__":
    # Script entry point: create the Qt application, build the main window
    # (which shows itself), then hand control to the event loop and exit with
    # its return code.
    application = QApplication(sys.argv)
    main_window = DataAnalysisGUI()  # reference kept so the window stays alive
    exit_code = application.exec_()
    sys.exit(exit_code)
