# Streamlit

In [None]:
!pip install streamlit --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m41.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.0/83.0 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25h

## 1

In [None]:
%%writefile Ddos_Attack_Detection1.py

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import time
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

# Title of the Streamlit app
st.title("Data Understanding and Exploration")

# Upload CSV file. Only files with a .csv extension are offered by the widget;
# the pipeline that follows runs only when `uploaded_file` is not None.
uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
if uploaded_file is not None:
    # Read the data from the CSV file in chunks to handle large files, then
    # stitch the chunks back together into a single DataFrame.
    chunk_size = 10000
    df = pd.concat(list(pd.read_csv(uploaded_file, chunksize=chunk_size)))

    # Initial Data Exploration
    st.header("Initial Data Exploration")
    st.write("Shape of the DataFrame:")
    st.write(df.shape)

    # Adjust display settings
    pd.set_option('display.max_columns', 100)
    pd.set_option('display.max_rows', 100)
    pd.set_option('display.width', 1000)
    pd.set_option('display.max_colwidth', 20)

    st.header("Descriptive Statistics")
    st.write(df.describe())

    # Frequency Counts for Categorical Variable
    if ' Label' in df.columns:
        st.header("Frequency Counts for 'Label'")
        # Computed once and reused for both the table and the chart.
        label_counts = df[' Label'].value_counts()
        st.write(label_counts)

        colors = plt.cm.viridis(np.linspace(0, 1, len(label_counts)))

        # Draw on an explicit figure instead of pyplot's global state so the
        # figure can be handed to Streamlit and then released.
        fig, ax = plt.subplots(figsize=(12, 8))
        bars = ax.bar(label_counts.index, label_counts.values, color=colors, edgecolor='black')
        ax.grid(True, which='major', linestyle='--', linewidth=0.5, color='grey')
        ax.set_axisbelow(True)

        # Write each count just above its bar.
        for i, val in enumerate(label_counts):
            ax.text(i, val, str(val), ha='center', va='bottom')

        ax.set_title('Bar Chart of Label')
        ax.set_xlabel(' Label')
        ax.set_ylabel('Frequency')
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
        ax.legend(bars, label_counts.index, title="Types", bbox_to_anchor=(1.05, 1), loc='upper left')
        fig.tight_layout()
        st.pyplot(fig)
        # The script reruns on every interaction; close the figure so open
        # figures do not pile up in pyplot's registry.
        plt.close(fig)

    # Data Preparation
    st.header("Data Preparation")

    # Drop Irrelevant Features
    st.subheader("Drop Irrelevant Features")
    initial_columns = df.columns.tolist()
    irrelevant_features = ['Unnamed: 0', 'Flow ID', ' Source IP', ' Source Port', ' Destination IP', ' Destination Port', ' Timestamp', ' Protocol', 'SimillarHTTP', ' Inbound']
    # errors='ignore': an upload that lacks some of these columns must not abort
    # the run with a KeyError; the report below lists only the columns that
    # were actually present.
    df = df.drop(columns=irrelevant_features, errors='ignore')
    dropped_features = [feature for feature in irrelevant_features if feature in initial_columns]
    st.write(f"Features dropped: {dropped_features}")
    st.write("DataFrame after dropping irrelevant features:")
    st.write(df.head())

    additional_features_to_drop = [' Init_Win_bytes_backward', 'Init_Win_bytes_forward', ' Fwd Header Length.1', 'Fwd IAT Total']
    df = df.drop(columns=additional_features_to_drop, errors='ignore')
    dropped_additional_features = [feature for feature in additional_features_to_drop if feature in initial_columns]
    st.write(f"Additional features dropped: {dropped_additional_features}")
    st.write("DataFrame and Shape after dropping additional specified columns:")
    st.write(df.head())
    st.write(df.shape)

    # Drop Duplicate Rows
    duplicate_rows = df.duplicated()
    st.write("Number of duplicate rows: ", duplicate_rows.sum())
    df = df.drop_duplicates()
    st.write("DataFrame and Shape after dropping duplicate rows:")
    st.write(df.head())
    st.write(df.shape)

    # Drop Missing Values (Rows)
    missing_values = df.isnull().sum()
    st.write("Missing values in each column before cleaning:\n", missing_values)
    df = df.dropna()
    missing_values_cleaned = df.isnull().sum()
    st.write("Missing values in each column after cleaning:\n", missing_values_cleaned)
    st.write(df.shape)

    # Identify and Replace Infinite Values
    # np.isinf works on the whole numeric frame at once; the element-wise
    # DataFrame.applymap used previously is deprecated in recent pandas.
    inf_values = np.isinf(df.select_dtypes(include=[np.number]))
    inf_counts = inf_values.sum()
    st.write("Infinite values in each column:\n", inf_counts)
    # Turn +/-inf into NaN so that a single dropna() removes the affected rows.
    df = df.replace([np.inf, -np.inf], np.nan).dropna()
    st.write("Infinite values after replacing infinite values and dropping NaNs:")
    st.write(df.shape)

    # Drop Columns with More Than 50% Zero Values
    zero_val_percent = (df == 0).astype(int).sum(axis=0) / df.shape[0] * 100
    threshold = 50
    columns_to_drop = zero_val_percent[zero_val_percent > threshold].index
    st.write(f"Columns identified for dropping (greater than {threshold}% zeros):")
    st.write(columns_to_drop)
    df = df.drop(columns=columns_to_drop)
    st.write(df.shape)

    # Drop Columns with More Than 50% Negative Values
    negative_columns = [col for col in df.select_dtypes(include=['int64', 'float64']).columns if (df[col] < 0).mean() > 0.5]
    st.write("Columns with more than 50% negative values:")
    st.write(negative_columns)
    df = df.drop(columns=negative_columns)
    st.write("Shape after dropping columns with more than 50% negative values: ", df.shape)

    # Drop Rows with Any Negative Value
    numerical_columns = df.select_dtypes(include=['int64', 'float64'])
    negative_values = numerical_columns < 0
    rows_with_negative = negative_values.any(axis=1)
    records_with_negative = df[rows_with_negative]
    st.write("Number of rows with negative values in numerical columns:", len(records_with_negative))
    columns_with_negative = numerical_columns.columns[(negative_values.any())]
    st.write("Numerical columns with negative values:")
    st.write(columns_with_negative)
    # .copy() gives an independent frame, so later column assignments do not
    # raise SettingWithCopyWarning against a filtered view.
    df = df[~rows_with_negative].copy()
    st.write("Shape after dropping rows with negative values: ", df.shape)

    # Frequency Counts for Categorical Variable
    # Every later step needs the target column, so stop with a readable message
    # instead of a raw KeyError when the upload does not contain it.
    if ' Label' not in df.columns:
        st.error("The uploaded file has no ' Label' column; the analysis cannot continue.")
        st.stop()

    st.header("Frequency Counts for 'Label'")
    frequency_counts = df[' Label'].value_counts()
    st.write(frequency_counts)

    # Same counts reused for the chart.
    label_counts = frequency_counts
    colors = plt.cm.viridis(np.linspace(0, 1, len(label_counts)))

    # Explicit figure: handed to Streamlit and closed afterwards so reruns do
    # not accumulate open figures.
    fig, ax = plt.subplots(figsize=(12, 8))
    bars = ax.bar(label_counts.index, label_counts.values, color=colors, edgecolor='black')
    ax.grid(True, which='major', linestyle='--', linewidth=0.5, color='grey')
    ax.set_axisbelow(True)

    # Write each count just above its bar.
    for i, val in enumerate(label_counts):
        ax.text(i, val, str(val), ha='center', va='bottom')

    ax.set_title('Bar Chart of Label')
    ax.set_xlabel(' Label')
    ax.set_ylabel('Frequency ')
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    ax.legend(bars, label_counts.index, title="Types", bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout()
    st.pyplot(fig)
    plt.close(fig)

    # Data Transformation - Label Encoder
    st.header("Data Transformation - Label Encoder")
    # Binary target: 0 for 'BENIGN', 1 for every other label. The vectorized
    # comparison replaces a per-row lambda; assign() returns a new frame, so no
    # chained-assignment warning is raised on a previously filtered frame.
    df = df.assign(**{' Label': (df[' Label'] != 'BENIGN').astype('int64')})
    st.write("Label distribution after transformation:")
    st.write(df[' Label'].value_counts())

    fig, ax = plt.subplots(figsize=(8, 6))
    # order=[0, 1] pins bar positions to the class codes, so the fixed tick
    # labels below stay correct even when one class is absent from the data.
    sns.countplot(x=' Label', data=df, palette='coolwarm', order=[0, 1], ax=ax)
    ax.set_title('Distribution of Labels')
    ax.set_xlabel(' Label')
    ax.set_ylabel('Count')
    ax.set_xticks([0, 1])
    ax.set_xticklabels(['Benign (0)', 'DDoS (1)'])
    # Annotate every bar with its thousands-separated count.
    for p in ax.patches:
        ax.annotate(f'{int(p.get_height()):,}', (p.get_x() + p.get_width() / 2., p.get_height()),
                    ha='center', va='center', xytext=(0, 10), textcoords='offset points')
    # Pass the figure itself (not the pyplot module) and release it afterwards.
    st.pyplot(fig)
    plt.close(fig)

    # Data Standardization
    st.header("Data Standardization")

    # Separate the target from the feature columns.
    y = df[' Label']
    X = df.drop(columns=' Label')

    # Fit the scaler on the features and transform them in one step.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Rebuild a labelled frame: scaled features under their original column
    # names, with the target re-attached positionally as the last column.
    df_scaled = pd.DataFrame(X_scaled, columns=X.columns)
    df_scaled = df_scaled.assign(**{' Label': y.values})

    st.write("Standardized DataFrame")
    st.write(df_scaled.head())

    # Data Balancing
    st.header("Data Balancing")
    df_majority = df_scaled[df_scaled[' Label'] == 0]
    df_minority = df_scaled[df_scaled[' Label'] == 1]

    desired_sample_size = 7000
    # DataFrame.sample(n=...) raises ValueError when n exceeds the number of
    # rows, so cap the per-class sample at the size of the smaller class.
    sample_size = min(desired_sample_size, len(df_majority), len(df_minority))
    if sample_size == 0:
        st.error("Both benign (0) and attack (1) rows are required to build a balanced dataset.")
        st.stop()
    if sample_size < desired_sample_size:
        st.warning(
            f"The smaller class has only {sample_size} rows; sampling {sample_size} rows "
            f"per class instead of {desired_sample_size}."
        )

    df_majority_undersampled = df_majority.sample(n=sample_size, random_state=42)
    df_minority_undersampled = df_minority.sample(n=sample_size, random_state=42)

    df_balanced = pd.concat([df_majority_undersampled, df_minority_undersampled])

    # Explicit figure: handed to Streamlit and closed afterwards.
    fig, ax = plt.subplots(figsize=(10, 6))
    # order=[0, 1] keeps bar positions aligned with the fixed tick labels.
    sns.countplot(x=' Label', data=df_balanced, palette='pastel', order=[0, 1], ax=ax)
    ax.set_title('Distribution of Labels (Balanced)')
    for p in ax.patches:
        ax.annotate(f'{p.get_height()}', (p.get_x() + p.get_width() / 2., p.get_height()),
                    ha='center', va='baseline', fontsize=12, color='black', xytext=(0, 5),
                    textcoords='offset points')
    ax.set_xlabel('Label')
    ax.set_ylabel('Count')
    ax.set_xticks([0, 1])
    ax.set_xticklabels(['Benign (0)', 'DDoS (1)'])
    st.pyplot(fig)
    plt.close(fig)

    # Save the DataFrame to a CSV file
    df_balanced.to_csv('Cleaned_Dataset.csv', index=False)
    st.write("File saved as 'Cleaned_Dataset.csv'")

    # Load and Read Dataset (CIC-DDoS2019 - Day 2)
    # Reads back the balanced CSV written just above.
    st.header("Load and Read Dataset (CIC-DDoS2019 - Day 2)")
    df_cleaned = pd.read_csv('Cleaned_Dataset.csv')
    st.write("First few rows of the cleaned dataset:")
    st.write(df_cleaned.head())

    # Data Splitting (70:30)
    st.header("Data Splitting (70:30)")
    y = df_cleaned[' Label']
    X = df_cleaned.drop(columns=' Label')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    # Report the shape of each of the four resulting pieces.
    split_shapes = (
        ("Training set (X):", X_train),
        ("Training set (y):", y_train),
        ("Test set (X):", X_test),
        ("Test set (y):", y_test),
    )
    for caption, part in split_shapes:
        st.write(caption, part.shape)

    # Plot the distribution of the target in the training and testing sets
    class_tick_labels = ['Benign (0)', 'DDoS (1)']
    # value_counts() orders classes by frequency, not by class code. Reindex to
    # [0, 1] so the bar at position 0 is always class 0 and the fixed tick
    # labels can never be attached to the wrong bar.
    train_counts = y_train.value_counts().reindex([0, 1], fill_value=0)
    test_counts = y_test.value_counts().reindex([0, 1], fill_value=0)

    fig, (train_bar, test_bar) = plt.subplots(1, 2, figsize=(14, 7))
    panels = (
        (train_bar, train_counts, 'Distribution of target in training set', 'skyblue'),
        (test_bar, test_counts, 'Distribution of target in testing set', 'salmon'),
    )
    for axis, counts, panel_title, bar_color in panels:
        counts.plot(kind='bar', ax=axis, title=panel_title, color=bar_color)
        axis.set_xticks([0, 1])
        axis.set_xticklabels(class_tick_labels, rotation=0)
        # Print each count slightly (1%) above the top of its bar.
        for i, value in enumerate(counts):
            axis.text(i, value + value * 0.01, f'{value}', ha='center', va='bottom')
        axis.grid(axis='y', linestyle='--', alpha=0.7)
        axis.set_facecolor('whitesmoke')

    fig.subplots_adjust(wspace=0.3)
    fig.suptitle('Target Distribution in Training and Testing Sets in CIC-DDoS2019')
    # Pass the figure itself (not the pyplot module) and release it afterwards.
    st.pyplot(fig)
    plt.close(fig)

    # Feature Selection PCA (70:30)
    st.header("Feature Selection PCA (70:30)")
    # PCA cannot extract more components than min(n_samples, n_features). Cap
    # the requested 25 so a dataset with fewer surviving columns still works.
    n_components = min(25, X_train.shape[0], X_train.shape[1])
    pca = PCA(n_components=n_components)
    # Fit on the training split only, then project both splits.
    pca.fit(X_train)
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    st.write("Training set after PCA (X):", X_train_pca.shape)
    st.write("Test set after PCA (X):", X_test_pca.shape)

    # One row per principal component, one column per original feature.
    feature_names = X_train.columns.tolist()
    pca_components = pd.DataFrame(pca.components_, columns=feature_names)

    fig, ax = plt.subplots(figsize=(14, 6))
    pc1_contributions = pca_components.iloc[0]
    ax.bar(feature_names, pc1_contributions, alpha=0.5, label='PC1')
    # PC2 is stacked on top of PC1; it only exists with at least two components.
    if len(pca_components) > 1:
        pc2_contributions = pca_components.iloc[1]
        ax.bar(feature_names, pc2_contributions, alpha=0.5, label='PC2', bottom=pc1_contributions)
    ax.set_title('Feature Contributions to the First Two Principal Components')
    ax.set_xlabel('Feature Names')
    ax.set_ylabel('Contribution')
    plt.setp(ax.get_xticklabels(), rotation=90)
    ax.legend()
    # Pass the figure itself (not the pyplot module) and release it afterwards.
    st.pyplot(fig)
    plt.close(fig)

    # Feature Selection LDA (70:30)
    st.header("Feature Selection LDA (70:30)")
    lda = LDA(n_components=1)
    # Fit on the training split only, then project both splits.
    lda.fit(X_train, y_train)
    X_train_lda = lda.transform(X_train)
    X_test_lda = lda.transform(X_test)
    st.write("Training set after LDA (X):", X_train_lda.shape)
    st.write("Test set after LDA (X):", X_test_lda.shape)

    # First row of the fitted coefficients, plotted against the training
    # frame's column names.
    lda_feature_names = X_train.columns.tolist()
    lda_coefficients = lda.coef_[0]
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.bar(lda_feature_names, lda_coefficients)
    ax.set_xlabel('Feature Names')
    ax.set_ylabel('LDA Coefficient')
    ax.set_title('LDA Coefficients for Each Feature')
    plt.setp(ax.get_xticklabels(), rotation=90)
    # Pass the figure itself (not the pyplot module) and release it afterwards.
    st.pyplot(fig)
    plt.close(fig)


    #### Default Parameter [kernel = linear] - TRAIN - WITHOUT FS

    # Initialize an SVM with a linear kernel
    svm_train_linear_wf = SVC(kernel="linear")

    # Measure training time (perf_counter is the clock intended for intervals)
    start_time = time.perf_counter()
    svm_train_linear_wf.fit(X_train, y_train)
    training_time = time.perf_counter() - start_time

    # Measure prediction time on the training split
    start_time = time.perf_counter()
    y_train_pred_linear_wf = svm_train_linear_wf.predict(X_train)
    prediction_time = time.perf_counter() - start_time

    # Accuracy and per-class report on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_linear_wf)
    class_report = classification_report(y_train, y_train_pred_linear_wf)

    # Display the results in Streamlit
    st.header("SVM (Kernel: Linear) - Without Feature Selection - Default Parameters")
    st.subheader("Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Default Parameter [kernel = linear] - TEST

    # Evaluate the model exactly as fitted on the training split. It must NOT
    # be refit on X_test/y_test: that would turn the "test" score into a
    # training score on the held-out data.
    start_time = time.perf_counter()
    y_test_pred_linear_wf = svm_train_linear_wf.predict(X_test)
    prediction_time = time.perf_counter() - start_time

    test_accuracy = accuracy_score(y_test, y_test_pred_linear_wf)
    class_report = classification_report(y_test, y_test_pred_linear_wf)
    # Testing time covers the whole evaluation: prediction plus metrics.
    testing_time = time.perf_counter() - start_time

    # Display the results in Streamlit
    st.subheader("Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    ### Implementation of SVM (Kernel-Linear) -Train and Test Set - Without Feature Selection. - HALVING GRID SEARCH

    # Define the parameter grid
    param_grid = {'C': [1, 10]}

    # The searched estimator must use the linear kernel being tuned; a bare
    # SVC() defaults to the RBF kernel.
    svm = SVC(kernel="linear")

    # Initialize HalvingGridSearchCV for faster results
    halving_grid_search = HalvingGridSearchCV(svm, param_grid, cv=5, factor=2, n_jobs=-1)
    halving_grid_search.fit(X_train, y_train)

    best_params = halving_grid_search.best_params_
    st.subheader("Best Hyperparameters Tuning")
    st.write(f"Best hyperparameters for Kernel Linear without Feature Selection : {best_params}")

    #### Tuning Parameter [kernel: linear, C=<best>] - TRAIN

    # Build the tuned model from the search result rather than a hard-coded C,
    # so the reported "tuned" model always matches the search output.
    svm_train_linear_wf = SVC(kernel="linear", **best_params)
    tuned_title = f"Tuned Parameters (Kernel: Linear, C={best_params['C']})"

    # Measure training time
    start_time = time.perf_counter()
    svm_train_linear_wf.fit(X_train, y_train)
    training_time = time.perf_counter() - start_time

    # Measure prediction time on the training split
    start_time = time.perf_counter()
    y_train_pred_linear_wf = svm_train_linear_wf.predict(X_train)
    prediction_time = time.perf_counter() - start_time

    train_accuracy = accuracy_score(y_train, y_train_pred_linear_wf)
    class_report = classification_report(y_train, y_train_pred_linear_wf)

    # Display the results in Streamlit
    st.subheader(f"{tuned_title} - Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Tuning Parameter [kernel: linear, C=<best>] - TEST

    # Held-out evaluation of the tuned model; no refit on the test split.
    start_time = time.perf_counter()
    y_test_pred_linear_wf = svm_train_linear_wf.predict(X_test)
    prediction_time = time.perf_counter() - start_time

    test_accuracy = accuracy_score(y_test, y_test_pred_linear_wf)
    class_report = classification_report(y_test, y_test_pred_linear_wf)
    testing_time = time.perf_counter() - start_time

    # Display the results in Streamlit
    st.subheader(f"{tuned_title} - Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)


    #### Default Parameter [kernel = linear] - TRAIN - WITH PCA

    # Initialize an SVM with a linear kernel
    svm_train_linear_f = SVC(kernel="linear")

    # Measure training time (perf_counter is the clock intended for intervals)
    start_time = time.perf_counter()
    svm_train_linear_f.fit(X_train_pca, y_train)
    training_time = time.perf_counter() - start_time

    # Measure prediction time on the training split
    start_time = time.perf_counter()
    y_train_pred_linear_f = svm_train_linear_f.predict(X_train_pca)
    prediction_time = time.perf_counter() - start_time

    # Accuracy and per-class report on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_linear_f)
    class_report = classification_report(y_train, y_train_pred_linear_f)

    # Display the results in Streamlit
    st.header("SVM (Kernel: Linear) - With PCA - Default Parameters")
    st.subheader("Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Default Parameter [kernel = linear] - TEST

    # Evaluate the model exactly as fitted on the training split. It must NOT
    # be refit on the test data: that would turn the "test" score into a
    # training score on the held-out data.
    start_time = time.perf_counter()
    y_test_pred_linear_f = svm_train_linear_f.predict(X_test_pca)
    prediction_time = time.perf_counter() - start_time

    test_accuracy = accuracy_score(y_test, y_test_pred_linear_f)
    class_report = classification_report(y_test, y_test_pred_linear_f)
    # Testing time covers the whole evaluation: prediction plus metrics.
    testing_time = time.perf_counter() - start_time

    # Display the results in Streamlit
    st.subheader("Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    ### Implementation of SVM (Kernel-Linear) -Train and Test Set - With PCA - HALVING GRID SEARCH

    # Define the parameter grid
    param_grid = {'C': [1, 10]}

    # The searched estimator must use the linear kernel being tuned; a bare
    # SVC() defaults to the RBF kernel.
    svm = SVC(kernel="linear")

    # Initialize HalvingGridSearchCV for faster results
    halving_grid_search = HalvingGridSearchCV(svm, param_grid, cv=5, factor=2, n_jobs=-1)
    halving_grid_search.fit(X_train_pca, y_train)

    best_params = halving_grid_search.best_params_
    st.subheader("Best Hyperparameters Tuning")
    st.write(f"Best hyperparameters for Kernel Linear with PCA : {best_params}")

    #### Tuning Parameter [kernel: linear, C=<best>] - TRAIN

    # Build the tuned model from the search result rather than a hard-coded C,
    # so the reported "tuned" model always matches the search output.
    svm_train_linear_f = SVC(kernel="linear", **best_params)
    tuned_title = f"Tuned Parameters (Kernel: Linear, C={best_params['C']})"

    # Measure training time
    start_time = time.perf_counter()
    svm_train_linear_f.fit(X_train_pca, y_train)
    training_time = time.perf_counter() - start_time

    # Measure prediction time on the training split
    start_time = time.perf_counter()
    y_train_pred_linear_f = svm_train_linear_f.predict(X_train_pca)
    prediction_time = time.perf_counter() - start_time

    train_accuracy = accuracy_score(y_train, y_train_pred_linear_f)
    class_report = classification_report(y_train, y_train_pred_linear_f)

    # Display the results in Streamlit
    st.subheader(f"{tuned_title} - Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Tuning Parameter [kernel: linear, C=<best>] - TEST

    # Held-out evaluation of the tuned model; no refit on the test split.
    start_time = time.perf_counter()
    y_test_pred_linear_f = svm_train_linear_f.predict(X_test_pca)
    prediction_time = time.perf_counter() - start_time

    test_accuracy = accuracy_score(y_test, y_test_pred_linear_f)
    class_report = classification_report(y_test, y_test_pred_linear_f)
    testing_time = time.perf_counter() - start_time

    # Display the results in Streamlit
    st.subheader(f"{tuned_title} - Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)


    ## Implementation : Support Vector Machine KERNEL: POLYNOMIAL

    ### Implementation of SVM (Kernel- Polynomial) Train and Test Set - Without Feature Selection - DEFAULT

    #### Default Parameter [kernel="poly"] - TRAIN

    # Initialize an SVM with a poly kernel
    svm_train_poly_wf = SVC(kernel="poly")

    # Measure training time (perf_counter is the clock intended for intervals)
    start_time = time.perf_counter()
    svm_train_poly_wf.fit(X_train, y_train)
    training_time = time.perf_counter() - start_time

    # Measure prediction time on the training split
    start_time = time.perf_counter()
    y_train_pred_poly_wf = svm_train_poly_wf.predict(X_train)
    prediction_time = time.perf_counter() - start_time

    # Accuracy and per-class report on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_poly_wf)
    class_report = classification_report(y_train, y_train_pred_poly_wf)

    # Display the results in Streamlit
    st.header("SVM (Kernel: Polynomial) - Without Feature Selection - Default Parameters")
    st.subheader("Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Default Parameter [kernel="poly"] - TEST

    # Evaluate the model exactly as fitted on the training split. It must NOT
    # be refit on X_test/y_test: that would turn the "test" score into a
    # training score on the held-out data.
    start_time = time.perf_counter()
    y_test_pred_poly_wf = svm_train_poly_wf.predict(X_test)
    prediction_time = time.perf_counter() - start_time

    test_accuracy = accuracy_score(y_test, y_test_pred_poly_wf)
    class_report = classification_report(y_test, y_test_pred_poly_wf)
    # Testing time covers the whole evaluation: prediction plus metrics.
    testing_time = time.perf_counter() - start_time

    # Display the results in Streamlit
    st.subheader("Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    ### Implementation of SVM (Kernel-Poly) -Train and Test Set - Without Feature Selection. - HALVING GRID SEARCH

    # Define the parameter grid
    param_grid = {'C': [1, 10], 'degree': [2, 3]}

    # The searched estimator must use the polynomial kernel being tuned; a bare
    # SVC() defaults to RBF, for which `degree` has no effect.
    svm = SVC(kernel="poly")

    # Initialize HalvingGridSearchCV for faster results
    halving_grid_search = HalvingGridSearchCV(svm, param_grid, cv=5, factor=2, n_jobs=-1)
    halving_grid_search.fit(X_train, y_train)

    best_params = halving_grid_search.best_params_
    st.subheader("Best Hyperparameters Tuning")
    st.write(f"Best hyperparameters for Kernel Polynomial without Feature Selection: {best_params}")

    #### Tuning Parameter [kernel: poly, C=<best>, degree=<best>] - TRAIN

    # Build the tuned model from the search result rather than hard-coded
    # values, so the reported "tuned" model always matches the search output.
    svm_train_poly_wf = SVC(kernel="poly", **best_params)
    tuned_title = (
        f"Tuned Parameters (Kernel: Polynomial, C={best_params['C']}, "
        f"Degree={best_params['degree']})"
    )

    # Measure training time
    start_time = time.perf_counter()
    svm_train_poly_wf.fit(X_train, y_train)
    training_time = time.perf_counter() - start_time

    # Measure prediction time on the training split
    start_time = time.perf_counter()
    y_train_pred_poly_wf = svm_train_poly_wf.predict(X_train)
    prediction_time = time.perf_counter() - start_time

    train_accuracy = accuracy_score(y_train, y_train_pred_poly_wf)
    class_report = classification_report(y_train, y_train_pred_poly_wf)

    # Display the results in Streamlit
    st.subheader(f"{tuned_title} - Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Tuning Parameter [kernel: poly, C=<best>, degree=<best>] - TEST

    # Held-out evaluation of the tuned model; no refit on the test split.
    start_time = time.perf_counter()
    y_test_pred_poly_wf = svm_train_poly_wf.predict(X_test)
    prediction_time = time.perf_counter() - start_time

    test_accuracy = accuracy_score(y_test, y_test_pred_poly_wf)
    class_report = classification_report(y_test, y_test_pred_poly_wf)
    testing_time = time.perf_counter() - start_time

    # Display the results in Streamlit
    st.subheader(f"{tuned_title} - Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    ### Implementation of SVM (Kernel-Polynomial) -Train and Test Set - With PCA. - DEFAULT

    #### Default Parameter [kernel="poly"] - TRAIN

    # Train the SVM classifier with a polynomial kernel on the PCA-projected features
    svm_train_poly_f = SVC(kernel="poly")

    # Measure training time (perf_counter is the clock intended for intervals)
    start_time = time.perf_counter()
    svm_train_poly_f.fit(X_train_pca, y_train)
    training_time = time.perf_counter() - start_time

    # Measure prediction time on the training split
    start_time = time.perf_counter()
    y_train_pred_poly_f = svm_train_poly_f.predict(X_train_pca)
    prediction_time = time.perf_counter() - start_time

    # Accuracy and per-class report on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_poly_f)
    class_report = classification_report(y_train, y_train_pred_poly_f)

    # Display the results in Streamlit
    st.header("SVM (Kernel: Polynomial) - With PCA - Default Parameters")
    st.subheader("Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Default Parameter [kernel="poly"] - TEST

    # Evaluate the model exactly as fitted on the training split. It must NOT
    # be refit on the test data: that would turn the "test" score into a
    # training score on the held-out data.
    start_time = time.perf_counter()
    y_test_pred_poly_f = svm_train_poly_f.predict(X_test_pca)
    prediction_time = time.perf_counter() - start_time

    test_accuracy = accuracy_score(y_test, y_test_pred_poly_f)
    class_report = classification_report(y_test, y_test_pred_poly_f)
    # Testing time covers the whole evaluation: prediction plus metrics.
    testing_time = time.perf_counter() - start_time

    # Display the results in Streamlit
    st.subheader("Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    ### Implementation of SVM (Kernel-Poly) -Train and Test Set - With PCA - HALVING GRID SEARCH

    # Define the parameter grid
    param_grid = {'C': [1, 10], 'degree': [2, 3]}

    # The searched estimator must use the polynomial kernel being tuned; a bare
    # SVC() defaults to RBF, for which `degree` has no effect.
    svm = SVC(kernel="poly")

    # Initialize HalvingGridSearchCV for faster results
    halving_grid_search = HalvingGridSearchCV(svm, param_grid, cv=5, factor=2, n_jobs=-1)
    halving_grid_search.fit(X_train_pca, y_train)

    best_params = halving_grid_search.best_params_
    st.subheader("Best Hyperparameters Tuning")
    st.write(f"Best hyperparameters for Kernel Polynomial with PCA: {best_params}")

    #### Tuning Parameter [kernel: poly, C=<best>, degree=<best>] - TRAIN

    # Build the tuned model from the search result rather than hard-coded
    # values, so the reported "tuned" model always matches the search output.
    svm_train_poly_f = SVC(kernel="poly", **best_params)
    tuned_title = (
        f"Tuned Parameters (Kernel: Polynomial, C={best_params['C']}, "
        f"Degree={best_params['degree']})"
    )

    # Measure training time
    start_time = time.perf_counter()
    svm_train_poly_f.fit(X_train_pca, y_train)
    training_time = time.perf_counter() - start_time

    # Measure prediction time on the training split
    start_time = time.perf_counter()
    y_train_pred_poly_f = svm_train_poly_f.predict(X_train_pca)
    prediction_time = time.perf_counter() - start_time

    train_accuracy = accuracy_score(y_train, y_train_pred_poly_f)
    class_report = classification_report(y_train, y_train_pred_poly_f)

    # Display the results in Streamlit
    st.subheader(f"{tuned_title} - Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Tuning Parameter [kernel: poly, C=<best>, degree=<best>] - TEST

    # Held-out evaluation of the tuned model; no refit on the test split.
    start_time = time.perf_counter()
    y_test_pred_poly_f = svm_train_poly_f.predict(X_test_pca)
    prediction_time = time.perf_counter() - start_time

    test_accuracy = accuracy_score(y_test, y_test_pred_poly_f)
    class_report = classification_report(y_test, y_test_pred_poly_f)
    testing_time = time.perf_counter() - start_time

    # Display the results in Streamlit
    st.subheader(f"{tuned_title} - Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    ## Implementation : Support Vector Machine KERNEL: RBF

    ### Implementation of SVM (Kernel - RBF) - Train and Test Set - Without Feature Selection - DEFAULT

    #### Default Parameter [kernel = rbf] - TRAIN

    # RBF-kernel SVM with every other hyperparameter left at the library
    # default. It is fitted once on the training split and scored on both splits.
    svm_train_rbf_wf = SVC(kernel="rbf")

    # Measure training time
    start_time = time.time()
    svm_train_rbf_wf.fit(X_train, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_rbf_wf = svm_train_rbf_wf.predict(X_train)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_rbf_wf)
    class_report = classification_report(y_train, y_train_pred_rbf_wf)

    # Display the results in Streamlit
    st.header("SVM (Kernel: RBF) - Without Feature Selection - Default Parameters")
    st.subheader("Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Default Parameter [kernel = rbf] - TEST

    # Do not refit here: fitting on (X_test, y_test) and then predicting the
    # same rows would report a second training accuracy, not how the trained
    # model performs on held-out data.
    start_time = time.time()
    y_test_pred_rbf_wf = svm_train_rbf_wf.predict(X_test)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the test split
    test_accuracy = accuracy_score(y_test, y_test_pred_rbf_wf)
    class_report = classification_report(y_test, y_test_pred_rbf_wf)

    # Nothing is fitted at test time, so "testing time" covers the whole
    # evaluation pass: prediction plus metric computation.
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader("Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    ### Implementation of SVM (Kernel RBF) - Train and Test Set - Without Feature Selection - HALVING GRID SEARCH

    # Candidate values explored by the successive-halving search
    param_grid = {'C': [1, 10], 'gamma': ['scale', 'auto']}

    # Initialize the SVM model (no kernel is passed, so the library default is used)
    svm = SVC()

    # Initialize HalvingGridSearchCV for faster results
    halving_grid_search = HalvingGridSearchCV(svm, param_grid, cv=5, factor=2, n_jobs=-1)
    halving_grid_search.fit(X_train, y_train)

    best_params = halving_grid_search.best_params_
    st.subheader("Best Hyperparameters Tuning")
    st.write(f"Best hyperparameters for Kernel RBF without Feature Selection: {best_params}")

    #### Tuning Parameter [kernel: rbf, C and gamma taken from the search] - TRAIN

    # Build the tuned model from the search result instead of hard-coding
    # C/gamma, so the model and the headings below always agree with the
    # "best hyperparameters" that were just displayed.
    tuned_label = f"Kernel: RBF, C={best_params['C']}, Gamma={best_params['gamma']}"
    svm_train_rbf_wf = SVC(kernel="rbf", **best_params)

    # Measure training time
    start_time = time.time()
    svm_train_rbf_wf.fit(X_train, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_rbf_wf = svm_train_rbf_wf.predict(X_train)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_rbf_wf)
    class_report = classification_report(y_train, y_train_pred_rbf_wf)

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters ({tuned_label}) - Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Tuning Parameter [kernel: rbf, C and gamma taken from the search] - TEST

    # Do not refit here: fitting on (X_test, y_test) and then predicting the
    # same rows would report a second training accuracy, not how the trained
    # model performs on held-out data.
    start_time = time.time()
    y_test_pred_rbf_wf = svm_train_rbf_wf.predict(X_test)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the test split
    test_accuracy = accuracy_score(y_test, y_test_pred_rbf_wf)
    class_report = classification_report(y_test, y_test_pred_rbf_wf)

    # Nothing is fitted at test time, so "testing time" covers the whole
    # evaluation pass: prediction plus metric computation.
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters ({tuned_label}) - Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    ### Implementation of SVM (Kernel-RBF) using PCA - Train and Test set - DEFAULT

    #### Default Parameter [kernel=rbf] - TRAIN

    # RBF-kernel SVM with every other hyperparameter left at the library
    # default, fitted once on the PCA-transformed training split and scored on
    # both PCA-transformed splits.
    svm_train_rbf_f = SVC(kernel="rbf")

    # Measure training time
    start_time = time.time()
    svm_train_rbf_f.fit(X_train_pca, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_rbf_f = svm_train_rbf_f.predict(X_train_pca)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_rbf_f)
    class_report = classification_report(y_train, y_train_pred_rbf_f)

    # Display the results in Streamlit
    st.header("SVM (Kernel: RBF) - With PCA - Default Parameters")
    st.subheader("Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Default Parameter [kernel=rbf] - TEST

    # Do not refit here: fitting on (X_test_pca, y_test) and then predicting the
    # same rows would report a second training accuracy, not how the trained
    # model performs on held-out data.
    start_time = time.time()
    y_test_pred_rbf_f = svm_train_rbf_f.predict(X_test_pca)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the test split
    test_accuracy = accuracy_score(y_test, y_test_pred_rbf_f)
    class_report = classification_report(y_test, y_test_pred_rbf_f)

    # Nothing is fitted at test time, so "testing time" covers the whole
    # evaluation pass: prediction plus metric computation.
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader("Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    ### Implementation of SVM (Kernel-RBF) using PCA - Train and Test set - HALVING GRID SEARCH

    # Candidate values explored by the successive-halving search
    param_grid = {'C': [1, 10], 'gamma': ['scale', 'auto']}

    # Initialize the SVM model (no kernel is passed, so the library default is used)
    svm = SVC()

    # Initialize HalvingGridSearchCV for faster results
    halving_grid_search = HalvingGridSearchCV(svm, param_grid, cv=5, factor=2, n_jobs=-1)
    halving_grid_search.fit(X_train_pca, y_train)

    best_params = halving_grid_search.best_params_
    st.subheader("Best Hyperparameters Tuning")
    st.write(f"Best hyperparameters for Kernel RBF using PCA: {best_params}")

    #### Tuning Parameter [kernel=rbf, C and gamma taken from the search] - TRAIN

    # Build the tuned model from the search result instead of hard-coding
    # C/gamma, so the model and the headings below always agree with the
    # "best hyperparameters" that were just displayed.
    tuned_label = f"Kernel: RBF, C={best_params['C']}, Gamma={best_params['gamma']}"
    svm_train_rbf_f = SVC(kernel="rbf", **best_params)

    # Measure training time
    start_time = time.time()
    svm_train_rbf_f.fit(X_train_pca, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_rbf_f = svm_train_rbf_f.predict(X_train_pca)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_rbf_f)
    class_report = classification_report(y_train, y_train_pred_rbf_f)

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters ({tuned_label}) - Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Tuning Parameter [kernel=rbf, C and gamma taken from the search] - TEST

    # Do not refit here: fitting on (X_test_pca, y_test) and then predicting the
    # same rows would report a second training accuracy, not how the trained
    # model performs on held-out data.
    start_time = time.time()
    y_test_pred_rbf_f = svm_train_rbf_f.predict(X_test_pca)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the test split
    test_accuracy = accuracy_score(y_test, y_test_pred_rbf_f)
    class_report = classification_report(y_test, y_test_pred_rbf_f)

    # Nothing is fitted at test time, so "testing time" covers the whole
    # evaluation pass: prediction plus metric computation.
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters ({tuned_label}) - Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)




    # KNN (70:30)

    ## Implementation for KNN (Euclidean)

    ### Implementation of KNN (Euclidean) -- Train and Test Set  Without Feature Selection.- DEFAULT

    #### Default Parameter [metric=Euclidean] -TRAIN

    # Euclidean-distance KNN with k=99, fitted once on the training split and
    # scored on both splits.
    # NOTE(review): the original "sqrt" remark suggests k was picked as roughly
    # sqrt(number of training rows) - confirm against the dataset size.
    knn_train_euclidean_wf = KNeighborsClassifier(n_neighbors=99, metric='euclidean', p=2)

    # Measure training time
    start_time = time.time()
    knn_train_euclidean_wf.fit(X_train, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_euclidean_wf = knn_train_euclidean_wf.predict(X_train)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_euclidean_wf)
    class_report = classification_report(y_train, y_train_pred_euclidean_wf)

    # Display the results in Streamlit
    st.header("KNN (Euclidean) - Without Feature Selection - Default Parameters")
    st.subheader("Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Default Parameter [metric=Euclidean] -TEST

    # Score the classifier fitted on the training split. Fitting a second
    # classifier on (X_test, y_test) and predicting the same rows would report
    # a training accuracy for the test rows, not a held-out score. The name is
    # kept as an alias in case later code refers to it.
    knn_test_euclidean_wf = knn_train_euclidean_wf

    start_time = time.time()
    y_test_pred_euclidean_wf = knn_test_euclidean_wf.predict(X_test)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the test split
    test_accuracy = accuracy_score(y_test, y_test_pred_euclidean_wf)
    class_report = classification_report(y_test, y_test_pred_euclidean_wf)

    # Nothing is fitted at test time, so "testing time" covers the whole
    # evaluation pass: prediction plus metric computation.
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader("Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    ### Implementation of KNN (Euclidean) -- Train and Test Set  Without Feature Selection.- HALVING GRID SEARCH

    # HalvingGridSearchCV (and its experimental enabler) are already imported at
    # the top of the file, so they are not re-imported here.

    # Candidate values explored by the successive-halving search
    param_grid = {
        'n_neighbors': [95, 97],
        'metric': ['euclidean'],
    }

    # Initialize the KNN model
    knn = KNeighborsClassifier()

    # Initialize HalvingGridSearchCV for faster results
    halving_grid_search = HalvingGridSearchCV(knn, param_grid, cv=5, factor=2, n_jobs=-1)
    halving_grid_search.fit(X_train, y_train)

    best_params = halving_grid_search.best_params_
    st.subheader("Best Hyperparameters Tuning")
    st.write(f"Best hyperparameters: {best_params}")

    #### Tuning Parameter [metric=Euclidean, n_neighbors taken from the search] -TRAIN

    # Build the tuned model from the search result instead of hard-coding
    # n_neighbors, so the model and the headings below always agree with the
    # "best hyperparameters" that were just displayed.
    tuned_label = f"Euclidean, n_neighbors={best_params['n_neighbors']}"
    knn_train_euclidean_wf = KNeighborsClassifier(p=2, **best_params)

    # Measure training time
    start_time = time.time()
    knn_train_euclidean_wf.fit(X_train, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_euclidean_wf = knn_train_euclidean_wf.predict(X_train)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_euclidean_wf)
    class_report = classification_report(y_train, y_train_pred_euclidean_wf)

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters ({tuned_label}) - Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Tuning Parameter [metric=Euclidean, n_neighbors taken from the search] -TEST

    # Score the classifier fitted on the training split. Fitting a second
    # classifier on (X_test, y_test) and predicting the same rows would report
    # a training accuracy for the test rows, not a held-out score. The name is
    # kept as an alias in case later code refers to it.
    knn_test_euclidean_wf = knn_train_euclidean_wf

    start_time = time.time()
    y_test_pred_euclidean_wf = knn_test_euclidean_wf.predict(X_test)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the test split
    test_accuracy = accuracy_score(y_test, y_test_pred_euclidean_wf)
    class_report = classification_report(y_test, y_test_pred_euclidean_wf)

    # Nothing is fitted at test time, so "testing time" covers the whole
    # evaluation pass: prediction plus metric computation.
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters ({tuned_label}) - Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    ### Implementation of KNN (Metric-euclidean) -Train Set and Test Set - With PCA. - DEFAULT

    #### Default Parameter [metric=Euclidean] -TRAIN

    # Euclidean-distance KNN with k=99, fitted once on the PCA-transformed
    # training split and scored on both PCA-transformed splits.
    # NOTE(review): the original "sqrt" remark suggests k was picked as roughly
    # sqrt(number of training rows) - confirm against the dataset size.
    knn_train_euclidean_f = KNeighborsClassifier(n_neighbors=99, metric='euclidean', p=2)

    # Measure training time
    start_time = time.time()
    knn_train_euclidean_f.fit(X_train_pca, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_euclidean_f = knn_train_euclidean_f.predict(X_train_pca)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_euclidean_f)
    class_report = classification_report(y_train, y_train_pred_euclidean_f)

    # Display the results in Streamlit
    st.header("KNN (Euclidean) - With PCA - Default Parameters")
    st.subheader("Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Default Parameter [metric=Euclidean] -TEST

    # Score the classifier fitted on the training split. Fitting a second
    # classifier on (X_test_pca, y_test) and predicting the same rows would
    # report a training accuracy for the test rows, not a held-out score. The
    # name is kept as an alias in case later code refers to it.
    knn_test_euclidean_f = knn_train_euclidean_f

    start_time = time.time()
    y_test_pred_euclidean_f = knn_test_euclidean_f.predict(X_test_pca)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the test split
    test_accuracy = accuracy_score(y_test, y_test_pred_euclidean_f)
    class_report = classification_report(y_test, y_test_pred_euclidean_f)

    # Nothing is fitted at test time, so "testing time" covers the whole
    # evaluation pass: prediction plus metric computation.
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader("Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    ### Implementation of KNN (Euclidean) -- Train and Test Set  With PCA.- HALVING GRID SEARCH

    # Candidate values explored by the successive-halving search
    param_grid = {
        'n_neighbors': [95, 97],
        'metric': ['euclidean'],
    }

    # Initialize the KNN model
    knn = KNeighborsClassifier()

    # Initialize HalvingGridSearchCV for faster results
    halving_grid_search = HalvingGridSearchCV(knn, param_grid, cv=5, factor=2, n_jobs=-1)
    halving_grid_search.fit(X_train_pca, y_train)

    best_params = halving_grid_search.best_params_
    st.subheader("Best Hyperparameters Tuning")
    st.write(f"Best hyperparameters: {best_params}")

    #### Tuning Parameter [metric=Euclidean, n_neighbors taken from the search] -TRAIN

    # Build the tuned model from the search result instead of hard-coding
    # n_neighbors, so the model and the headings below always agree with the
    # "best hyperparameters" that were just displayed.
    tuned_label = f"Euclidean, n_neighbors={best_params['n_neighbors']}"
    knn_train_euclidean_f = KNeighborsClassifier(p=2, **best_params)

    # Measure training time
    start_time = time.time()
    knn_train_euclidean_f.fit(X_train_pca, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_euclidean_f = knn_train_euclidean_f.predict(X_train_pca)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_euclidean_f)
    class_report = classification_report(y_train, y_train_pred_euclidean_f)

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters ({tuned_label}) - Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Tuning Parameter [metric=Euclidean, n_neighbors taken from the search] -TEST

    # Score the classifier fitted on the training split. Fitting a second
    # classifier on (X_test_pca, y_test) and predicting the same rows would
    # report a training accuracy for the test rows, not a held-out score. The
    # name is kept as an alias in case later code refers to it.
    knn_test_euclidean_f = knn_train_euclidean_f

    start_time = time.time()
    y_test_pred_euclidean_f = knn_test_euclidean_f.predict(X_test_pca)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the test split
    test_accuracy = accuracy_score(y_test, y_test_pred_euclidean_f)
    class_report = classification_report(y_test, y_test_pred_euclidean_f)

    # Nothing is fitted at test time, so "testing time" covers the whole
    # evaluation pass: prediction plus metric computation.
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters ({tuned_label}) - Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)



    ## Implementation for KNN (Manhattan)

    ### Implementation of KNN (Manhattan) -- Train and Test Set  Without Feature Selection.- DEFAULT

    #### Default Parameter [metric=Manhattan] -TRAIN

    # Manhattan-distance KNN with k=99, fitted once on the training split and
    # scored on both splits.
    # NOTE(review): the original "sqrt" remark suggests k was picked as roughly
    # sqrt(number of training rows) - confirm against the dataset size.
    knn_train_manhattan_wf = KNeighborsClassifier(n_neighbors=99, metric='manhattan', p=1)

    # Measure training time
    start_time = time.time()
    knn_train_manhattan_wf.fit(X_train, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_manhattan_wf = knn_train_manhattan_wf.predict(X_train)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_manhattan_wf)
    class_report = classification_report(y_train, y_train_pred_manhattan_wf)

    # Display the results in Streamlit
    st.header("KNN (Manhattan) - Without Feature Selection - Default Parameters")
    st.subheader("Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Default Parameter [metric=Manhattan] -TEST

    # Score the classifier fitted on the training split. Fitting a second
    # classifier on (X_test, y_test) and predicting the same rows would report
    # a training accuracy for the test rows, not a held-out score. The name is
    # kept as an alias in case later code refers to it.
    knn_test_manhattan_wf = knn_train_manhattan_wf

    start_time = time.time()
    y_test_pred_manhattan_wf = knn_test_manhattan_wf.predict(X_test)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the test split
    test_accuracy = accuracy_score(y_test, y_test_pred_manhattan_wf)
    class_report = classification_report(y_test, y_test_pred_manhattan_wf)

    # Nothing is fitted at test time, so "testing time" covers the whole
    # evaluation pass: prediction plus metric computation.
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader("Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)


    ### Implementation of KNN (Manhattan) -- Train and Test Set  Without Feature Selection.- HALVING GRID SEARCH

    # Candidate values explored by the successive-halving search
    param_grid = {
        'n_neighbors': [95, 97],
        'metric': ['manhattan'],
    }

    # Initialize the KNN model
    knn = KNeighborsClassifier()

    # Initialize HalvingGridSearchCV for faster results
    halving_grid_search = HalvingGridSearchCV(knn, param_grid, cv=5, factor=2, n_jobs=-1)
    halving_grid_search.fit(X_train, y_train)

    best_params = halving_grid_search.best_params_
    st.subheader("Best Hyperparameters Tuning")
    st.write(f"Best hyperparameters: {best_params}")

    #### Tuning Parameter [metric=Manhattan, n_neighbors taken from the search] -TRAIN

    # Build the tuned model from the search result instead of hard-coding
    # n_neighbors, so the model and the headings below always agree with the
    # "best hyperparameters" that were just displayed.
    tuned_label = f"Manhattan, n_neighbors={best_params['n_neighbors']}"
    knn_train_manhattan_wf = KNeighborsClassifier(p=1, **best_params)

    # Measure training time
    start_time = time.time()
    knn_train_manhattan_wf.fit(X_train, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_manhattan_wf = knn_train_manhattan_wf.predict(X_train)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_manhattan_wf)
    class_report = classification_report(y_train, y_train_pred_manhattan_wf)

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters ({tuned_label}) - Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Tuning Parameter [metric=Manhattan, n_neighbors taken from the search] -TEST

    # Score the classifier fitted on the training split. Fitting a second
    # classifier on (X_test, y_test) and predicting the same rows would report
    # a training accuracy for the test rows, not a held-out score. The name is
    # kept as an alias in case later code refers to it.
    knn_test_manhattan_wf = knn_train_manhattan_wf

    start_time = time.time()
    y_test_pred_manhattan_wf = knn_test_manhattan_wf.predict(X_test)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the test split
    test_accuracy = accuracy_score(y_test, y_test_pred_manhattan_wf)
    class_report = classification_report(y_test, y_test_pred_manhattan_wf)

    # Nothing is fitted at test time, so "testing time" covers the whole
    # evaluation pass: prediction plus metric computation.
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters ({tuned_label}) - Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)


    ### Implementation of knn (Metric-manhattan) -Train Set and Test Set - With PCA. - DEFAULT

    #### Default Parameter [metric=Manhattan] -TRAIN

    # Manhattan-distance KNN with k=99, fitted once on the PCA-transformed
    # training split and scored on both PCA-transformed splits.
    # NOTE(review): the original "sqrt" remark suggests k was picked as roughly
    # sqrt(number of training rows) - confirm against the dataset size.
    knn_train_manhattan_f = KNeighborsClassifier(n_neighbors=99, metric='manhattan', p=1)

    # Measure training time
    start_time = time.time()
    knn_train_manhattan_f.fit(X_train_pca, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_manhattan_f = knn_train_manhattan_f.predict(X_train_pca)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_manhattan_f)
    class_report = classification_report(y_train, y_train_pred_manhattan_f)

    # Display the results in Streamlit
    st.header("KNN (Manhattan) - With PCA - Default Parameters")
    st.subheader("Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Default Parameter [metric=Manhattan] -TEST

    # Score the classifier fitted on the training split. Fitting a second
    # classifier on (X_test_pca, y_test) and predicting the same rows would
    # report a training accuracy for the test rows, not a held-out score. The
    # name is kept as an alias in case later code refers to it.
    knn_test_manhattan_f = knn_train_manhattan_f

    start_time = time.time()
    y_test_pred_manhattan_f = knn_test_manhattan_f.predict(X_test_pca)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the test split
    test_accuracy = accuracy_score(y_test, y_test_pred_manhattan_f)
    class_report = classification_report(y_test, y_test_pred_manhattan_f)

    # Nothing is fitted at test time, so "testing time" covers the whole
    # evaluation pass: prediction plus metric computation.
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader("Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)


    ### Implementation of KNN (Manhattan) -- Train and Test Set  With PCA.- HALVING GRID SEARCH

    # Candidate values explored by the successive-halving search
    param_grid = {
        'n_neighbors': [95, 97],
        'metric': ['manhattan'],
    }

    # Initialize the KNN model
    knn = KNeighborsClassifier()

    # Initialize HalvingGridSearchCV for faster results
    halving_grid_search = HalvingGridSearchCV(knn, param_grid, cv=5, factor=2, n_jobs=-1)
    halving_grid_search.fit(X_train_pca, y_train)

    best_params = halving_grid_search.best_params_
    st.subheader("Best Hyperparameters Tuning")
    st.write(f"Best hyperparameters: {best_params}")

    #### Tuning Parameter [metric=Manhattan, n_neighbors taken from the search] -TRAIN

    # Build the tuned model from the search result instead of hard-coding
    # n_neighbors, so the model and the headings below always agree with the
    # "best hyperparameters" that were just displayed.
    tuned_label = f"Manhattan, n_neighbors={best_params['n_neighbors']}"
    knn_train_manhattan_f = KNeighborsClassifier(p=1, **best_params)

    # Measure training time
    start_time = time.time()
    knn_train_manhattan_f.fit(X_train_pca, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_manhattan_f = knn_train_manhattan_f.predict(X_train_pca)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_manhattan_f)
    class_report = classification_report(y_train, y_train_pred_manhattan_f)

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters ({tuned_label}) - Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Tuning Parameter [metric=Manhattan, n_neighbors taken from the search] -TEST

    # Score the classifier fitted on the training split. Fitting a second
    # classifier on (X_test_pca, y_test) and predicting the same rows would
    # report a training accuracy for the test rows, not a held-out score. The
    # name is kept as an alias in case later code refers to it.
    knn_test_manhattan_f = knn_train_manhattan_f

    start_time = time.time()
    y_test_pred_manhattan_f = knn_test_manhattan_f.predict(X_test_pca)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the test split
    test_accuracy = accuracy_score(y_test, y_test_pred_manhattan_f)
    class_report = classification_report(y_test, y_test_pred_manhattan_f)

    # Nothing is fitted at test time, so "testing time" covers the whole
    # evaluation pass: prediction plus metric computation.
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters ({tuned_label}) - Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)


    ## Implementation for KNN (Cosine)

    ### Implementation of KNN (cosine) -Train Set & Test Set - Without Feature Selection. - Default

    #### Default Parameter [metric=Cosine] -TRAIN

    # Cosine-distance KNN with k=99, fitted once on the training split and
    # scored on both splits.
    # NOTE(review): the original "sqrt" remark suggests k was picked as roughly
    # sqrt(number of training rows) - confirm against the dataset size.
    knn_train_cosine_wf = KNeighborsClassifier(n_neighbors=99, metric='cosine')

    # Measure training time
    start_time = time.time()
    knn_train_cosine_wf.fit(X_train, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_cosine_wf = knn_train_cosine_wf.predict(X_train)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the training split
    train_accuracy = accuracy_score(y_train, y_train_pred_cosine_wf)
    class_report = classification_report(y_train, y_train_pred_cosine_wf)

    # Display the results in Streamlit
    st.header("KNN (Cosine) - Without Feature Selection - Default Parameters")
    st.subheader("Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Default Parameter [metric=Cosine] -TEST

    # Score the classifier fitted on the training split. Fitting a second
    # classifier on (X_test, y_test) and predicting the same rows would report
    # a training accuracy for the test rows, not a held-out score. The name is
    # kept as an alias in case later code refers to it.
    knn_test_cosine_wf = knn_train_cosine_wf

    start_time = time.time()
    y_test_pred_cosine_wf = knn_test_cosine_wf.predict(X_test)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the test split
    test_accuracy = accuracy_score(y_test, y_test_pred_cosine_wf)
    class_report = classification_report(y_test, y_test_pred_cosine_wf)

    # Nothing is fitted at test time, so "testing time" covers the whole
    # evaluation pass: prediction plus metric computation.
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader("Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    ### Implementation of KNN (cosine) - Train Set & Test Set - Without Feature Selection - HALVING GRID SEARCH

    # Candidate hyperparameters explored by the search
    param_grid = {
        'n_neighbors': [95, 97],
        'metric': ['cosine'],
    }

    # Initialize the KNN model
    knn = KNeighborsClassifier()

    # Successive-halving grid search (5-fold CV, factor 2, all cores)
    halving_grid_search = HalvingGridSearchCV(knn, param_grid, cv=5, factor=2, n_jobs=-1)
    halving_grid_search.fit(X_train, y_train)

    best_params = halving_grid_search.best_params_
    st.subheader("Best Hyperparameters Tuning")
    st.write(f"Best hyperparameters: {best_params}")

    #### Tuning Parameter [metric=Cosine, n_neighbors from the search] - TRAIN

    # Build the tuned model from the search result rather than a hard-coded
    # k, so the model and its labels always match what the search selected.
    best_k = best_params['n_neighbors']
    knn_train_cosine_wf = KNeighborsClassifier(**best_params)

    # Measure training time
    start_time = time.time()
    knn_train_cosine_wf.fit(X_train, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_cosine_wf = knn_train_cosine_wf.predict(X_train)
    prediction_time = time.time() - start_time

    # Training accuracy and per-class metrics
    train_accuracy = accuracy_score(y_train, y_train_pred_cosine_wf)
    class_report = classification_report(y_train, y_train_pred_cosine_wf)

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters (Cosine, n_neighbors={best_k}) - Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Tuning Parameter [metric=Cosine, n_neighbors from the search] - TEST

    # Score the classifier fitted on the training split against the held-out
    # test split. Fitting a separate model on X_test and predicting X_test
    # would only yield another training score, not a generalisation estimate.
    start_time = time.time()
    y_test_pred_cosine_wf = knn_train_cosine_wf.predict(X_test)
    prediction_time = time.time() - start_time

    # Test accuracy and per-class metrics
    test_accuracy = accuracy_score(y_test, y_test_pred_cosine_wf)
    class_report = classification_report(y_test, y_test_pred_cosine_wf)

    # Testing time covers prediction plus scoring on the test split
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters (Cosine, n_neighbors={best_k}) - Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    ### Implementation of KNN (cosine) - Train Set & Test Set - With PCA - Default

    #### Default Parameter [metric=Cosine] - TRAIN

    # KNN with cosine distance on the PCA-projected features.
    # The author's "sqrt" note suggests k=99 was chosen as roughly
    # sqrt(n_samples) - TODO confirm against the dataset size.
    knn_train_cosine_f = KNeighborsClassifier(n_neighbors=99, metric='cosine')

    # Measure training time
    start_time = time.time()
    knn_train_cosine_f.fit(X_train_pca, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_cosine_f = knn_train_cosine_f.predict(X_train_pca)
    prediction_time = time.time() - start_time

    # Training accuracy and per-class metrics
    train_accuracy = accuracy_score(y_train, y_train_pred_cosine_f)
    class_report = classification_report(y_train, y_train_pred_cosine_f)

    # Display the results in Streamlit
    st.header("KNN (Cosine) - With PCA - Default Parameters")
    st.subheader("Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Default Parameter [metric=Cosine] - TEST

    # Score the classifier fitted on the training split against the held-out
    # test split. Fitting a separate model on X_test_pca and predicting
    # X_test_pca would only yield another training score.
    start_time = time.time()
    y_test_pred_cosine_f = knn_train_cosine_f.predict(X_test_pca)
    prediction_time = time.time() - start_time

    # Test accuracy and per-class metrics
    test_accuracy = accuracy_score(y_test, y_test_pred_cosine_f)
    class_report = classification_report(y_test, y_test_pred_cosine_f)

    # Testing time covers prediction plus scoring on the test split
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader("Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)


    ### Implementation of KNN (cosine) - Train Set & Test Set - With PCA - HALVING GRID SEARCH

    # Candidate hyperparameters explored by the search
    param_grid = {
        'n_neighbors': [95, 97],
        'metric': ['cosine'],
    }

    # Initialize the KNN model
    knn = KNeighborsClassifier()

    # Successive-halving grid search (5-fold CV, factor 2, all cores)
    halving_grid_search = HalvingGridSearchCV(knn, param_grid, cv=5, factor=2, n_jobs=-1)
    halving_grid_search.fit(X_train_pca, y_train)

    best_params = halving_grid_search.best_params_
    st.subheader("Best Hyperparameters Tuning")
    st.write(f"Best hyperparameters: {best_params}")

    #### Tuning Parameter [metric=Cosine, n_neighbors from the search] - TRAIN

    # Build the tuned model from the search result rather than a hard-coded
    # k, so the model and its labels always match what the search selected.
    best_k = best_params['n_neighbors']
    knn_train_cosine_f = KNeighborsClassifier(**best_params)

    # Measure training time
    start_time = time.time()
    knn_train_cosine_f.fit(X_train_pca, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_cosine_f = knn_train_cosine_f.predict(X_train_pca)
    prediction_time = time.time() - start_time

    # Training accuracy and per-class metrics
    train_accuracy = accuracy_score(y_train, y_train_pred_cosine_f)
    class_report = classification_report(y_train, y_train_pred_cosine_f)

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters (Cosine, n_neighbors={best_k}) - Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Tuning Parameter [metric=Cosine, n_neighbors from the search] - TEST

    # Score the classifier fitted on the training split against the held-out
    # test split. Fitting a separate model on X_test_pca and predicting
    # X_test_pca would only yield another training score.
    start_time = time.time()
    y_test_pred_cosine_f = knn_train_cosine_f.predict(X_test_pca)
    prediction_time = time.time() - start_time

    # Test accuracy and per-class metrics
    test_accuracy = accuracy_score(y_test, y_test_pred_cosine_f)
    class_report = classification_report(y_test, y_test_pred_cosine_f)

    # Testing time covers prediction plus scoring on the test split
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters (Cosine, n_neighbors={best_k}) - Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)


    ## Implementation : Naive Bayes - Classification: GaussianBayes

    ### Implementation of NB (Gb) - Train Set and Test Set - Without Feature Selection - DEFAULT

    #### Default Parameter GNB - TRAIN

    # Gaussian Naive Bayes with library-default settings on the unreduced features
    nb_train_gb_wf = GaussianNB()

    # Measure training time
    start_time = time.time()
    nb_train_gb_wf.fit(X_train, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_gb_wf = nb_train_gb_wf.predict(X_train)
    prediction_time = time.time() - start_time

    # Training accuracy and per-class metrics
    train_accuracy = accuracy_score(y_train, y_train_pred_gb_wf)
    class_report = classification_report(y_train, y_train_pred_gb_wf)

    # Display the results in Streamlit
    st.header("Naive Bayes (Gaussian) - Without Feature Selection - Default Parameters")
    st.subheader("Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Default Parameter GNB - TEST

    # Score the model fitted on the training split against the held-out test
    # split. Refitting it on X_test before predicting X_test would replace the
    # trained parameters and report a training score under a "testing" label.
    start_time = time.time()
    y_test_pred_gb_wf = nb_train_gb_wf.predict(X_test)
    prediction_time = time.time() - start_time

    # Test accuracy and per-class metrics
    test_accuracy = accuracy_score(y_test, y_test_pred_gb_wf)
    class_report = classification_report(y_test, y_test_pred_gb_wf)

    # Testing time covers prediction plus scoring on the test split
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader("Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)



    ### Implementation of NB (Gb) - Train Set and Test Set - Without Feature Selection - HALVING GRID SEARCH

    # Candidate smoothing values: ten log-spaced points from 1e-9 to 1
    param_grid = {
        'var_smoothing': np.logspace(-9, 0, 10),
    }

    # Initialize the GaussianNB model
    gnb = GaussianNB()

    # Successive-halving grid search (5-fold CV, factor 2, all cores)
    halving_grid_search = HalvingGridSearchCV(gnb, param_grid, cv=5, factor=2, n_jobs=-1)
    halving_grid_search.fit(X_train, y_train)

    best_params = halving_grid_search.best_params_
    st.subheader("Best Hyperparameters Tuning")
    st.write(f"Best hyperparameters: {best_params}")

    #### Tuning Parameter GNB - TRAIN

    # Use the smoothing value the search selected; the same value feeds the
    # section labels so they cannot drift from the model actually fitted.
    best_var_smoothing = best_params['var_smoothing']
    nb_train_gb_wf = GaussianNB(var_smoothing=best_var_smoothing)

    # Measure training time
    start_time = time.time()
    nb_train_gb_wf.fit(X_train, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_gb_wf = nb_train_gb_wf.predict(X_train)
    prediction_time = time.time() - start_time

    # Training accuracy and per-class metrics
    train_accuracy = accuracy_score(y_train, y_train_pred_gb_wf)
    class_report = classification_report(y_train, y_train_pred_gb_wf)

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters (Gaussian, var_smoothing={best_var_smoothing:g}) - Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Tuning Parameter GNB - TEST

    # Score the model fitted on the training split against the held-out test
    # split. Fitting a separate model on X_test and predicting X_test would
    # only yield another training score, not a generalisation estimate.
    start_time = time.time()
    y_test_pred_gb_wf = nb_train_gb_wf.predict(X_test)
    prediction_time = time.time() - start_time

    # Test accuracy and per-class metrics
    test_accuracy = accuracy_score(y_test, y_test_pred_gb_wf)
    class_report = classification_report(y_test, y_test_pred_gb_wf)

    # Testing time covers prediction plus scoring on the test split
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters (Gaussian, var_smoothing={best_var_smoothing:g}) - Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)


    ### Implementation of NB (Gb) - Train Set and Test Set - With LDA - DEFAULT

    #### Default Parameter GNB - TRAIN

    # Gaussian Naive Bayes with library-default settings on the LDA-projected features
    nb_train_gb_f = GaussianNB()

    # Measure training time
    start_time = time.time()
    nb_train_gb_f.fit(X_train_lda, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_gb_f = nb_train_gb_f.predict(X_train_lda)
    prediction_time = time.time() - start_time

    # Training accuracy and per-class metrics
    train_accuracy = accuracy_score(y_train, y_train_pred_gb_f)
    class_report = classification_report(y_train, y_train_pred_gb_f)

    # Display the results in Streamlit
    st.header("Naive Bayes (Gaussian) - With LDA - Default Parameters")
    st.subheader("Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Default Parameter GNB - TEST

    # Score the model fitted on the training split against the held-out test
    # split. Refitting it on X_test_lda before predicting X_test_lda would
    # replace the trained parameters and report a training score instead.
    start_time = time.time()
    y_test_pred_gb_f = nb_train_gb_f.predict(X_test_lda)
    prediction_time = time.time() - start_time

    # Test accuracy and per-class metrics
    test_accuracy = accuracy_score(y_test, y_test_pred_gb_f)
    class_report = classification_report(y_test, y_test_pred_gb_f)

    # Testing time covers prediction plus scoring on the test split
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader("Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)


    ### Implementation of NB (Gb) - Train Set and Test Set - With LDA - HALVING GRID SEARCH

    # Candidate smoothing values: ten log-spaced points from 1e-9 to 1
    param_grid = {
        'var_smoothing': np.logspace(-9, 0, 10),
    }

    # Initialize the GaussianNB model
    gnb = GaussianNB()

    # Successive-halving grid search (5-fold CV, factor 2, all cores)
    halving_grid_search = HalvingGridSearchCV(gnb, param_grid, cv=5, factor=2, n_jobs=-1)
    halving_grid_search.fit(X_train_lda, y_train)

    best_params = halving_grid_search.best_params_
    st.subheader("Best Hyperparameters Tuning")
    st.write(f"Best hyperparameters: {best_params}")

    #### Tuning Parameter GNB - TRAIN

    # Use the smoothing value the search selected instead of a fixed constant,
    # so the "tuned" model and its labels reflect the search on this dataset.
    best_var_smoothing = best_params['var_smoothing']
    nb_train_gb_f = GaussianNB(var_smoothing=best_var_smoothing)

    # Measure training time
    start_time = time.time()
    nb_train_gb_f.fit(X_train_lda, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the training split
    start_time = time.time()
    y_train_pred_gb_f = nb_train_gb_f.predict(X_train_lda)
    prediction_time = time.time() - start_time

    # Training accuracy and per-class metrics
    train_accuracy = accuracy_score(y_train, y_train_pred_gb_f)
    class_report = classification_report(y_train, y_train_pred_gb_f)

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters (Gaussian, var_smoothing={best_var_smoothing:g}) - Training Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Training Accuracy: {train_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)

    #### Tuning Parameter GNB - TEST

    # Score the model fitted on the training split against the held-out test
    # split. Fitting a separate model on X_test_lda and predicting X_test_lda
    # would only yield another training score.
    start_time = time.time()
    y_test_pred_gb_f = nb_train_gb_f.predict(X_test_lda)
    prediction_time = time.time() - start_time

    # Test accuracy and per-class metrics
    test_accuracy = accuracy_score(y_test, y_test_pred_gb_f)
    class_report = classification_report(y_test, y_test_pred_gb_f)

    # Testing time covers prediction plus scoring on the test split
    testing_time = time.time() - start_time

    # Display the results in Streamlit
    st.subheader(f"Tuned Parameters (Gaussian, var_smoothing={best_var_smoothing:g}) - Testing Results")
    st.write(f"Testing time: {testing_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(class_report)




    ## Ensemble - SVM, KNN, NB - Without Feature Selection

    # Base learners for the soft-voting ensemble. SVC needs probability=True
    # because soft voting averages predicted class probabilities.
    svm_rbf_clf = SVC(probability=True, kernel='rbf', C=10, gamma='scale')
    # `p` is the Minkowski power parameter, so it is not passed together with
    # metric='manhattan' (the PCA ensemble is configured the same way).
    knn_man_clf = KNeighborsClassifier(metric='manhattan', n_neighbors=95, weights='distance', algorithm='auto')
    nb_clf = GaussianNB(var_smoothing=0.01)

    # Create an ensemble using VotingClassifier with unique names
    ensemble_clf = VotingClassifier(estimators=[
        ('svm_rbf', svm_rbf_clf),
        ('knn_manhattan', knn_man_clf),
        ('naive_bayes', nb_clf)
    ], voting='soft', n_jobs=-1)  # Use all available cores

    # Fit on the training split only; fitting on X_test and then predicting
    # X_test would report a training score under a "testing" label.
    start_time = time.time()
    ensemble_clf.fit(X_train, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the held-out test split
    start_time = time.time()
    y_test_pred = ensemble_clf.predict(X_test)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the test split
    test_accuracy = accuracy_score(y_test, y_test_pred)
    report = classification_report(y_test, y_test_pred)

    # Display the results in Streamlit
    st.header("Ensemble Model - Without Feature Selection")
    st.subheader("Testing Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(report)

    ## Ensemble - SVM, KNN, NB - With PCA

    # Base learners for the soft-voting ensemble. SVC needs probability=True
    # because soft voting averages predicted class probabilities.
    svm_rbf_clf = SVC(probability=True, kernel='rbf', C=10, gamma='scale')
    knn_man_clf = KNeighborsClassifier(metric='manhattan', n_neighbors=95, weights='distance', algorithm='auto')
    nb_clf = GaussianNB(var_smoothing=0.01)

    # Create an ensemble using VotingClassifier with unique names
    ensemble_clf = VotingClassifier(estimators=[
        ('svm_rbf', svm_rbf_clf),
        ('knn_manhattan', knn_man_clf),
        ('naive_bayes', nb_clf)
    ], voting='soft', n_jobs=-1)  # Use all available cores

    # Fit on the PCA-projected training split only; fitting on X_test_pca and
    # then predicting X_test_pca would report a training score under a
    # "testing" label.
    start_time = time.time()
    ensemble_clf.fit(X_train_pca, y_train)
    training_time = time.time() - start_time

    # Measure prediction time on the held-out test split
    start_time = time.time()
    y_test_pred = ensemble_clf.predict(X_test_pca)
    prediction_time = time.time() - start_time

    # Accuracy and per-class metrics on the test split
    test_accuracy = accuracy_score(y_test, y_test_pred)
    report = classification_report(y_test, y_test_pred)

    # Display the results in Streamlit
    st.header("Ensemble Model - With PCA")
    st.subheader("Testing Results")
    st.write(f"Training time: {training_time:.4f} seconds")
    st.write(f"Prediction time: {prediction_time:.4f} seconds")
    st.write(f"Testing Accuracy: {test_accuracy:.4f}")
    st.text("Classification Report:")
    st.text(report)

    # Summary figures, one row per model: (label, training accuracy, testing accuracy).
    # NOTE(review): these accuracies are literal constants, not values computed
    # by the models fitted earlier in this script.
    summary_rows = [
        ('SVM (Linear) - Default', 0.8449, 0.8740),
        ('SVM (Linear) - Tuned', 0.8628, 0.8886),
        ('SVM (Linear + PCA) - Default', 0.8476, 0.8733),
        ('SVM (Linear + PCA) - Tuned', 0.8620, 0.8864),
        ('SVM (Poly) - Default', 0.8209, 0.7988),
        ('SVM (Poly) - Tuned', 0.8847, 0.8529),
        ('SVM (Poly + PCA) - Default', 0.8202, 0.7986),
        ('SVM (Poly + PCA) - Tuned', 0.8866, 0.8629),
        ('SVM (RBF) - Default', 0.9097, 0.8881),
        ('SVM (RBF) - Tuned', 0.9552, 0.9260),
        ('SVM (RBF + PCA) - Default', 0.9097, 0.8881),
        ('SVM (RBF + PCA) - Tuned', 0.9599, 0.9467),
        ('KNN (Euclidean) - Default', 0.9451, 0.9348),
        ('KNN (Euclidean) - Tuned', 0.9457, 0.9355),
        ('KNN (Euclidean + PCA) - Default', 0.9452, 0.9350),
        ('KNN (Euclidean + PCA) - Tuned', 0.9457, 0.9357),
        ('KNN (Manhattan) - Default', 0.9555, 0.9417),
        ('KNN (Manhattan) - Tuned', 0.9560, 0.9412),
        ('KNN (Manhattan + PCA) - Default', 0.9504, 0.9321),
        ('KNN (Manhattan + PCA) - Tuned', 0.9513, 0.9333),
        ('KNN (Cosine) - Default', 0.9449, 0.9207),
        ('KNN (Cosine) - Tuned', 0.9455, 0.9221),
        ('KNN (Cosine + PCA) - Default', 0.9448, 0.9212),
        ('KNN (Cosine + PCA) - Tuned', 0.9455, 0.9221),
        ('NB (Gaussian) - Default', 0.7871, 0.7886),
        ('NB (Gaussian) - Tuned', 0.7860, 0.7950),
        ('NB (Gaussian + LDA) - Default', 0.8056, 0.8019),
        ('NB (Gaussian + LDA) - Tuned', 0.8056, 0.8019),
        ('Ensemble - Without Feature Selection', 0.9574, 0.9574),
        ('Ensemble - With PCA', 0.9829, 0.9829),
    ]

    # Transpose the rows into the column-oriented mapping the DataFrame is built from
    model_labels, train_scores, test_scores = (list(column) for column in zip(*summary_rows))
    data = {
        'Model': model_labels,
        'Training Accuracy': train_scores,
        'Testing Accuracy': test_scores,
    }

    # Create the DataFrame
    summary_df = pd.DataFrame(data)

    # Render the summary table in Streamlit under its own header; the header
    # text labels the figures as results for a 70:30 split.
    st.header("Summary of Model Results (70:30 Split Ratios)")
    st.dataframe(summary_df)



else:
      st.write("Please upload a CSV file to begin.")

Writing Ddos_Attack_Detection1.py


In [None]:
!streamlit run Ddos_Attack_Detection1.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://104.197.95.166:8501[0m
[0m
[K[?25hnpx: installed 22 in 7.068s
your url is: https://crazy-clowns-spend.loca.lt
  df = pd.concat([chunk for chunk in pd.read_csv(uploaded_file, chunksize=chunk_size)])
  df = pd.concat([chunk for chunk in pd.read_csv(uploaded_file, chunksize=chunk_size)])

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.countplot(x=' Label', data=df, palette='coolwarm')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  a