In [2]:
# Import libraries
import pandas as pd
import numpy as np
import math
import warnings

# Modeling Libraries
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import SMOTE
from tabulate import tabulate

warnings.filterwarnings("ignore")

# Read the source dataset once; each poisoning rate works on its own copy of it.
df_original = pd.read_csv("Diabetes _with_new_colum(attack).csv")
print('Classification report of poisoning detection:')

# Fraction of rows whose labels are flipped, one experiment per rate.
poisoning_rates = [0.05, 0.25, 0.5, 0.75]

# Hyper-parameter search grid per model, keyed by the estimator's class name.
params = {
    "RandomForestClassifier": dict(
        n_estimators=[100, 200],
        max_depth=[5, 10],
        criterion=['gini', 'entropy'],
    ),
    "GradientBoostingClassifier": dict(
        n_estimators=[100, 200],
        learning_rate=[0.01, 0.1],
        max_depth=[3, 5],
    ),
    "LogisticRegression": dict(
        C=[0.01, 0.1, 1, 10],
        penalty=['l2'],
    ),
    "MLPClassifier": dict(
        hidden_layer_sizes=[(50,), (100,)],
        activation=['tanh', 'relu'],
        max_iter=[1000],
    ),
}

# Explicit class-name -> estimator lookup. Replaces eval() on the keys of
# `params`, so a typo or an unexpected string can never execute arbitrary code.
MODEL_CLASSES = {
    "RandomForestClassifier": RandomForestClassifier,
    "GradientBoostingClassifier": GradientBoostingClassifier,
    "LogisticRegression": LogisticRegression,
    "MLPClassifier": MLPClassifier,
    "SVC": SVC,
}

# Single seed shared by the row sampler, SMOTE and every stochastic estimator,
# so that Restart & Run All reproduces the same numbers.
SEED = 42

for r in poisoning_rates:
    # Start every rate from the clean data.
    df = df_original.copy()

    # Number of rows to poison for this rate (rounded up).
    Outcomepoison = math.ceil(len(df) * r)

    # Pick the rows to poison.
    poison_indices = df.sample(n=Outcomepoison, random_state=SEED).index

    # Flip both the class label and the attack flag of the selected rows.
    # NOTE(review): `1 - x` assumes both columns hold 0/1 values -- confirm.
    df.loc[poison_indices, 'Outcome'] = 1 - df.loc[poison_indices, 'Outcome']
    df.loc[poison_indices, 'attack'] = 1 - df.loc[poison_indices, 'attack']

    # Save the poisoned dataset. round() rather than int(): int() truncates
    # float products such as 0.29 * 100 == 28.999999999999996 down to 28.
    df.to_csv(f'Diabetes_Poisoning_Detecton_{round(r * 100)}.csv', index=False)

    # Stratified 70/30 split; the detection target is the 'attack' flag.
    X_train, X_test, y_train, y_test = train_test_split(
        df.drop(['attack'], axis=1),
        df['attack'],
        test_size=0.3,
        random_state=101,
        stratify=df['attack'],
    )

    # Balance the classes of the training split only; the test split is untouched.
    # NOTE(review): resampling happens before GridSearchCV, so the CV validation
    # folds contain synthetic samples and the CV scores used for model selection
    # are optimistic. Resampling per fold (imblearn Pipeline) would avoid this.
    sm = SMOTE(random_state=SEED)
    X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

    print(f"Metrics for poisoning rate {r}:")

    for model_name, param in params.items():
        try:
            model_cls = MODEL_CLASSES[model_name]
        except KeyError:
            raise KeyError(f"No estimator registered for {model_name!r}") from None

        # Seed the estimator when it supports it, so runs are repeatable.
        model = model_cls()
        if "random_state" in model.get_params():
            model.set_params(random_state=SEED)

        # Grid search (5-fold CV on the resampled training data) for the best parameters.
        clf = GridSearchCV(model, param, cv=5, scoring='accuracy', n_jobs=-1)
        clf.fit(X_train_res, y_train_res)
        best_model = clf.best_estimator_

        y_pred = best_model.predict(X_test)

        # Metrics on the held-out test split. zero_division=0 makes the
        # "no positive predictions" case an explicit 0 instead of a hidden warning.
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)
        f1 = f1_score(y_test, y_pred, zero_division=0)

        # Print metrics and confusion matrix
        print(f"\nModel: {model_name}")
        print(f"Best Parameters: {clf.best_params_}")
        print(f"Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1-score: {f1}")
        conf_matrix = confusion_matrix(y_test, y_pred)
        print(f"Confusion Matrix:\n{conf_matrix}")


Classification report of poisoning detection:
Metrics for poisoning rate 0.05:

Model: RandomForestClassifier
Best Parameters: {'criterion': 'gini', 'max_depth': 10, 'n_estimators': 200}
Accuracy: 0.9090909090909091, Precision: 0.09090909090909091, Recall: 0.08333333333333333, F1-score: 0.08695652173913043
Confusion Matrix:
[[209  10]
 [ 11   1]]

Model: GradientBoostingClassifier
Best Parameters: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 200}
Accuracy: 0.9177489177489178, Precision: 0.1111111111111111, Recall: 0.08333333333333333, F1-score: 0.09523809523809525
Confusion Matrix:
[[211   8]
 [ 11   1]]

Model: LogisticRegression
Best Parameters: {'C': 10, 'penalty': 'l2'}
Accuracy: 0.7359307359307359, Precision: 0.0847457627118644, Recall: 0.4166666666666667, F1-score: 0.1408450704225352
Confusion Matrix:
[[165  54]
 [  7   5]]

Model: MLPClassifier
Best Parameters: {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000}
Accuracy: 0.8744588744588745, Precisio

In [1]:
#svm model
import math
import pandas as pd
import numpy as np
import warnings 
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from tabulate import tabulate


warnings.filterwarnings("ignore")

# Load the dataset and shuffle its rows once with a fixed seed.
df_original = pd.read_csv("Diabetes _with_new_colum(attack).csv")
print('classification report of poisoning detection:')
df_original = df_original.sample(frac=1, random_state=42)

# One experiment per poisoning rate r.
for r in [0.05, 0.25, 0.5, 0.75]:

    # Start every rate from the (shuffled) clean data.
    df = df_original.copy()

    # Number of rows to poison for this rate (rounded up).
    Outcomepoison = math.ceil(len(df) * r)

    # Pick the rows to poison.
    poison_indices = df.sample(n=Outcomepoison, random_state=42).index

    # Flip both the class label and the attack flag of the selected rows.
    # NOTE(review): `1 - x` assumes both columns hold 0/1 values -- confirm.
    df.loc[poison_indices, 'Outcome'] = 1 - df.loc[poison_indices, 'Outcome']
    df.loc[poison_indices, 'attack'] = 1 - df.loc[poison_indices, 'attack']

    # Save the poisoned dataset. round() rather than int(): int() truncates
    # float products such as 0.29 * 100 == 28.999999999999996 down to 28.
    df.to_csv(f'Diabetes_Poisoning_Detecton_{round(r * 100)}.csv', index=False)

    # Stratified 70/30 split; the detection target is the 'attack' flag.
    X_train, X_test, y_train, y_test = train_test_split(
        df.drop(['attack'], axis=1),
        df['attack'],
        test_size=0.3,
        random_state=101,
        stratify=df['attack'],
    )

    # Classifiers evaluated in this cell.
    # NOTE(review): the features are passed to the linear SVM unscaled -- confirm
    # this is intended.
    classifiers = [
        SVC(kernel='linear', C=10),
    ]

    # Class name -> metric name -> value, filled in by the loop below.
    metrics_results = {}

    for classifier in classifiers:
        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(X_test)

        # zero_division=0 makes the "no positive predictions" case an explicit 0
        # instead of a warning that the global filter hides.
        metrics_results[classifier.__class__.__name__] = {
            "Accuracy": accuracy_score(y_test, y_pred),
            "Precision": precision_score(y_test, y_pred, zero_division=0),
            "Recall": recall_score(y_test, y_pred, zero_division=0),
            "F1-score": f1_score(y_test, y_pred, zero_division=0),
        }

        # Report the confusion matrix inside the loop so it always belongs to
        # this classifier's predictions, not just to the last one fitted.
        conf_matrix = confusion_matrix(y_test, y_pred)
        print("Conf_matrix:\n", conf_matrix)

    print(f"Outcomepoison: {Outcomepoison}")
    print(f"Metrics for poisoning rate {r}:")

    # One table row per model: name followed by its metric values in insertion order.
    headers = ["Model", "Accuracy", "Precision", "Recall", "F1-score"]
    table = [[model, *metrics.values()] for model, metrics in metrics_results.items()]
    print(tabulate(table, headers, tablefmt="simple"))
    print()

classification report of poisoning detection:
مصفوفة الالتباس:
 [[219   0]
 [ 12   0]]
Outcomepoison: 39
Metrics for poisoning rate 0.05:
Model      Accuracy    Precision    Recall    F1-score
-------  ----------  -----------  --------  ----------
SVC        0.948052            0         0           0

مصفوفة الالتباس:
 [[173   0]
 [ 58   0]]
Outcomepoison: 192
Metrics for poisoning rate 0.25:
Model      Accuracy    Precision    Recall    F1-score
-------  ----------  -----------  --------  ----------
SVC        0.748918            0         0           0

مصفوفة الالتباس:
 [[66 49]
 [36 80]]
Outcomepoison: 384
Metrics for poisoning rate 0.5:
Model      Accuracy    Precision    Recall    F1-score
-------  ----------  -----------  --------  ----------
SVC        0.632035     0.620155  0.689655    0.653061

مصفوفة الالتباس:
 [[  0  58]
 [  0 173]]
Outcomepoison: 576
Metrics for poisoning rate 0.75:
Model      Accuracy    Precision    Recall    F1-score
-------  ----------  -----------  -