In [5]:
from sklearn.model_selection import cross_val_score, cross_validate, StratifiedKFold
from sklearn.metrics import accuracy_score, balanced_accuracy_score, recall_score, precision_score, confusion_matrix, make_scorer
from sklearn import svm
import numpy as np
import pandas as pd
from utils import specificity_score, negative_prediction_value_score, gmean_score, informedness_score
from sklearn.model_selection import ShuffleSplit

# Data Loading

In [6]:
# Load the data. The CSV is read with header=None, so the column names
# are assigned explicitly on the next line.
df = pd.read_csv('./data/HTRU_2.csv', header=None)
df.columns = ['IpMean', 'IpDev', 'IpKurt','IpSkew', 'DMMean', 'DMDev', 'DMKurt', 'DMSkew', 'Class']

# Split the data into features (every column but the last) and target (last column)
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Define the number of experiments (one random train/test split per experiment)
n_runs = 100

# Seed for the random splits. Without a fixed seed every kernel restart draws
# different splits, so the reported means/standard deviations are not reproducible.
random_seed = 42

# Initialize lists to store the scores (one list per metric, one entry per run)
scores = {
    'accuracy': [],
    'balanced_accuracy': [],
    'recall': [],
    'specificity': [],
    'precision': [],
    'npv': [],
    'gmean': [],
    'informedness': []
}


# Create a ShuffleSplit instance: n_runs independent random splits, each with
# 200 training rows and 400 test rows, drawn deterministically from random_seed.
ss = ShuffleSplit(n_splits=n_runs, train_size=200, test_size=400, random_state=random_seed)

# Training SVM

In [7]:
# Initialize the model (linear-kernel SVM); the same estimator object is
# re-fitted on every split.
model = svm.SVC(kernel='linear', C=1.0)

# Empty the score lists before starting. They are created in an earlier cell,
# so re-running this cell on its own would otherwise append a second batch of
# results to the first and distort the reported mean / standard deviation.
for metric_values in scores.values():
    metric_values.clear()

for train_index, test_index in ss.split(X):
    # Split the data into training and testing sets (positional indices from the splitter)
    x_train, x_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train the model and make predictions
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    # Store the scores: one value per metric for this split
    scores['accuracy'].append(accuracy_score(y_test, y_pred))
    scores['balanced_accuracy'].append(balanced_accuracy_score(y_test, y_pred))
    scores['recall'].append(recall_score(y_test, y_pred))
    scores['specificity'].append(specificity_score(y_test, y_pred))
    scores['precision'].append(precision_score(y_test, y_pred))
    scores['npv'].append(negative_prediction_value_score(y_test, y_pred))
    scores['gmean'].append(gmean_score(y_test, y_pred))
    scores['informedness'].append(informedness_score(y_test, y_pred))

# Results

In [8]:
# Summarise every metric over all runs as "mean ± standard deviation"
# (np.ndarray.std defaults to the population standard deviation, ddof=0).
for metric, values in scores.items():
    values_arr = np.asarray(values)
    mean_value, std_value = values_arr.mean(), values_arr.std()
    print(f"{metric.capitalize()}: {mean_value:.3f} ± {std_value:.3f}")

Accuracy: 0.972 ± 0.010
Balanced_accuracy: 0.889 ± 0.047
Recall: 0.787 ± 0.095
Specificity: 0.991 ± 0.009
Precision: 0.907 ± 0.078
Npv: 0.978 ± 0.009
Gmean: 0.881 ± 0.055
Informedness: 0.778 ± 0.094
