In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, train_test_split, cross_validate
from sklearn.metrics import accuracy_score, f1_score
from time import time

In [2]:
# Path to the .names file
spambase_names_path = 'spambase.names'

# Function to read and extract feature names from the .names file
def extract_feature_names(file_path):
    with open(file_path, 'r') as file:
        # Skip the first 33 lines and start reading from line 34
        lines = file.readlines()[33:]
    # Extract feature names from the file content
    feature_names = []
    for line in lines:
        if ':' in line:
            name = line.split(':')[0]
            feature_names.append(name)
    return feature_names

# Adding names
feature_names = extract_feature_names(spambase_names_path)
feature_names.append('is_spam')  # Adding the class label since it is not defined as the last column name

# Load the dataset with names
spambase_data_path = 'spambase.data'
data = pd.read_csv(spambase_data_path, names=feature_names)

# Separate X and y
X = data.drop('is_spam', axis=1)
y = data['is_spam']


In [3]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB

classifiers = {
    "Random Forest": RandomForestClassifier(n_estimators=100, max_features='sqrt'),
    "Naive Bayes": GaussianNB(),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)
}


In [5]:
# Stratified K-Fold
skf = StratifiedKFold(n_splits=10)

In [6]:
# Define metrics to compute
scoring = ['accuracy', 'f1']

# Initialize a dictionary to store all results
results = {'Accuracy': {name: [] for name in classifiers.keys()},
           'F-Measure': {name: [] for name in classifiers.keys()},
           'Training Time': {name: [] for name in classifiers.keys()}}

# Perform cross-validation for each classifier
for name, clf in classifiers.items():
    cv_results = cross_validate(clf, X_train, y_train, cv=skf, scoring=scoring, return_train_score=False, n_jobs=-1)

    # Append the results for each fold to the results dictionary
    results['Accuracy'][name] = cv_results['test_accuracy']
    results['F-Measure'][name] = cv_results['test_f1']
    results['Training Time'][name] = cv_results['fit_time']

# Convert results for each metric into a DataFrame
for metric in results:
    results[metric] = pd.DataFrame(results[metric])

In [7]:
# Calculate mean and standard deviation for each classifier in each metric
final_results = {}
for metric, df in results.items():
    means = df.mean(axis=0).rename('Mean')
    stds = df.std(axis=0).rename('Std')
    final_results[metric] = pd.concat([means, stds], axis=1)


In [8]:
# Now, to print the results in the desired format
for metric in results:
    print(f"{metric} without Ranks:")
    print('-'*55)
    
    # Print the classifier names
    classifier_names = results[metric].columns
    print('   ', '  '.join(classifier_names))
    print('-'*55)
    
    # Print each row of data (for each fold)
    for index in range(len(results[metric])):
        row_data = [results[metric][classifier].iloc[index] for classifier in classifier_names]
        print(f"{index:<3}", '  '.join(f"{val:.6f}" for val in row_data))
    
    print('-'*55)

    # Calculate and print mean and std for each classifier
    means = results[metric].mean()
    stds = results[metric].std()

    print("mean", '  '.join(f"{means[name]:.6f}" for name in classifier_names))
    print("std", '  '.join(f"{stds[name]:.6f}" for name in classifier_names))
    print('-'*55)
    print()


Accuracy without Ranks:
-------------------------------------------------------
    Random Forest  Naive Bayes  Gradient Boosting
-------------------------------------------------------
0   0.970109  0.880435  0.967391
1   0.975543  0.804348  0.972826
2   0.959239  0.796196  0.951087
3   0.959239  0.823370  0.956522
4   0.959239  0.807065  0.951087
5   0.948370  0.815217  0.940217
6   0.926630  0.807065  0.921196
7   0.951087  0.826087  0.921196
8   0.953804  0.823370  0.942935
9   0.948370  0.820652  0.932065
-------------------------------------------------------
mean 0.955163  0.820380  0.945652
std 0.013389  0.023286  0.017657
-------------------------------------------------------

F-Measure without Ranks:
-------------------------------------------------------
    Random Forest  Naive Bayes  Gradient Boosting
-------------------------------------------------------
0   0.961938  0.865031  0.958621
1   0.968641  0.796610  0.965035
2   0.946996  0.787535  0.937063
3   0.947735  0.81

In [9]:
# Update the results DataFrame with ranks for each fold
for metric, df in results.items():
    ascending = True if metric == 'Training Time' else False
    ranks = df.rank(axis=1, ascending=ascending)
    
    ranked_df = df.copy()
    for col in ranked_df.columns:
        ranked_df[col] = ranked_df[col].round(6).astype(str) + " (" + ranks[col].astype(int).astype(str) + ")"
    
    results[metric] = ranked_df


In [10]:
# Your existing code for updating the results DataFrame with ranks...

# Print the results with ranks and calculate average ranks
for metric, df in results.items():
    print(f"{metric} with Ranks:")
    print('-'*55)
    
    # Print the classifier names
    classifier_names = df.columns
    print('  ', '  '.join(classifier_names))
    print('-'*55)
    
    # Print each row of data with ranks
    for index, row in df.iterrows():
        formatted_row = [f"{value}" for value in row]
        print(index, '  '.join(formatted_row))
    
    print('-'*55)

    # Calculate and print average rank for each classifier
    avg_ranks = df.applymap(lambda x: int(x.split('(')[1].replace(')', ''))).mean(axis=0)
    print("Average Rank", '  '.join(f"{avg_ranks[name]:.1f}" for name in classifier_names))
    print()


Accuracy with Ranks:
-------------------------------------------------------
   Random Forest  Naive Bayes  Gradient Boosting
-------------------------------------------------------
0 0.970109 (1)  0.880435 (3)  0.967391 (2)
1 0.975543 (1)  0.804348 (3)  0.972826 (2)
2 0.959239 (1)  0.796196 (3)  0.951087 (2)
3 0.959239 (1)  0.82337 (3)  0.956522 (2)
4 0.959239 (1)  0.807065 (3)  0.951087 (2)
5 0.94837 (1)  0.815217 (3)  0.940217 (2)
6 0.92663 (1)  0.807065 (3)  0.921196 (2)
7 0.951087 (1)  0.826087 (3)  0.921196 (2)
8 0.953804 (1)  0.82337 (3)  0.942935 (2)
9 0.94837 (1)  0.820652 (3)  0.932065 (2)
-------------------------------------------------------
Average Rank 1.0  3.0  2.0

F-Measure with Ranks:
-------------------------------------------------------
   Random Forest  Naive Bayes  Gradient Boosting
-------------------------------------------------------
0 0.961938 (1)  0.865031 (3)  0.958621 (2)
1 0.968641 (1)  0.79661 (3)  0.965035 (2)
2 0.946996 (1)  0.787535 (3)  0.937063 (2

  avg_ranks = df.applymap(lambda x: int(x.split('(')[1].replace(')', ''))).mean(axis=0)


In [11]:
def friedman_statistic(avg_ranks, N, k):
    # Calculation of Friedman statistic
    sum_of_squares = np.sum(avg_ranks ** 2)
    chi2 = (12 * N / (k * (k + 1))) * (sum_of_squares - (k * (k + 1) ** 2 / 4))
    degrees_of_freedom = k - 1

    return chi2, degrees_of_freedom

# Usage of the function
N = 10  # Number of datasets (folds)
k = 3   # Number of algorithms

avg_ranks_accuracy = np.array([1.1, 3, 1.7])  # Accuracy
avg_ranks_fmeasure = np.array([1.2, 3.0, 1.7])  # F-Measure
avg_ranks_training_time = np.array([2.0, 1.0, 3.0])  # Training Time

chi2_accuracy, df_accuracy = friedman_statistic(avg_ranks_accuracy, N, k)
chi2_fmeasure, df_fmeasure = friedman_statistic(avg_ranks_fmeasure, N, k)
chi2_training_time, df_training_time = friedman_statistic(avg_ranks_training_time, N, k)

print("Friedman Statistic and Degrees of Freedom for Each Metric:")
print(f"Accuracy: Chi2 = {chi2_accuracy}, df = {df_accuracy}")
print(f"F-Measure: Chi2 = {chi2_fmeasure}, df = {df_fmeasure}")
print(f"Training Time: Chi2 = {chi2_training_time}, df = {df_training_time}")


Friedman Statistic and Degrees of Freedom for Each Metric:
Accuracy: Chi2 = 11.000000000000014, df = 2
F-Measure: Chi2 = 13.299999999999983, df = 2
Training Time: Chi2 = 20.0, df = 2


In [16]:
def is_significant(chi2_statistic, df):
    critical_value = 7.8
    return chi2_statistic > critical_value

def nemenyi_critical_difference(N, k, alpha=0.05):
    # Critical value q_alpha 
    q_alpha = 2.343  # From 12.3 How to interpret it, "Post-Hoc test I" 
    return q_alpha * np.sqrt((k * (k + 1)) / (6 * N))


# Check significance for each metric and calculate critical differences if significant
significance_accuracy = is_significant(chi2_accuracy, df_accuracy)
significance_fmeasure = is_significant(chi2_fmeasure, df_fmeasure)
significance_training_time = is_significant(chi2_training_time, df_training_time)

cd_accuracy = nemenyi_critical_difference(N, k) if significance_accuracy else None
cd_fmeasure = nemenyi_critical_difference(N, k) if significance_fmeasure else None
cd_training_time = nemenyi_critical_difference(N, k) if significance_training_time else None

# Print results
print("Significance and Critical Differences for Each Metric:")
print(f"Accuracy: Significance = {significance_accuracy}, Critical Difference = {cd_accuracy}")
print(f"F-Measure: Significance = {significance_fmeasure}, Critical Difference = {cd_fmeasure}")
print(f"Training Time: Significance= {significance_training_time}, Critical Difference = {cd_training_time}")


Significance and Critical Differences for Each Metric:
Accuracy: Significance = True, Critical Difference = 1.0478214542564015
F-Measure: Significance = True, Critical Difference = 1.0478214542564015
Training Time: Significance= True, Critical Difference = 1.0478214542564015


In [None]:
stest