In [5]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 10 16:28:28 2018
Updated for Cross-Validation
"""

import constants as ct
import argparse
import joblib
import numpy as np
from sklearn import preprocessing, metrics
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier  # or any other classifier
from math import sqrt

def parse_arguments():
    """Parse the command-line options of the cross-validation script.

    Options:
        -p  Path to the directory of the extracted features.
        -k  Number of folds for cross-validation (integer, defaults to 5).

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``, exposing the
        attributes ``p`` and ``k``.
    """
    cli = argparse.ArgumentParser(description='Evaluate with Cross Validation.')

    # Keep each option's settings together so they can be read at a glance.
    feature_path_opt = {
        'metavar': '<feature path>',
        'help': 'Path to the directory of the extracted features',
    }
    fold_count_opt = {
        'type': int,
        'default': 5,
        'help': 'Number of folds for cross-validation (default: 5)',
    }

    cli.add_argument('-p', **feature_path_opt)
    cli.add_argument('-k', **fold_count_opt)
    return cli.parse_args()

def cross_validate(X, y, k):
    """Estimate classifier accuracy with stratified k-fold cross-validation.

    For every fold, a MinMaxScaler with range (-1, 1) is fitted on the
    training part only and then applied to both parts, so no statistics of
    the held-out samples leak into the scaling. A 100-tree
    RandomForestClassifier is trained on the scaled training part and scored
    on the scaled held-out part.

    Args:
        X: Feature array; it is indexed with the integer index arrays
            produced by the splitter, so it must support that kind of
            indexing (e.g. a NumPy array).
        y: Label array aligned with ``X``, indexed the same way.
        k: Number of folds.

    Returns:
        A list with one accuracy score per fold, in fold order.
    """
    # Fixed seeds keep both the fold assignment and the forest reproducible.
    folds = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
    fold_scores = []

    for fit_idx, eval_idx in folds.split(X, y):
        # Scale using training-fold statistics only.
        scaler = preprocessing.MinMaxScaler((-1, 1))
        train_features = scaler.fit_transform(X[fit_idx])
        held_out_features = scaler.transform(X[eval_idx])

        # Example classifier; any other estimator could be used here.
        clf = RandomForestClassifier(n_estimators=100, random_state=42)
        clf.fit(train_features, y[fit_idx])

        predicted = clf.predict(held_out_features)
        fold_scores.append(metrics.accuracy_score(y[eval_idx], predicted))

    return fold_scores

# Load data
# The .npy file stores a pickled dict; the code reads its 'feature' and
# 'label' entries.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, which can
# execute code on load. Only use this with files from a trusted source.
dic = np.load("/Users/ct/Library/Mobile Documents/com~apple~CloudDocs/cybersecurity_robotics/WebsiteFingerprinting/attacks/cumul/results/torque_data.npy", allow_pickle=True).item()
X = np.array(dic['feature'])
y = np.array(dic['label'])

# Perform cross-validation
# Single source of truth for the fold count: it drives the split, the printed
# label, and (through len(acc_scores)) the confidence interval below.
n_folds = 10
acc_scores = cross_validate(X, y, n_folds)
mean_accuracy = np.mean(acc_scores)
# NOTE(review): np.std defaults to the population standard deviation (ddof=0);
# a sample-based interval would normally use ddof=1 -- confirm which is intended.
std_dev = np.std(acc_scores)
print('cumul')
print('{}-fold Cross Validation Accuracy Scores:'.format(n_folds), acc_scores)
print('Mean Accuracy:', mean_accuracy)
print('Standard Deviation of Accuracy:', std_dev)
print(acc_scores)

# Normal-approximation confidence interval for the mean fold accuracy.
z_value = 1.96  # For a 95% confidence interval
# Use the actual number of scores rather than a literal so the standard error
# stays correct whenever the fold count changes.
margin_of_error = z_value * (std_dev / sqrt(len(acc_scores)))
lower_bound = mean_accuracy - margin_of_error
upper_bound = mean_accuracy + margin_of_error
confidence_interval = (lower_bound, upper_bound)
print("95% Confidence Interval: {:.4f} to {:.4f}".format(*confidence_interval))

cumul
10-fold Cross Validation Accuracy Scores: [0.65, 0.6, 0.725, 0.525, 0.75, 0.65, 0.55, 0.625, 0.625, 0.675]
Mean Accuracy: 0.6375
Standard Deviation of Accuracy: 0.06637959023675875
[0.65, 0.6, 0.725, 0.525, 0.75, 0.65, 0.55, 0.625, 0.625, 0.675]
95% Confidence Interval: 0.5964 to 0.6786
