In [14]:
import re
from scipy.stats import mannwhitneyu, kruskal

In [10]:
def extract_subject_data(file_path):
    """Parse a per-subject metrics text file into a nested dictionary.

    A line that, once stripped, starts with digits followed by a colon
    (e.g. ``1069:``) opens a new subject record. Every following line is
    scanned for one of the metric keywords; the first keyword found
    (checked in the order accuracy, precision, recall, F1-score, ROC AUC)
    decides the key, and the text between the first and second colon of
    that line is converted to ``float``.

    Args:
        file_path: Path of the text file to read.

    Returns:
        dict: ``{subject_id (str): {metric_name (str): value (float)}}``.
        A subject whose section holds no metric lines maps to an empty
        dict; a repeated subject header starts that record afresh.

    Raises:
        IndexError: A metric line contains no colon.
        ValueError: The text after the colon is not a valid float.
    """
    # Subject headers look like "1069:", "19:", "201:", ...
    header_re = re.compile(r"^(\d+):")
    # Order matters: the first keyword contained in a line wins.
    metric_names = ("accuracy", "precision", "recall", "F1-score", "ROC AUC")

    subject_data = {}
    active_record = None  # metrics dict of the subject currently being read

    with open(file_path, 'r') as handle:
        for raw_line in handle:
            header = header_re.match(raw_line.strip())
            if header is not None:
                # New subject: register an empty record and make it current.
                active_record = subject_data[header.group(1)] = {}
                continue

            if active_record is None:
                # Anything before the first subject header is ignored.
                continue

            label = next((name for name in metric_names if name in raw_line), None)
            if label is not None:
                active_record[label] = float(raw_line.split(":")[1].strip())

    return subject_data

In [37]:
# Function to perform the tests
def compare_metrics(file_path_1, file_path_2,
                    metrics=('accuracy', 'precision', 'recall', 'F1-score', 'ROC AUC')):
    """Compare per-subject metrics of two result files with rank-based tests.

    Both files are parsed with ``extract_subject_data``. For every metric the
    per-subject values of file 1 and file 2 are treated as two independent
    samples and compared with a two-sided Mann-Whitney U test and a
    Kruskal-Wallis H test. (With only two groups the two tests address the
    same question, so their p-values are expected to be close.)

    NOTE(review): if both files describe the *same* subjects, a paired test
    (e.g. Wilcoxon signed-rank) may fit the design better -- confirm.

    Args:
        file_path_1: Path to the first metrics file.
        file_path_2: Path to the second metrics file.
        metrics: Names of the metrics to compare. Defaults to the five
            metrics produced by ``extract_subject_data``.

    Returns:
        dict: ``{metric: {'Mann-Whitney U Test': {'U-statistic', 'p-value'},
        'Kruskal-Wallis Test': {'H-statistic', 'p-value'}}}`` where every
        statistic and p-value is a string formatted to three decimals.

    Raises:
        ValueError: A file contains no value at all for one of the metrics.

    Warns:
        UserWarning: Some subjects of a file lack a metric and were left out
            of that metric's sample.
    """
    import warnings

    # Extract data from both files
    data_1 = extract_subject_data(file_path_1)
    data_2 = extract_subject_data(file_path_2)

    comparison_results = {}

    for metric in metrics:
        samples = []
        for path, data in ((file_path_1, data_1), (file_path_2, data_2)):
            # A subject record may be incomplete (the parser starts each one
            # empty), so only use subjects that actually carry this metric.
            values = [record[metric] for record in data.values() if metric in record]
            if not values:
                raise ValueError(
                    f"No '{metric}' values found in {path!r}; cannot run the tests."
                )
            missing = len(data) - len(values)
            if missing:
                warnings.warn(
                    f"{missing} subject(s) in {path!r} have no '{metric}' value "
                    "and were skipped.",
                    stacklevel=2,
                )
            samples.append(values)
        metric_data_1, metric_data_2 = samples

        # Mann-Whitney U Test. `alternative` is spelled out because older
        # SciPy releases defaulted to a deprecated mode that reported a
        # halved p-value; this keeps results stable across versions.
        u_statistic, p_value_mannwhitney = mannwhitneyu(
            metric_data_1, metric_data_2, alternative='two-sided'
        )

        # Kruskal-Wallis Test
        h_statistic, p_value_kruskal = kruskal(metric_data_1, metric_data_2)

        # Values are stored as pre-formatted strings (three decimals).
        comparison_results[metric] = {
            'Mann-Whitney U Test': {
                'U-statistic': f"{u_statistic:.3f}",
                'p-value': f"{p_value_mannwhitney:.3f}"
            },
            'Kruskal-Wallis Test': {
                'H-statistic': f"{h_statistic:.3f}",
                'p-value': f"{p_value_kruskal:.3f}"
            }
        }

    return comparison_results

In [47]:
# Alternative input pairs, currently disabled. Uncomment exactly one
# data_1 / data_2 pair to compare a different set of result files.

# data_1 = "./metrics/SVM/scaled_rbf_mfcc_20_no_pitch_rand.txt"
# data_2 = "./metrics/SVM/scaled_rbf_mfcc_20_pitch_rand.txt"

# data_1 = "./metrics/SVM/rbf_pca_mfcc_20_no_pitch_rand.txt"
# data_2 = "./metrics/SVM/rbf_pca_mfcc_20_pitch_rand.txt"

# data_1 = "./metrics/SVM/scaled_rbf_mfcc_20_no_pitch_rand.txt"
# data_2 = "./metrics/SVM/rbf_pca_mfcc_20_no_pitch_rand.txt"

# Active pair.
data_1 = "./metrics/HMM/diag_5_mfcc_20_no_pitch_rand.txt"
data_2 = "./metrics/HMM/diag_5_pca_mfcc_20_no_pitch_rand.txt"

# Alternative second file for the active data_1:
# data_2 = "./metrics/HMM/diag_5_mfcc_20_pitch_rand.txt"

results = compare_metrics(data_1, data_2)

# Print one indented section per metric, with one sub-section per test.
for metric_name, per_test in results.items():
    print(f"Metric: {metric_name}")
    for test_name, stats in per_test.items():
        # The Mann-Whitney entry stores a 'U-statistic'; the other test
        # stores an 'H-statistic'.
        if 'Mann-Whitney' in test_name:
            stat_label = 'U-statistic'
        else:
            stat_label = 'H-statistic'
        print(f"  {test_name}:")
        print(f"    {stat_label} = {stats[stat_label]}")
        print(f"    p-value = {stats['p-value']}")

Metric: accuracy
  Mann-Whitney U Test:
    U-statistic = 215.500
    p-value = 0.685
  Kruskal-Wallis Test:
    H-statistic = 0.176
    p-value = 0.675
Metric: precision
  Mann-Whitney U Test:
    U-statistic = 224.500
    p-value = 0.516
  Kruskal-Wallis Test:
    H-statistic = 0.439
    p-value = 0.507
Metric: recall
  Mann-Whitney U Test:
    U-statistic = 217.500
    p-value = 0.645
  Kruskal-Wallis Test:
    H-statistic = 0.225
    p-value = 0.636
Metric: F1-score
  Mann-Whitney U Test:
    U-statistic = 212.500
    p-value = 0.745
  Kruskal-Wallis Test:
    H-statistic = 0.114
    p-value = 0.735
Metric: ROC AUC
  Mann-Whitney U Test:
    U-statistic = 223.000
    p-value = 0.543
  Kruskal-Wallis Test:
    H-statistic = 0.387
    p-value = 0.534
