In [6]:
import numpy as np
import matplotlib.pyplot as plt
import json
import os, sys, glob
from collections import defaultdict

In [7]:
# Collect the per-dataset result files, skipping any path that contains
# "secondary". The list is sorted because glob returns entries in arbitrary,
# OS-dependent order and every later cell iterates over `matches`.
# No recursive flag is needed: the pattern has no "**" component.
matches = sorted(
    path
    for path in glob.glob('average_results/*.json')
    if "secondary" not in path
)

for match in matches:
    print(match)

average_results\annthyroid_results.json
average_results\breastw_results.json
average_results\cardio_results.json
average_results\ionosphere_results.json
average_results\letter_results.json
average_results\mammography_results.json
average_results\mnist_results.json
average_results\optdigits_results.json
average_results\pendigits_results.json
average_results\pima_results.json
average_results\satellite_results.json
average_results\satimage-2_results.json
average_results\thyroid_results.json
average_results\vertebral_results.json
average_results\vowels_results.json
average_results\wbc_results.json
average_results\wine_results.json


AUC

In [8]:
def weighted_auc_lin(x, y):
    """Weighted mean of ``y`` over ``x`` using trapezoidal integration.

    Each point gets the weight ``w(x) = 1 / (x + 1) ** exp`` with ``exp`` just
    above 1, so points at small ``x`` dominate. The result is::

        trapz(w * y, x) / trapz(w, x)

    i.e. the weighted area under the curve normalised by the area under the
    weight function, which keeps the value on the same scale as ``y``.

    Parameters
    ----------
    x : sequence of numbers
        Abscissae. Assumed to be sorted in ascending order (the caller in this
        notebook sorts before calling); ``x + 1`` must be non-zero.
    y : sequence of numbers
        Values at each ``x``; same length as ``x``.

    Returns
    -------
    numpy.float64
        The normalised weighted area. It is ``nan`` when the total weight is
        zero (e.g. all ``x`` equal).

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in length or contain fewer than two points.
    """
    # Smallest float above 1: the series sum(1 / n**p) converges only for p > 1.
    exp = np.nextafter(1.0, 2.0)

    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    if len(x) != len(y):
        raise ValueError("x and y must have the same length")
    if len(x) < 2:
        raise ValueError("x and y must have at least two elements (one trapezoid)")

    # Compute every point's weight up front so each trapezoid uses the weights
    # of its own two endpoints. (Carrying a "previous weight" through a loop
    # is error-prone: forgetting to advance it pins the left weight to x[0].)
    weights = 1.0 / (x + 1.0) ** exp
    widths = np.diff(x)

    weighted_area = np.sum(widths * (weights[:-1] * y[:-1] + weights[1:] * y[1:])) / 2.0
    total_weight = np.sum(widths * (weights[:-1] + weights[1:])) / 2.0

    return weighted_area / total_weight

In [9]:
# For every dataset file in `matches`, collapse each score curve into a single
# weighted-AUC value, average those values over the n_features level, and write
# one JSON file per dataset into `save_path`.
save_path = "rwap_results_single"

# Create the output directory once; exist_ok=True makes this idempotent and
# avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(save_path, exist_ok=True)

for match in matches:
    # e.g. "wine_results.json" -> "wine_rwap_results.json"
    # (str.replace rewrites every occurrence of "results" in the base name).
    file_name = os.path.basename(match).replace("results", "rwap_results")
    save_file = os.path.join(save_path, file_name)

    with open(match, 'r', encoding='utf-8') as f:
        results = json.load(f)["average_precision"]

    # Input nesting : model -> n_new_trees -> n_features -> {"<label>:<int>": score}
    # Output nesting: model -> n_new_trees -> mean weighted AUC over n_features
    # NOTE(review): the trailing integer of each key is presumably a block
    # count -- confirm against the code that produced the result files.
    output = defaultdict(dict)

    for model, per_trees in results.items():
        for n_new_trees, per_features in per_trees.items():
            auc = []
            for curve in per_features.values():
                # Parse the integer after the last ":" once per point and order
                # the curve by it; sorting on x only keeps ties in file order.
                points = sorted(
                    ((int(key.split(":")[-1]), value) for key, value in curve.items()),
                    key=lambda point: point[0],
                )
                x = [point[0] for point in points]
                y = [point[1] for point in points]

                auc.append(weighted_auc_lin(x, y))

            # Plain float so the stored value is an ordinary JSON number.
            output[model][n_new_trees] = float(np.mean(auc))

    with open(save_file, 'w', encoding='utf-8') as f:
        json.dump(output, f, indent=4)

    

