In [22]:
# Import
import numpy as np
import json
import os.path as osp

from collections import namedtuple
from typing import List

In [54]:
# Per-method result layout: 'folder_result' is the root folder holding one
# sub-folder per dataset, and 'json_file' is the name of the results file
# looked up inside each experiment folder.
METHODS = dict(
    knn=dict(
        folder_result='../results/knn',
        json_file='results_general.json',
    ),
    gnn=dict(
        folder_result='../results/gnn',
        json_file='stats_gnn_training.json',
    ),
)

In [58]:
class Experiment:
    """Results of one method on one dataset, loaded from per-experiment JSON files.

    Public attributes:
        dataset: dataset name, used as a sub-folder of the method's result folder.
        method: key into the module-level ``METHODS`` table.
        experiments: maps each experiment name to its result folder path.
        experiments_data: maps each experiment name to the parsed JSON content.
    """

    def __init__(self, dataset: str, method: str, experiment_folders: List[str]) -> None:
        """Resolve the result folders and eagerly load every experiment's JSON file.

        Args:
            dataset: Name of the dataset sub-folder inside the method's
                ``folder_result``.
            method: Key of ``METHODS`` selecting the result folder and the
                JSON file name.
            experiment_folders: Names of the experiment sub-folders to load;
                each name is also used as the key in ``experiments`` and
                ``experiments_data``.

        Raises:
            KeyError: If ``method`` is not a key of ``METHODS``.
            OSError: If a result file cannot be opened.
            json.JSONDecodeError: If a result file is not valid JSON.
        """
        self.dataset = dataset
        self.method = method

        # <folder_result>/<dataset>/<experiment name>
        self.experiments = {name_exp: osp.join(METHODS[method]['folder_result'], dataset, name_exp)
                            for name_exp in experiment_folders}
        self.experiments_data = {}

        self._load_json_data()

    def _load_json_data(self) -> None:
        """Load the results data of every experiment from its json file."""
        for name_exp, folder_exp in self.experiments.items():
            filename = osp.join(folder_exp, METHODS[self.method]['json_file'])
            # JSON is UTF-8 by specification; do not depend on the locale's
            # default encoding, which differs between platforms.
            with open(filename, 'r', encoding='utf-8') as file:
                data = json.load(file)

            self.experiments_data[name_exp] = data


# KNN results: for every dataset, one baseline run folder and the run on
# graphs reduced to 50% ('reduced_graphs_gnn_50').
experiments_knn = [
    Experiment(dataset, 'knn', [baseline_folder, 'reduced_graphs_gnn_50'])
    for dataset, baseline_folder in (
        ('enzymes', 'baseline'),
        ('NCI1', 'baseline_more_seeds'),
        ('mutagenicity', 'baseline_more_seeds'),
        ('proteins', 'baseline_more_seeds'),
        ('dd', 'baseline_more_seeds'),
    )
]

# GNN results: a single experiment folder named '50' per dataset. The order
# matches experiments_knn so both lists can be walked in parallel.
experiments_gnn = [
    Experiment(dataset, 'gnn', ['50'])
    for dataset in ('ENZYMES', 'NCI1', 'Mutagenicity', 'PROTEINS', 'DD')
]

In [70]:
def accuracies_time_knn(data_knn):
    """Print mean±std of test accuracy and prediction time for each KNN experiment.

    Args:
        data_knn: Mapping of experiment name to a mapping of run title to that
            run's results. The 'parameters' entry is ignored; every other
            entry is read for 'acc_test' and 'prediction_time_test'.

    A run missing either metric is skipped entirely, so the reported number of
    runs, the accuracy statistics and the time statistics always describe the
    same set of runs.

    Returns:
        None. The summary is written to standard output.
    """
    for name_exp, data in data_knn.items():
        accuracies = []
        prediction_times = []

        for exp_title, exp_res in data.items():
            if exp_title == 'parameters':
                continue

            try:
                # Read both metrics before storing either one: appending the
                # accuracy first would keep it even when the time is missing.
                acc_test = exp_res['acc_test']
                prediction_time = exp_res['prediction_time_test']
            except KeyError:
                continue

            accuracies.append(acc_test)
            prediction_times.append(prediction_time)

        np_accuracies = np.array(accuracies)
        np_prediction_times = np.array(prediction_times)

        print(f'Number of runs: {len(np_accuracies)}')
        print(f'Mean acc test {name_exp}: {np.mean(np_accuracies):.2f}±{np.std(np_accuracies):.2f}')
        print(f'Mean time test {name_exp}: {np.mean(np_prediction_times):.2f}±{np.std(np_prediction_times):.2f}')

        print()

def accuracies_gnn(data_gnn):
    """Print the run count and mean±std of 'best_test_acc' for each GNN experiment.

    Args:
        data_gnn: Mapping of experiment name to a mapping of run title to that
            run's results. The 'parameters' entry is ignored and runs without
            a 'best_test_acc' key are skipped.

    The collected values are multiplied by 100 before the statistics are
    printed. Nothing is returned.
    """
    for name_exp, data in data_gnn.items():
        collected = []

        for run_title, run_stats in data.items():
            if run_title != 'parameters':
                try:
                    best_acc = run_stats['best_test_acc']
                except KeyError:
                    # This run did not record the metric; leave it out.
                    continue
                collected.append(best_acc)

        np_accuracies = np.array(collected) * 100

        n_runs = len(np_accuracies)
        print(f'Number of runs: {n_runs}')

        mean_acc = np.mean(np_accuracies)
        std_acc = np.std(np_accuracies)
        print(f'Mean acc test {name_exp}: {mean_acc:.2f}±{std_acc:.2f}')

        print()
    
        
# Walk the KNN and GNN experiment lists in parallel (they are paired by
# position) and print both summaries under an underlined dataset title.
for knn_experiment, gnn_experiment in zip(experiments_knn, experiments_gnn):
    dataset_name = knn_experiment.dataset
    print(f'{dataset_name}')
    print('-' * len(dataset_name))

    accuracies_time_knn(knn_experiment.experiments_data)
    accuracies_gnn(gnn_experiment.experiments_data)

enzymes
-------
Number of runs: 10
Mean acc test baseline: 49.00±3.82
Mean time test baseline: 11.02±2.62

Number of runs: 10
Mean acc test reduced_graphs_gnn_50: 42.50±4.59
Mean time test reduced_graphs_gnn_50: 4.89±0.75

Number of runs: 10
Mean acc test 50: 27.33±4.86

NCI1
----
Number of runs: 5
Mean acc test baseline_more_seeds: 72.00±1.12
Mean time test baseline_more_seeds: 261.13±13.72

Number of runs: 5
Mean acc test reduced_graphs_gnn_50: 71.61±1.67
Mean time test reduced_graphs_gnn_50: 70.16±7.52

Number of runs: 5
Mean acc test 50: 65.94±2.57

mutagenicity
------------
Number of runs: 5
Mean acc test baseline_more_seeds: 74.33±1.81
Mean time test baseline_more_seeds: 307.50±19.19

Number of runs: 5
Mean acc test reduced_graphs_gnn_50: 72.65±2.60
Mean time test reduced_graphs_gnn_50: 85.42±2.01

Number of runs: 5
Mean acc test 50: 72.63±4.52

proteins
--------
Number of runs: 5
Mean acc test baseline_more_seeds: 71.79±4.17
Mean time test baseline_more_seeds: 38.10±2.32

Number

In [12]:
# Collect the per-run test accuracy of the full-size and the reduced data.
# NOTE(review): data_full and data_50 are not defined in the visible part of
# this notebook — confirm the cell that creates them still exists, otherwise
# this cell fails after Restart & Run All.
test_accuracies, test_accuracies_50 = [], []

for exp_title, exp in data_full.items():
    # 'parameters' holds settings, not a run
    if exp_title != 'parameters':
        test_accuracies.append(exp['acc_test'])

for exp_title, exp in data_50.items():
    if exp_title != 'parameters':
        test_accuracies_50.append(exp['acc_test'])
        # Also show the selected parameters next to each run's accuracy.
        print(exp_title, exp['best_params'], exp['acc_test'])

exp_16 [1, 0.9] 49.166666666666664
exp_58 [1, 0.75] 42.5
exp_255 [1, 0.75] 42.5
exp_277 [1, 0.95] 48.333333333333336
exp_307 [1, 0.95] 38.333333333333336
exp_657 [1, 0.75] 46.666666666666664
exp_1211 [1, 0.95] 44.166666666666664
exp_1344 [1, 0.95] 48.333333333333336
exp_1841 [1, 0.9] 53.333333333333336
exp_1860 [1, 0.95] 42.5


In [8]:
# Convert the collected accuracies to arrays and report mean±std for each.
np_test_acc, np_test_acc_50 = np.array(test_accuracies), np.array(test_accuracies_50)
print(f'Mean acc test full size: {np_test_acc.mean():.2f}±{np_test_acc.std():.2f}')
print(f'Mean acc test 50% reduced: {np_test_acc_50.mean():.2f}±{np_test_acc_50.std():.2f}')

Mean acc test full size: 49.08±3.77
Mean acc test 50% reduced: 44.33±4.21


In [10]:
# Prints the same mean±std summary as the cell that builds np_test_acc.
# NOTE(review): the output saved below this cell uses different labels and a
# different reduced-size value than this code prints, so it looks stale —
# re-run the notebook top to bottom before relying on it.
print(f'Mean acc test full size: {np_test_acc.mean():.2f}±{np_test_acc.std():.2f}')
print(f'Mean acc test 50% reduced: {np_test_acc_50.mean():.2f}±{np_test_acc_50.std():.2f}')

Mean acc test: 49.08±3.77
Mean acc test: 45.58±4.15


In [11]:
# Hard-coded GNN scores, one entry per run: run number -> {'val', 'test'}.
# The run numbers are the same ones that appear in the exp_<n> titles printed
# above (presumably the run seeds — confirm). Insertion order is kept because
# the array built from this dict follows it.
gnn_stats = {
    run_number: {'val': val_score, 'test': test_score}
    for run_number, val_score, test_score in (
        (1860, 0.31, 0.23),
        (1344, 0.38, 0.28),
        (255, 0.34, 0.26),
        (1841, 0.28, 0.17),
        (277, 0.36, 0.35),
        (58, 0.31, 0.27),
        (16, 0.26, 0.33),
        (307, 0.38, 0.30),
        (657, 0.29, 0.24),
        (1211, 0.36, 0.33),
    )
}

In [15]:
# Test scores of every GNN run, in dict order, scaled by 100.
np_test_train = np.array([run_stats['test'] for run_stats in gnn_stats.values()]) * 100

In [16]:
# Mean±std of the scaled GNN test scores.
print(f'Mean acc test GNN: {np_test_train.mean():.2f}±{np_test_train.std():.2f}')

Mean acc test GNN: 27.60±5.18


In [17]:
# Show the individual per-run values behind the summary statistics.
print(np_test_train)

[23. 28. 26. 17. 35. 27. 33. 30. 24. 33.]
