In [None]:
import os
import json
import re
import warnings
import numpy as np
import pandas as pd
import matplotlib.pylab as plt

In [21]:
class ExperimentArgs(object):
    """Read-only view over the argument mapping an experiment was started with.

    The wrapped mapping is expected to provide the keys ``"experiment_name"``,
    ``"model_name"`` and ``"dataset_folder"``; each is looked up only when the
    corresponding property is accessed.
    """

    def __init__(self, args):
        """Keep a reference to ``args``, the mapping of argument names to values."""
        self._args = args

    @property
    def experiment_name(self):
        """Value stored under ``"experiment_name"``."""
        return self._args["experiment_name"]

    @property
    def dataset_name(self):
        """Last non-empty ``/``-separated component of ``"dataset_folder"``."""
        return self._extract_dataset_name()

    @property
    def model_name(self):
        """Value stored under ``"model_name"``."""
        return self._args["model_name"]

    def _extract_dataset_name(self):
        """Scan the dataset folder path from its end for a non-empty component.

        Trailing slashes yield empty components, which are skipped. Returns
        ``""`` when every component is empty.
        """
        components = self._args["dataset_folder"].split("/")
        for component in reversed(components):
            if component:
                return component
        return ""

    @classmethod
    def from_json_file(cls, path):
        """Build an instance from the JSON document stored at ``path``."""
        with open(path, "r") as handle:
            parsed = json.load(handle)
        return cls(parsed)
        
        
class ExperimentLogs(object):
    """Lazily extracts summary values from the text of an experiment log.

    Each value is parsed from the log on first access and cached. When a value
    cannot be found, a warning is emitted and the value is reported as
    ``np.nan`` (and cached as such, so the warning is not repeated).
    """

    # NOTE(review): this pattern expects no space after "test-mean:" but one
    # after "test-std:" -- confirm that this matches what the log writer emits.
    _GRAND_MEAN_STD_PATTERN = re.compile(r"Multi-run values for test-mean:(\d+\.\d+) test-std: (\d+\.\d+)")
    _TRAINABLE_PARAMS_PATTERN = re.compile(r"Number of trainable model parameters: (\d+)")
    _TOTAL_PARAMS_PATTERN = re.compile(r"Number of total model parameters: (\d+)")
    _NUMBER_REPETITIONS_PATTERN = re.compile(r"Multi-Run: \d+ of (\d+)")

    _INCOMPLETE_LOG_MESSAGE = ("Log files does not seem complete, it might be corrupted or otherwise "
                               "missing results: %s")

    def __init__(self, log):
        """Store ``log``, the complete log text; nothing is parsed yet."""
        self._log = log

        # Caches; ``None`` means "not parsed yet".
        self._test_grandmean = None
        self._test_std = None
        self._trainable_model_params = None
        self._total_model_params = None
        self._number_repetitions = None

    @property
    def test_accuracy_grandmean(self):
        """First captured ``test-mean`` value as float, or ``np.nan`` if absent."""
        if self._test_grandmean is None:
            self._parse_grandmean_and_std()
        return self._test_grandmean

    @property
    def test_accuracy_std(self):
        """First captured ``test-std`` value as float, or ``np.nan`` if absent."""
        if self._test_std is None:
            self._parse_grandmean_and_std()
        return self._test_std

    @property
    def trainable_model_params(self):
        """Captured number of trainable parameters as int, or ``np.nan`` if absent."""
        if self._trainable_model_params is None:
            self._parse_number_parameters()
        return self._trainable_model_params

    @property
    def total_model_params(self):
        """Captured number of total parameters as int, or ``np.nan`` if absent."""
        if self._total_model_params is None:
            self._parse_number_parameters()
        return self._total_model_params

    @property
    def number_repetitions(self):
        """Total from the first ``Multi-Run: i of N`` line as int, or ``np.nan`` if absent."""
        if self._number_repetitions is None:
            self._parse_number_repetitions()
        return self._number_repetitions

    def _search_groups(self, pattern):
        """Return the capture groups of the first match of ``pattern`` in the log.

        Emits a warning and returns ``None`` when the pattern does not occur or
        when the stored log is not searchable text.
        """
        try:
            match = pattern.search(self._log)
        except TypeError as e:
            # The stored log is not a string (e.g. None or bytes).
            reason = str(e)
        else:
            if match is not None:
                return match.groups()
            reason = "pattern %r not found" % pattern.pattern
        warnings.warn(self._INCOMPLETE_LOG_MESSAGE % reason)
        return None

    def _parse_int(self, pattern):
        """Return the first capture group of ``pattern`` as int, or ``np.nan``."""
        groups = self._search_groups(pattern)
        return np.nan if groups is None else int(groups[0])

    def _parse_grandmean_and_std(self):
        """Fill the grand-mean and std caches from a single pattern match."""
        groups = self._search_groups(self._GRAND_MEAN_STD_PATTERN)
        if groups is None:
            self._test_grandmean = np.nan
            self._test_std = np.nan
        else:
            self._test_grandmean = float(groups[0])
            self._test_std = float(groups[1])

    def _parse_number_parameters(self):
        """Fill both parameter-count caches.

        The two counts are parsed independently, each from its own pattern, so
        a missing line only invalidates the value it belongs to.
        """
        self._trainable_model_params = self._parse_int(self._TRAINABLE_PARAMS_PATTERN)
        self._total_model_params = self._parse_int(self._TOTAL_PARAMS_PATTERN)

    def _parse_number_repetitions(self):
        """Fill the repetition-count cache."""
        self._number_repetitions = self._parse_int(self._NUMBER_REPETITIONS_PATTERN)

    @classmethod
    def from_log_file(cls, path):
        """Build an instance from the full text content of the file at ``path``."""
        with open(path, "r") as f:
            return cls(f.read())
        
class Experiment(object):
    """One experiment run: its parsed logs, its arguments and its location.

    Instances order by ``test_accuracy`` (only ``<`` is defined), which is
    enough for ``sorted`` and ``list.sort``.
    """

    def __init__(self, experiment_logs, experiment_args, path, name=None):
        """Bundle logs, arguments and ``path``.

        ``name``, when given, takes precedence over the experiment name
        provided by ``experiment_args``.
        """
        self.experiment_logs = experiment_logs
        self.experiment_args = experiment_args
        self.path = path
        self._name = name

    @property
    def experiment_name(self):
        """Explicit name if one was supplied, otherwise the name from the arguments."""
        if self._name is None:
            return self.experiment_args.experiment_name
        return self._name

    @property
    def dataset_name(self):
        """Dataset name as reported by the experiment arguments."""
        return self.experiment_args.dataset_name

    @property
    def model_name(self):
        """Model name as reported by the experiment arguments."""
        return self.experiment_args.model_name

    @property
    def test_accuracy(self):
        """Grand mean of the test accuracy as reported by the experiment logs."""
        return self.experiment_logs.test_accuracy_grandmean

    def __lt__(self, other):
        """Compare two experiments by their test accuracy."""
        own_accuracy = self.test_accuracy
        other_accuracy = other.test_accuracy
        return own_accuracy < other_accuracy

    def __repr__(self):
        """Multi-line summary of names and test accuracy."""
        template = "Experiment Name: %s\nModel Name: %s\nDataset Name: %s\nTest Accuracy: %f\n"
        fields = (self.experiment_name, self.model_name, self.dataset_name, self.test_accuracy)
        return template % fields

In [22]:
# Collect one Experiment per run directory below the evaluation log folder.
# A run directory is any directory containing both "logs.txt" and "args.txt".
experiments = []
for root, dirs, files in os.walk('log/SpectralModelEvaluation'):
    # Sorting in place fixes the order in which os.walk descends, so the
    # collected list does not depend on the filesystem's listing order.
    dirs.sort()
    if 'logs.txt' not in files:
        continue
    if 'args.txt' not in files:
        # An incomplete run directory should not abort the whole collection.
        warnings.warn("Skipping %s: found logs.txt but no args.txt" % root)
        continue
    args = ExperimentArgs.from_json_file(os.path.join(root, "args.txt"))
    logs = ExperimentLogs.from_log_file(os.path.join(root, "logs.txt"))
    experiment = Experiment(logs, args, root)
    experiments.append(experiment)

In [25]:
# Flatten every collected experiment into one row of the summary table.
records = []

for experiment in experiments:
    name = experiment.model_name
    dataset = experiment.dataset_name
    mean = experiment.experiment_logs.test_accuracy_grandmean
    std = experiment.experiment_logs.test_accuracy_std
    trainable_parameters = experiment.experiment_logs.trainable_model_params
    total_parameters = experiment.experiment_logs.total_model_params
    number_repetitions = experiment.experiment_logs.number_repetitions

    # Standard error of the mean, based on the repetition count parsed from
    # this experiment's own log rather than a fixed constant. A NaN std or
    # repetition count propagates to a NaN standard error.
    standard_error = std / np.sqrt(number_repetitions)

    record = (name, dataset, mean, std, standard_error,
              trainable_parameters, total_parameters, number_repetitions)
    records.append(record)

experiment_df = pd.DataFrame.from_records(records,
                                          columns=[
                                              "ModelName",
                                              "Dataset",
                                              "MeanTestAccuracy",
                                              "TestStandardDeviation",
                                              "TestStandardError",
                                              "TrainableParameters",
                                              "TotalParameters",
                                              "NumberRepetitions"])

In [27]:
# Persist the summary table as CSV. The path is relative, so the file lands in
# the parent of whatever directory the notebook kernel is running in.
experiment_df.to_csv("../summary_experiments2d.csv")