In [None]:
%matplotlib inline

import json
import math

import numpy as np
import matplotlib.pyplot as plt

from abc import ABC, abstractmethod
from itertools import groupby
from collections import defaultdict

In [None]:
class Experiment:
    """Read-only record of one experiment: its metadata and its rounds."""

    def __init__(self, name, input_file, parameters, rounds):
        (self.__name,
         self.__input_file,
         self.__parameters,
         self.__rounds) = name, input_file, parameters, rounds

    def __repr__(self):
        # The rounds are not part of the textual representation.
        shown = (self.__name, self.__input_file, self.__parameters)
        return 'Experiment(name={0}, input_file={1}, parameters={2})'.format(*shown)

    @property
    def rounds(self):
        """The rounds object handed to the constructor."""
        return self.__rounds

    @property
    def parameters(self):
        """The parameters object handed to the constructor."""
        return self.__parameters

    @property
    def input_file(self):
        """The input file value handed to the constructor."""
        return self.__input_file

    @property
    def name(self):
        """The experiment name handed to the constructor."""
        return self.__name
    
class Round:
    """Read-only record of a single round: duration, objective value and round number."""

    def __init__(self, duration, objective, n):
        self.__duration, self.__objective, self.__n = duration, objective, n

    def __repr__(self):
        shown = (self.__duration, self.__objective, self.__n)
        return 'Round(duration={0}, objective={1}, n={2})'.format(*shown)

    @property
    def n(self):
        """The round number handed to the constructor."""
        return self.__n

    @property
    def objective(self):
        """The objective value handed to the constructor."""
        return self.__objective

    @property
    def duration(self):
        """The duration handed to the constructor (presumably milliseconds -- TODO confirm)."""
        return self.__duration
    
class DriverRound:
    """Read-only record of one driver-log entry: duration, device and round number."""

    def __init__(self, duration, device, n):
        self.__duration = duration
        self.__device = device
        self.__n = n

    @property
    def duration(self):
        """The duration handed to the constructor (presumably milliseconds -- TODO confirm)."""
        return self.__duration

    @property
    def device(self):
        """The device identifier handed to the constructor."""
        return self.__device

    @property
    def n(self):
        """The round number handed to the constructor."""
        return self.__n

    def __repr__(self):
        # Report the real class name; this used to print 'Round(...)', which
        # made driver rounds indistinguishable from Round objects in output.
        return 'DriverRound(duration={0}, device={1}, n={2})'.format(
            self.__duration, self.__device, self.__n)

In [None]:
class ExperimentLogParser(ABC):
    """Abstract base for log parsers.

    Deriving from ABC is what makes @abstractmethod effective: the base class
    itself can no longer be instantiated, and neither can a subclass that
    does not provide parse().
    """

    @abstractmethod
    def parse(self, file_path):
        """Parse the log at file_path; concrete subclasses supply the implementation."""
        pass

class PrefixExperimentLogParser(ExperimentLogParser):
    """Parses a log file in which the relevant lines contain a marker prefix.

    Everything after the first occurrence of the prefix on such a line is
    treated as a JSON document. Unless no_params is set, the first matching
    line is the experiment header and the remaining ones are rounds.
    """

    def __init__(self, prefix, parameters):
        self.__prefix = prefix
        # Stored for callers' benefit only; parse() does not consult it and
        # always exports the fixed keys eta / lambda / H / K.
        self.__parameters = parameters

    def parse(self, file_path, no_params=False):
        """Read file_path and build an Experiment from its prefixed lines.

        Args:
            file_path: path of the log file to read.
            no_params: when True the log has no header line; every matching
                line is a round and name, input file and parameters are
                left empty.

        Returns:
            An Experiment holding one Round per round line.

        Raises:
            ValueError: if a header is expected but no line contains the
                prefix, or if a line is not valid JSON.
            KeyError: if a JSON document lacks an expected key.
        """
        with open(file_path, 'r') as f:
            lines = PrefixExperimentLogParser.filter_lines(f.readlines(), self.__prefix)

        if no_params:
            # No header: do not touch lines[0] as a header at all, so a log
            # without any matching line yields an experiment with no rounds
            # instead of an IndexError.
            name = ''
            input_file = ''
            parameters = {}
            round_lines = lines
        else:
            if not lines:
                raise ValueError(
                    'no line containing prefix {0!r} found in {1!r}'.format(self.__prefix, file_path))
            experiment_prop = json.loads(lines[0])
            name = experiment_prop['name']
            input_file = experiment_prop['inputFile']
            parameters = {
                'eta': experiment_prop['eta'],
                'lambda': experiment_prop['lambda'],
                'H': experiment_prop['localIterFrac'],
                'K': experiment_prop['numSplits']
            }
            round_lines = lines[1:]

        rounds = []
        for line in round_lines:
            round_prop = json.loads(line)
            rounds.append(Round(round_prop['duration'], round_prop['objective'], round_prop['round']))

        return Experiment(name, input_file, parameters, rounds)

    @staticmethod
    def filter_lines(lines, prefix):
        """Keep the lines containing prefix, each cut down to the text after its first occurrence."""
        return [line[line.find(prefix) + len(prefix):] for line in lines if prefix in line]
    
class PrefixDriverWithExperimentLogParser(ExperimentLogParser):
    """Combines a driver log with a per-device log.

    The driver log supplies the experiment header and, for every round, one
    entry per device with that device's duration. The per-device log supplies
    the objective of every round. Each resulting Round carries the objective
    from the device log and the longest duration the driver log reports for
    that round.
    """

    def __init__(self, prefix, parameters):
        self.__prefix = prefix
        # Stored only; parse() always exports the fixed keys eta / lambda / H / K.
        self.__parameters = parameters

    def parse(self, driver_log_path, file_path, no_params=False):
        """Build an Experiment from a driver log and a device log.

        Args:
            driver_log_path: path of the driver log. Its first prefixed line
                is always treated as the header and skipped when reading
                rounds, whatever no_params says.
            file_path: path of the device log; every prefixed line is a round.
            no_params: when True the header is not read and name, input file
                and parameters are left empty.

        Returns:
            An Experiment with one Round per device-log line.

        Raises:
            ValueError: if a header is needed but the driver log has no
                prefixed line, or if a line is not valid JSON.
            KeyError: if a JSON document lacks an expected key, or the device
                log mentions a round that is absent from the driver log.
        """
        filter_lines = PrefixDriverWithExperimentLogParser.filter_lines

        with open(driver_log_path, 'r') as f_d:
            lines_d = filter_lines(f_d.readlines(), self.__prefix)
        with open(file_path, 'r') as f_f:
            lines_f = filter_lines(f_f.readlines(), self.__prefix)

        rounds_d = []
        for line in lines_d[1:]:
            round_prop = json.loads(line)
            rounds_d.append(DriverRound(round_prop['duration'], round_prop['device'], round_prop['round']))

        # Longest duration per round number. A plain dictionary is used
        # because itertools.groupby only merges *adjacent* entries: with
        # driver lines of different rounds interleaved, a later fragment of a
        # round would overwrite the maximum found in an earlier fragment.
        rnd_duration = {}
        for driver_round in rounds_d:
            known = rnd_duration.get(driver_round.n)
            if known is None or driver_round.duration > known:
                rnd_duration[driver_round.n] = driver_round.duration

        if no_params:
            name = ''
            input_file = ''
            parameters = {}
        else:
            if not lines_d:
                raise ValueError(
                    'no line containing prefix {0!r} found in {1!r}'.format(self.__prefix, driver_log_path))
            experiment_prop = json.loads(lines_d[0])
            name = experiment_prop['name']
            input_file = experiment_prop['inputFile']
            parameters = {
                'eta': experiment_prop['eta'],
                'lambda': experiment_prop['lambda'],
                'H': experiment_prop['localIterFrac'],
                'K': experiment_prop['numSplits']
            }

        rounds_f = []
        for line in lines_f:
            round_prop = json.loads(line)
            # rnd_duration is keyed by round number, so it has to be looked
            # up with the round number (it used to be looked up with the
            # device's own duration value).
            # NOTE(review): confirm against real logs that 'round' is the
            # intended key.
            rounds_f.append(Round(rnd_duration[round_prop['round']], round_prop['objective'], round_prop['round']))

        return Experiment(name, input_file, parameters, rounds_f)

    @staticmethod
    def filter_lines(lines, prefix):
        """Keep the lines containing prefix, each cut down to the text after its first occurrence."""
        return [line[line.find(prefix) + len(prefix):] for line in lines if prefix in line]

In [None]:
class ExperimentLogPlotter:
    """Draws the objective curves of several experiments on one semilog-y figure.

    Experiments are registered with add_experiment(); the other public
    methods are setters for figure options; plot() renders everything with
    matplotlib.pyplot.
    """

    def __init__(self):
        self.__experiments = []
        self.__names = []
        self.__figsize = (8, 6)
        # Used both as the lower y-axis limit and as the cut-off below which
        # a curve is truncated.
        self.__ylim = 1e-4
        # NOTE(review): 10e-1 is 1.0 and 10e-4 is 1e-3, so the default ticks
        # are 1, 0.1, 0.01, 0.001 -- confirm that 1e-4 was not intended.
        self.__yticks = [10e-1, 10e-2, 10e-3, 10e-4]
        self.__xlabel = "Seconds"
        self.__ylabel = ""
        self.__grid = False
        self.__which_grid = "both"
        self.__markers = ['x', 'o', 'v', '^', '.']
        self.__use_iterations = False
        self.__every_n = []
        self.__title = ""

    def add_experiment(self, experiment, name=None, every_n=1):
        """Register an experiment.

        Args:
            experiment: object exposing .rounds (and .name if name is omitted).
            name: legend label; falls back to experiment.name when falsy.
            every_n: plot only every n-th point of this experiment.
        """
        self.__experiments.append(experiment)
        self.__every_n.append(every_n)
        if name:
            self.__names.append(name)
        else:
            self.__names.append(experiment.name)

    def figsize(self, figsize):
        """Set the figure size passed to plt.figure()."""
        self.__figsize = figsize

    def ylim(self, ylim):
        """Set the lower y limit, which is also the truncation cut-off."""
        self.__ylim = ylim

    def yticks(self, ticks):
        """Set the y tick positions."""
        self.__yticks = ticks

    def ylabel(self, label):
        """Set the y axis label."""
        self.__ylabel = label

    def enable_grid(self, which='both'):
        """Turn the grid on for the given tick set ('major', 'minor' or 'both')."""
        # The flag has to be raised as well; storing `which` alone left the
        # grid permanently disabled.
        self.__grid = True
        self.__which_grid = which

    def use_iterations(self):
        """Plot against the iteration index instead of elapsed time."""
        self.__use_iterations = True
        self.__xlabel = "Iterations"

    def title(self, title):
        """Set the figure title."""
        self.__title = title

    def plot(self):
        """Render all registered experiments into a new matplotlib figure."""
        timestamps, objectives = ExperimentLogPlotter._prepare_experiments(self.__experiments, self.__ylim)

        plt.figure(figsize=self.__figsize)

        plt.ylim(self.__ylim)
        plt.yticks(self.__yticks)
        plt.xlabel(self.__xlabel)
        plt.ylabel(self.__ylabel)
        plt.grid(self.__grid, which=self.__which_grid)
        plt.title(self.__title)
        # No explicit upper x limit is set here: the autoscale call below
        # recomputes it from the data anyway, and computing a maximum up
        # front fails when a curve is empty.

        for i in range(len(timestamps)):
            step = self.__every_n[i]
            objective = objectives[i][::step]

            if self.__use_iterations:
                # Sub-sample the real iteration indices so the x position of
                # a point stays its iteration number even when every_n > 1.
                x_values = np.arange(len(objectives[i]))[::step]
            else:
                x_values = timestamps[i][::step]

            # Cycle through the markers so more experiments than markers
            # does not raise IndexError.
            marker = self.__markers[i % len(self.__markers)]
            plt.semilogy(x_values, objective, marker=marker)

        plt.legend(self.__names, loc='upper right')
        plt.autoscale(axis='x')
        plt.xlim(xmin=0)

    @staticmethod
    def _max_timestamp(timestamps):
        """Return the largest value over all arrays, or -inf if there is none."""
        max_timestamp = -np.inf
        for timestamp in timestamps:
            if np.size(timestamp) == 0:
                # An empty array has no maximum; skip it instead of raising.
                continue
            current_max = timestamp.max()
            if current_max > max_timestamp:
                max_timestamp = current_max
        return max_timestamp

    @staticmethod
    def _prepare_experiments(experiments, cut_off=1e-4):
        """Turn experiments into parallel lists of time and objective arrays.

        For every experiment the round durations are divided by 1000 and
        accumulated (presumably milliseconds to seconds -- TODO confirm), and
        the objectives are shifted so that the experiment's own minimum is
        zero. Both arrays are then truncated just before the first shifted
        objective that is below cut_off; if there is none, they are kept whole.

        Returns:
            (timestamps, objectives): two lists of numpy arrays, one entry
            per experiment. Entries may be empty.
        """
        timestamps = []
        objectives = []

        for experiment in experiments:
            timestamps_arr = np.cumsum([rnd.duration / 1000 for rnd in experiment.rounds])
            objectives_arr = np.array([rnd.objective for rnd in experiment.rounds])

            if objectives_arr.size:
                objectives_arr = objectives_arr - objectives_arr.min()

            below = np.flatnonzero(objectives_arr < cut_off)
            # Without a value below the cut-off keep every point (a sentinel
            # of -1 would silently drop the last one).
            max_idx = below[0] if below.size else len(objectives_arr)

            timestamps.append(timestamps_arr[:max_idx])
            objectives.append(objectives_arr[:max_idx])

        return timestamps, objectives

In [None]:
# Root directory of all experiment logs. It is the only machine-specific part
# of the paths below, so relocating the data means editing this single line.
RESULTS_DIR = '/Users/Chris/Studies/thesis/experiment_results'

# Logs found under the 'exa' directory.
input_path_exa = RESULTS_DIR + '/exa/exp_exa_epsilon_h01_2/exp_exa_epsilon_h01_2_091.txt'
input_path_exa_2 = RESULTS_DIR + '/exa/exp_exa_epsilon_h01_s3/exp_exa_epsilon_h01_s3_091.txt'
input_path_exa_3 = RESULTS_DIR + '/exa/exp_exa_epsilon_h005_s3/exp_exa_epsilon_h005_s3_091.txt'
input_path_exa_driver_2 = RESULTS_DIR + '/exa/exp_exa_epsilon_h01_s3.txt'
input_path_exa_driver_3 = RESULTS_DIR + '/exa/exp_exa_epsilon_h005_s3.txt'
# Logs found under the 'local' and 'spark' directories.
input_path_local = RESULTS_DIR + '/local/exp_local_epsilon_h01.txt'
input_path_spark = RESULTS_DIR + '/spark/exp_spark_epsilon_h01.txt'

In [None]:
# Three independent single-log parsers with identical settings; the generator
# builds a fresh parser (and a fresh parameter list) for each name.
parser_spark, parser_local, parser_fw = (
    PrefixExperimentLogParser(
        prefix='EXPERIMENT|',
        parameters=['eta', 'lambda', 'localIterFrac', 'numSplits'],
    )
    for _ in range(3)
)
# Parser that combines a driver log with a device log.
parser_fw_2 = PrefixDriverWithExperimentLogParser(
    prefix='EXPERIMENT|',
    parameters=['eta', 'lambda', 'localIterFrac', 'numSplits'],
)

In [None]:
# Parse the two logs first, then configure and draw the figure.
experiment_local = parser_local.parse(input_path_local)
experiment_spark = parser_spark.parse(input_path_spark)

plotter = ExperimentLogPlotter()
plotter.add_experiment(experiment_local, name='Local', every_n=20)
plotter.add_experiment(experiment_spark, name='Spark', every_n=50)
plotter.title('Epsilon - Suboptimality vs. Time')
plotter.ylabel(r'Suboptimality: $O_B(w) - O_B(w^*)$')
plotter.figsize((8, 6))
plotter.ylim(1e-4)
plotter.plot()

In [None]:
# Log files used by the next plotting cell.
input_path_url_exa, input_path_url_spark = (
    '/Users/Chris/Studies/thesis/experiment_results/exa/test.txt',
    '/Users/Chris/Studies/thesis/experiment_results/spark/exp_spark_url_h1.txt',
)

In [None]:
# The second log is read with no_params=True, i.e. without a header line.
experiment_url_spark = parser_spark.parse(input_path_url_spark)
experiment_url_fw = parser_fw.parse(input_path_url_exa, no_params=True)

plotter = ExperimentLogPlotter()
plotter.add_experiment(experiment_url_spark, name='Spark', every_n=100)
plotter.add_experiment(experiment_url_fw, name='Framework', every_n=100)
plotter.ylim(1e-4)
plotter.ylabel(r'Suboptimality: $O_B(w) - O_B(w^*)$')
plotter.plot()

In [None]:
class ExperimentDoubleLogPlotter:
    """Draws experiments on a row of semilog-y subplots.

    Works like a single-figure plotter, except that every experiment is
    assigned to a 1-based subplot column; the figure gets as many columns as
    the highest subplot number in use.
    """

    def __init__(self):
        self.__experiments = []
        self.__names = []
        self.__figsize = (8, 6)
        # Used both as the lower y-axis limit and as the cut-off below which
        # a curve is truncated.
        self.__ylim = 1e-4
        # NOTE(review): 10e-1 is 1.0 and 10e-4 is 1e-3, so the default ticks
        # are 1, 0.1, 0.01, 0.001 -- confirm that 1e-4 was not intended.
        self.__yticks = [10e-1, 10e-2, 10e-3, 10e-4]
        self.__xlabel = "Seconds"
        self.__ylabel = ""
        self.__grid = False
        self.__which_grid = "both"
        self.__markers = ['x', 'o', 'v', '^', '.']
        self.__use_iterations = False
        self.__every_n = []
        self.__title = ""
        self.__subplots = []

    def add_experiment(self, experiment, subplot=1, name=None, every_n=1):
        """Register an experiment.

        Args:
            experiment: object exposing .rounds (and .name if name is omitted).
            subplot: 1-based index of the subplot column to draw on.
            name: legend label; falls back to experiment.name when falsy.
            every_n: plot only every n-th point of this experiment.

        Raises:
            ValueError: if subplot is smaller than 1.
        """
        if subplot < 1:
            # 0 or a negative number would be used as a negative array index
            # and silently draw on the wrong axes.
            raise ValueError('subplot must be >= 1, got {0!r}'.format(subplot))
        self.__experiments.append(experiment)
        self.__every_n.append(every_n)
        self.__subplots.append(subplot)
        if name:
            self.__names.append(name)
        else:
            self.__names.append(experiment.name)

    def figsize(self, figsize):
        """Set the figure size passed to plt.subplots()."""
        self.__figsize = figsize

    def ylim(self, ylim):
        """Set the lower y limit, which is also the truncation cut-off."""
        self.__ylim = ylim

    def yticks(self, ticks):
        """Set the y tick positions."""
        self.__yticks = ticks

    def ylabel(self, label):
        """Set the y axis label."""
        self.__ylabel = label

    def enable_grid(self, which='both'):
        """Turn the grid on for the given tick set ('major', 'minor' or 'both')."""
        # The flag has to be raised as well; storing `which` alone left the
        # grid permanently disabled.
        self.__grid = True
        self.__which_grid = which

    def use_iterations(self):
        """Plot against the iteration index instead of elapsed time."""
        self.__use_iterations = True
        self.__xlabel = "Iterations"

    def title(self, title):
        """Set the title shown above every subplot."""
        self.__title = title

    def plot(self):
        """Render all registered experiments into a new figure with one row of subplots."""
        timestamps, objectives = ExperimentDoubleLogPlotter._prepare_experiments(
            self.__experiments, self.__ylim)

        n_subplots = max(self.__subplots) if self.__subplots else 1

        # squeeze=False always returns a 2-D array of axes; with the default,
        # a single column yields a bare Axes object that cannot be indexed.
        fig, axes_grid = plt.subplots(nrows=1, ncols=n_subplots, figsize=self.__figsize, squeeze=False)
        axarr = axes_grid[0]
        legends = defaultdict(list)

        for i in range(n_subplots):
            axarr[i].set_ylim(self.__ylim)
            axarr[i].set_yticks(self.__yticks)
            axarr[i].set_xlabel(self.__xlabel)
            axarr[i].set_ylabel(self.__ylabel)
            axarr[i].grid(self.__grid, which=self.__which_grid)
            axarr[i].set_title(self.__title)

        for i in range(len(timestamps)):
            step = self.__every_n[i]
            objective = objectives[i][::step]
            subplot = self.__subplots[i]
            legends[subplot - 1].append(self.__names[i])

            if self.__use_iterations:
                # Sub-sample the real iteration indices so the x position of
                # a point stays its iteration number even when every_n > 1.
                x_values = np.arange(len(objectives[i]))[::step]
            else:
                x_values = timestamps[i][::step]

            # Cycle through the markers so more experiments than markers
            # does not raise IndexError.
            marker = self.__markers[i % len(self.__markers)]
            axarr[subplot - 1].semilogy(x_values, objective, marker=marker)

        for i in range(n_subplots):
            axarr[i].legend(legends[i], loc='upper right')
            axarr[i].autoscale(axis='x')
            axarr[i].set_xlim(xmin=0)

    @staticmethod
    def _max_timestamp(timestamps):
        """Return the largest value over all arrays, or -inf if there is none."""
        max_timestamp = -np.inf
        for timestamp in timestamps:
            if np.size(timestamp) == 0:
                # An empty array has no maximum; skip it instead of raising.
                continue
            current_max = timestamp.max()
            if current_max > max_timestamp:
                max_timestamp = current_max
        return max_timestamp

    @staticmethod
    def _prepare_experiments(experiments, cut_off=1e-4):
        """Turn experiments into parallel lists of time and objective arrays.

        For every experiment the round durations are divided by 1000 and
        accumulated (presumably milliseconds to seconds -- TODO confirm), and
        the objectives are shifted so that the experiment's own minimum is
        zero. Both arrays are then truncated just before the first shifted
        objective that is below cut_off; if there is none, they are kept whole.

        Returns:
            (timestamps, objectives): two lists of numpy arrays, one entry
            per experiment. Entries may be empty.
        """
        timestamps = []
        objectives = []

        for experiment in experiments:
            timestamps_arr = np.cumsum([rnd.duration / 1000 for rnd in experiment.rounds])
            objectives_arr = np.array([rnd.objective for rnd in experiment.rounds])

            if objectives_arr.size:
                objectives_arr = objectives_arr - objectives_arr.min()

            below = np.flatnonzero(objectives_arr < cut_off)
            # Without a value below the cut-off keep every point (a sentinel
            # of -1 would silently drop the last one).
            max_idx = below[0] if below.size else len(objectives_arr)

            timestamps.append(timestamps_arr[:max_idx])
            objectives.append(objectives_arr[:max_idx])

        return timestamps, objectives

In [None]:
# Parse every log up front; the last two combine a driver log with a device log.
experiment_local = parser_local.parse(input_path_local)
experiment_spark = parser_spark.parse(input_path_spark)
experiment_fw = parser_fw.parse(input_path_exa, no_params=True)
experiment_fw_h01 = parser_fw_2.parse(input_path_exa_driver_2, input_path_exa_2)
experiment_fw_h005 = parser_fw_2.parse(input_path_exa_driver_3, input_path_exa_3)

plotter = ExperimentDoubleLogPlotter()
# First subplot (the default).
plotter.add_experiment(experiment_local, subplot=1, name='Local', every_n=20)
plotter.add_experiment(experiment_spark, subplot=1, name='Spark', every_n=50)
plotter.add_experiment(experiment_fw, subplot=1, name='Framework', every_n=100)
# Second subplot.
plotter.add_experiment(experiment_fw_h01, subplot=2, name='Framework SSP=3, H=0.1', every_n=100)
plotter.add_experiment(experiment_fw_h005, subplot=2, name='Framework SSP=3, H=0.05', every_n=100)

plotter.title('Epsilon - Suboptimality vs. Time')
plotter.ylabel(r'Suboptimality: $O_B(w) - O_B(w^*)$')
plotter.figsize((14, 6))
plotter.ylim(1e-4)
plotter.plot()