In [2]:
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import colors as mcolors
%matplotlib inline
import numpy as np
import seaborn as sns
%pylab inline
# Default size for every figure drawn in this notebook. The `pylab` name is
# injected into the namespace by the `%pylab inline` magic above.
pylab.rcParams['figure.figsize'] = (10, 8)
# Single lookup table of named colors: matplotlib's base colors merged with
# the CSS4 names. On a name clash the CSS4 entry overrides the base one.
colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)

Populating the interactive namespace from numpy and matplotlib


In [24]:
# Benchmark grid: each (device, model, data size, fit mode) combination
# corresponds to one log file name.
device_prefix = ['CPU', 'GPU']
models = [
    'logistic_regression',
    'least_square',
    'linear_svm',
]
# The larger sizes (20000000 and 50000000) are currently left out of the grid.
data_files = list(map('T_{}'.format, (1000000, 2000000, 5000000, 10000000)))
fit_choices = ['fitting', 'without_fit']

In [25]:
from itertools import product

def file_name_generator(device_prefix, models, data_files, fit_choices):
    """Enumerate the log file names for every benchmark combination.

    The four argument sequences are combined with a cartesian product, the
    right-most argument varying fastest.

    Returns:
        A list of ``(device, file_name)`` tuples, where ``file_name`` is the
        four chosen parts joined with underscores, e.g.
        ``('CPU', 'CPU_linear_svm_T_1000000_fitting')``.
    """
    return [
        (parts[0], '_'.join(parts))
        for parts in product(device_prefix, models, data_files, fit_choices)
    ]

In [26]:
# Build the (device, log file name) pairs for the whole benchmark grid.
file_names = file_name_generator(
    device_prefix=device_prefix,
    models=models,
    data_files=data_files,
    fit_choices=fit_choices,
)

In [32]:
def _parse_log_file(path):
    """Parse one benchmark log file into a dict of timing totals.

    Layout read from the file:
      * line 0: the value after the last ':' is stored as 'data_loading_time'
      * line 1: the value after the last ':' is stored as 'malloc_time'
      * every later line: a comma-separated record whose fields 2, 3, 4 and 5
        each end in ':<number>'; they are summed over all records into
        'iter_cost', 'memory_movement_cost', 'computation_cost' and
        'reduce_cost' respectively.

    Blank record lines (e.g. a trailing empty line) are skipped.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError, IndexError: if a non-blank line does not match the layout.
    """
    summary = {}
    iteration_total = 0
    memory_movement_total = 0
    computation_total = 0
    reduce_total = 0

    # Text mode: lines must be str so that the str-based rstrip/split calls
    # below work on Python 3 as well as Python 2 ('rb' yields bytes on 3).
    with open(path, 'r') as log_file:
        # Iterate lazily instead of materialising the file with readlines().
        for line_index, line in enumerate(log_file):
            line_content = line.rstrip('\n')
            if line_index == 0:
                summary['data_loading_time'] = float(line_content.split(':')[-1])
            elif line_index == 1:
                summary['malloc_time'] = float(line_content.split(':')[-1])
            elif line_content.strip():
                # Split the record once and pick the four cost fields.
                fields = line_content.split(',')
                iteration_total += float(fields[2].split(':')[-1])
                memory_movement_total += float(fields[3].split(':')[-1])
                computation_total += float(fields[4].split(':')[-1])
                reduce_total += float(fields[5].split(':')[-1])

    summary['iter_cost'] = iteration_total
    summary['memory_movement_cost'] = memory_movement_total
    summary['computation_cost'] = computation_total
    summary['reduce_cost'] = reduce_total
    return summary


# stats maps each log file name to its parsed timing summary.
stats = {}
for device, log_name in file_names:
    # CPU and GPU logs live in separate result directories.
    results_dir = './cpu_results/' if device == 'CPU' else './gpu_results/'
    stats[log_name] = _parse_log_file(results_dir + log_name)

In [33]:
# Sanity check: show the log file names that were parsed (the keys of stats).
print(stats.keys())

['CPU_least_square_T_1000000_without_fit', 'GPU_logistic_regression_T_1000000_without_fit', 'CPU_linear_svm_T_10000000_fitting', 'CPU_least_square_T_2000000_fitting', 'CPU_logistic_regression_T_10000000_without_fit', 'CPU_logistic_regression_T_10000000_fitting', 'GPU_logistic_regression_T_10000000_without_fit', 'GPU_least_square_T_5000000_fitting', 'GPU_linear_svm_T_5000000_fitting', 'GPU_linear_svm_T_2000000_fitting', 'GPU_least_square_T_1000000_without_fit', 'GPU_least_square_T_2000000_fitting', 'GPU_least_square_T_10000000_without_fit', 'GPU_logistic_regression_T_10000000_fitting', 'CPU_least_square_T_10000000_without_fit', 'CPU_least_square_T_5000000_fitting', 'CPU_logistic_regression_T_1000000_without_fit', 'GPU_least_square_T_2000000_without_fit', 'CPU_linear_svm_T_1000000_without_fit', 'CPU_logistic_regression_T_5000000_fitting', 'CPU_least_square_T_1000000_fitting', 'CPU_linear_svm_T_1000000_fitting', 'CPU_logistic_regression_T_2000000_without_fit', 'GPU_least_square_T_5000000_

In [36]:
# Dump every parsed log: one "Item/Val" line per recorded metric, with a
# dashed rule after each file.
separator = "-" * 100
for log_name, summary in stats.items():
    print("File Name: {}".format(log_name))
    for metric, value in summary.items():
        print("Item: {}, Val: {}".format(metric, value))
    print(separator)

File Name: CPU_least_square_T_1000000_without_fit
Item: reduce_cost, Val: 0.9798
Item: memory_movement_cost, Val: 26.8225
Item: iter_cost, Val: 36.8658
Item: data_loading_time, Val: 3.60035610199
Item: malloc_time, Val: 0.000576972961426
Item: computation_cost, Val: 10.0401
----------------------------------------------------------------------------------------------------
File Name: GPU_logistic_regression_T_1000000_without_fit
Item: reduce_cost, Val: 0.227
Item: memory_movement_cost, Val: 29.8081
Item: iter_cost, Val: 30.2305
Item: data_loading_time, Val: 3.53979897499
Item: malloc_time, Val: 0.00333189964294
Item: computation_cost, Val: 0.4221
----------------------------------------------------------------------------------------------------
File Name: CPU_linear_svm_T_10000000_fitting
Item: reduce_cost, Val: 9.4774
Item: memory_movement_cost, Val: 0.0459
Item: iter_cost, Val: 376.0088
Item: data_loading_time, Val: 35.6351840496
Item: malloc_time, Val: 0.26513504982
Item: computati