In [None]:
from collections import OrderedDict, defaultdict
import pandas as pd
import numpy as np
import json
import os
from glob import glob
import matplotlib.pyplot as plt
%matplotlib qt5

## Python benchmark suite
https://pypi.python.org/pypi/performance

In [None]:
import parse_test


# Accumulate the timings of every result file found under 'python_run'.
# parse_test.get_test_times fills tests_dict in place; from the unpacking
# below it is a mapping {test name: {distribution: iterable of values}}.
tests_dict = {}
for result_file in parse_test.load_files('python_run'):
    parse_test.get_test_times(tests_dict, result_file)

# Flatten the nested mapping into one (test name, distribution, value) row
# per individual measurement.
tuples = []
for test_name, per_distribution in tests_dict.items():
    for distribution, timings in per_distribution.items():
        tuples.extend((test_name, distribution, timing) for timing in timings)

df = pd.DataFrame(tuples)
df.columns = ['test name', 'distribution', 'value']
# Display only: the re-indexed frame is shown as the cell output, df itself
# keeps its default integer index.
df.set_index(['test name', 'distribution'])

In [None]:
# Best (minimum) value per (test, distribution) pair.
# The grouping keys must be passed as a list: a tuple is interpreted by pandas
# as a single key (deprecated in 0.23, a KeyError in current releases).
group = df.groupby(['test name', 'distribution']).min().reset_index()
test_names = sorted(set(group['test name']))
# Baseline values of the 'pip' distribution, labelled by test name.
# NOTE(review): the values are attached to test_names positionally, which
# assumes every test has exactly one 'pip' row in `group` -- confirm.
min_pip_times = pd.Series(data=group[group.distribution == 'pip']['value'].values, index=test_names)


In [None]:
# Per-test best values of the two distributions being compared against pip.
anaconda_best = group.loc[group.distribution == 'anaconda', 'value'].values
intel_best = group.loc[group.distribution == 'intel', 'value'].values
df2 = pd.DataFrame(
    {
        'Anaconda >=5': anaconda_best,
        'Anaconda <5; Intel': intel_best,
    },
    index=test_names,
)
# Ratio pip / distribution per test: invert, then scale each row by the
# pip baseline of that test.
df2 = (1 / df2).mul(min_pip_times, axis=0)



In [None]:
# Overlaid histograms of the per-test ratios, one series per column of df2.
plot = df2.plot(kind='hist', alpha=0.5, bins=30, colormap='brg')
# NOTE(review): the label says t_ubuntu, while the numerator of the ratio is
# taken from the 'pip' distribution rows -- confirm the intended wording.
plot.set_xlabel("Speed ratio, (t_ubuntu / t_distro)")
plot.set_ylabel("")
# Hide the y ticks entirely.
plot.yaxis.set_major_locator(plt.NullLocator())

## Black-Scholes benchmark
https://github.com/IntelPython/BlackScholes_bench

In [None]:
import re

# Matches output lines of the form "... Size: <int> MOPS: <float>".
line_re = re.compile(r'.*Size:\s(\d+)\sMOPS:\s(\d+\.\d+)')
# Sorted so that the column order (and which file supplies the 'Size'
# column) does not depend on the file-system listing order.
files = sorted(glob("bs_run_serial/*.txt"))
df = None
for fn in files:
    # Column key = file name without directory and extension.
    # str.rstrip('.txt') would strip any trailing '.', 't' or 'x' characters
    # (mangling names that end in those letters), and splitting on '/' is not
    # portable, hence basename + splitext.
    key = os.path.splitext(os.path.basename(fn))[0]
    with open(fn) as f:
        matches = (line_re.search(line) for line in f)
        # (size, MOPS) pairs, in file order, for the lines that matched.
        data = [(int(m.group(1)), float(m.group(2))) for m in matches if m]
    if df is None:
        # The first file provides the 'Size' column for the whole table.
        df = pd.DataFrame(data, columns=['Size', key])
    else:
        # Later files contribute only their MOPS column; pandas raises if
        # the number of parsed rows differs from the first file.
        df[key] = [mops for _, mops in data]
# Display only: df itself keeps 'Size' as a regular column.
df.set_index('Size')

In [None]:
# For each distribution, take the row-wise maximum over every column whose
# name contains the distribution name.
maxima = {}
for distro in ('intel', 'anaconda', 'pip'):
    matching_columns = [name for name in df.columns if distro in name]
    maxima[distro] = df[matching_columns].max(axis=1)
maxima['Size'] = df.Size.values
# Replace df by the reduced table, with Size expressed in units of 1024.
df = pd.DataFrame(maxima).assign(Size=lambda frame: frame['Size'] // 1024)
# Display only: df keeps 'Size' as a regular column.
df.set_index('Size')

In [None]:
# Express the anaconda and intel columns relative to the pip column.
# This overwrites the columns in place, so running the cell a second time
# divides by pip again.
for column in ('anaconda', 'intel'):
    df[column] = df[column] / df['pip']

In [None]:
# Table for plotting: ratio columns only, indexed by Size.
df2 = df.set_index('Size')[['intel', 'anaconda']]

In [None]:
# Ratio to pip versus problem size, on a logarithmic x axis.
ax = df2.plot(colormap='brg', logx=True)
ax.set_xlabel('Size (thousands of elements)')
ax.set_ylabel('MOPS_distro / MOPS_pip')
ax.set_title('Black-Scholes (single-threaded)')

## BLAS benchmark suite
https://github.com/continuumio/mkl-optimizations-benchmarks

In [None]:
files = glob('mkl_serial/mkl_run_*/*.csv')

# Problem sizes per benchmark. Not referenced by the code in this cell.
sizes = {
    'cholesky': [64, 90, 128, 181, 256, 362, 512, 724, 1024, 1448, 2048,
                 2896, 4096, 5792, 8192],
    'dgemm': [64, 90, 128, 181, 256, 362, 512, 724, 1024, 1448, 2048,
              2896, 4096, 5792, 8192],
    'svd_over': [64, 90, 128, 181, 256, 362, 512, 724, 1024, 1448, 2048],
    'svd_under': [64, 90, 128, 181, 256, 362, 512, 724, 1024, 1448, 2048],
    'numexpr': [4096., 5792.61875148, 8192., 11585.23750296,
                16384., 23170.47500592, 32768., 46340.95001184,
                65536., 92681.90002368, 131072., 185363.80004737,
                262144.],
    'fft': [16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192],
}

# While loading: results[benchmark][distro] is a frame indexed by size with
# one 'GFLOPS(<run>)' column per run directory.
# After the second loop: results[benchmark] is a frame of ratios to pip.
results = defaultdict(OrderedDict)

# Pass 1: read every CSV. The file stem is split on '-' into distro and
# benchmark; the run id is whatever follows the last '_' in the directory.
for csv_path in files:
    stem = os.path.splitext(os.path.basename(csv_path))[0]
    distro, benchmark = stem.split('-')
    benchmark = benchmark.lower()
    run_id = os.path.dirname(csv_path).rsplit('_', 1)[-1]
    run_frame = pd.read_csv(
        csv_path,
        header=None,
        names=['size', 'GFLOPS({})'.format(run_id)],
    ).set_index('size')
    per_distro = results[benchmark]
    if distro in per_distro:
        # Further runs of the same distro are joined on the size index.
        per_distro[distro] = per_distro[distro].join(run_frame)
    else:
        per_distro[distro] = run_frame

# Pass 2: keep the best run per size, then express intel and anaconda
# relative to pip.
for bmark in list(results):
    best = OrderedDict(
        (name, runs.max(axis=1)) for name, runs in results[bmark].items()
    )
    values = OrderedDict(
        intel=best['intel3'],
        anaconda=best['anaconda3'],
        pip=best['pip3'],
    )
    combined = pd.DataFrame(index=best['anaconda3'].index, data=values)
    # Only the two ratio columns are kept; the pip column is dropped.
    results[bmark] = combined[['intel', 'anaconda']].div(combined['pip'], axis=0)

In [None]:
# DGEMM: ratio to pip versus size, logarithmic x axis.
# NOTE(review): the data is globbed from 'mkl_serial/' while the title says
# "parallel" -- confirm which is intended.
ax = results['dgemm'].plot(colormap='brg', logx=True)
ax.set_xlabel('Array dimension size')
ax.set_ylabel('GFLOPS_distro / GFLOPS_pip')
ax.set_title("DGEMM, parallel")

In [None]:
# Cholesky: ratio to pip versus size, logarithmic x axis.
# NOTE(review): the data is globbed from 'mkl_serial/' while the title says
# "parallel" -- confirm which is intended.
ax = results['cholesky'].plot(colormap='brg', logx=True)
ax.set_xlabel('Array dimension size')
ax.set_ylabel('GFLOPS_distro / GFLOPS_pip')
ax.set_title("Cholesky, parallel")

In [None]:
# Numexpr: ratio to pip versus size, logarithmic x axis.
# NOTE(review): the data is globbed from 'mkl_serial/' while the title says
# "parallel" -- confirm which is intended.
ax = results['numexpr'].plot(colormap='brg', logx=True)
ax.set_xlabel('Array size')
ax.set_ylabel('GFLOPS_distro / GFLOPS_pip')
ax.set_title("Numexpr, parallel")

In [None]:
# FFT: ratio to pip versus size, logarithmic x axis.
# NOTE(review): the data is globbed from 'mkl_serial/' while the title says
# "parallel" -- confirm which is intended.
ax = results['fft'].plot(colormap='brg', logx=True)
ax.set_xlabel('Array size')
ax.set_ylabel('GFLOPS_distro / GFLOPS_pip')
ax.set_title("FFT, parallel")

In [None]:
# SVD (overdetermined): ratio to pip versus size, logarithmic x axis.
ax = results['svd_over'].plot(colormap='brg', logx=True)
ax.set_xlabel('Array dimension size')
ax.set_ylabel('GFLOPS_distro / GFLOPS_pip')
ax.set_title("SVD (overdetermined), single-threaded")

In [None]:
# SVD (underdetermined): ratio to pip versus size, logarithmic x axis.
ax = results['svd_under'].plot(colormap='brg', logx=True)
ax.set_xlabel('Array dimension size')
ax.set_ylabel('GFLOPS_distro / GFLOPS_pip')
ax.set_title("SVD (underdetermined), single-threaded")