In [1]:
# Notebook setup: imports, display behaviour and plotting defaults.
from IPython.core.interactiveshell import InteractiveShell
# Show the value of every expression statement in a cell, not only the last
# one. The benchmark loop later in this notebook relies on this setting to
# render one table per iteration from a bare call inside a `for` loop.
InteractiveShell.ast_node_interactivity = "all"
# NOTE(review): Parse, np and plt are not referenced in the cells visible
# here -- presumably they are used further down; confirm before removing.
from htrace import Parse
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the cell output.
%matplotlib inline

# Emit inline figures as SVG.
%config InlineBackend.figure_formats = ['svg']
# Apply seaborn's "whitegrid" style to subsequent plots.
sns.set_style("whitegrid")

In [73]:
# Sampler identifiers. Each one is appended to a benchmark directory to locate
# that sampler's results (e.g. 'data/kmeans/' + 'a').
# NOTE(review): presumably a=all, l=limit, t=token bucket, p<x>=probability x,
# judging by the positionally matching labels below -- confirm.
samplers = ['a', 'l', 't', 'p0.1', 'p0.01']

# Column labels used when the per-sampler results are displayed; must stay in
# the same order as `samplers`. In English: full sampling, limited sampling,
# token-bucket sampling, probabilistic sampling (0.1), probabilistic
# sampling (0.01).
zh_samplers = [
    '全采样',
    '限制采样',
    '令牌桶采样',
    '概率采样(0.1)',
    '概率采样(0.01)',
]

# Benchmark name -> directory holding that benchmark's results. Every
# directory ends with '/' because sampler identifiers are concatenated
# directly onto it.
data_path = dict([
    ('dfsioe_r', 'data/dfsioe_r/'),
    ('dfsioe_w', 'data/dfsioe_w/'),
    ('kmeans', 'data/kmeans/'),
    ('pagerank', 'data/pagerank/'),
    ('terasort', 'data/terasort/'),
    ('wordcount', 'data/wordcount/'),
])

## 不同函数的采样个数对比

In [81]:
def clean_src_csv(path, filename='1.csv'):
    """Load one run's statistics CSV and return the per-function counts.

    The file ``path + '/' + filename`` must hold the statistic labels in its
    first column, one further column per traced function, and exactly eight
    rows. The rows are relabelled by position as count, mean, std, min, 25%,
    50%, 75%, max (presumably the file is a saved ``DataFrame.describe()``
    -- TODO confirm against the code that writes it).

    Parameters
    ----------
    path : str
        Directory that contains the CSV file.
    filename : str, optional
        Name of the CSV file inside ``path``. Defaults to ``'1.csv'``.

    Returns
    -------
    pandas.DataFrame
        One row per data column of the CSV (one per function) and a single
        column named ``'count'``.

    Raises
    ------
    ValueError
        If the CSV does not contain exactly eight rows, because the eight
        fixed labels cannot then be assigned.
    """
    # index_col=0 turns the first CSV column into the index while reading,
    # which replaces the manual "set index, then drop the column in place".
    stats = pd.read_csv(path + '/' + filename, index_col=0)
    # Transpose so that functions become rows and statistics become columns.
    by_function = stats.T
    by_function.columns = ['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']
    # Double brackets keep the result a one-column DataFrame rather than a Series.
    return by_function[['count']]

In [90]:
def bench_func_display(bench, min_count=10, max_count=10000):
    """Build the per-function sample-count table for one benchmark.

    For every identifier in the module-level ``samplers`` list, the counts are
    loaded with ``clean_src_csv(bench + sampler)`` and placed side by side,
    one column per sampler, labelled with the module-level ``zh_samplers``.
    Only functions whose count in the '全采样' ("full sampling") column lies
    strictly between ``min_count`` and ``max_count`` are kept.

    Parameters
    ----------
    bench : str
        Benchmark directory prefix. Sampler identifiers are concatenated
        directly onto it, so it has to end with a path separator.
    min_count : int or float, optional
        Exclusive lower bound on the full-sampling count. Defaults to 10.
    max_count : int or float, optional
        Exclusive upper bound on the full-sampling count. Defaults to 10000.

    Returns
    -------
    pandas.DataFrame
        Rows are function names, columns are the ``zh_samplers`` labels.
        A function that a sampler never recorded has NaN in that column.
    """
    per_sampler = [clean_src_csv(bench + sampler) for sampler in samplers]
    # Outer-align on function name; sort=True gives a sorted row index.
    counts = pd.concat(per_sampler, axis=1, sort=True)
    counts.columns = zh_samplers
    # The full-sampling column is the baseline the filter is applied to.
    baseline = counts['全采样']
    return counts[(baseline < max_count) & (baseline > min_count)]

In [91]:
# For each benchmark, print its data directory and then show its
# sample-count table. The bare call on the last line is rendered as cell
# output only because the setup cell sets
# InteractiveShell.ast_node_interactivity = "all".
for name in data_path:
    bench = data_path[name]
    print(bench)
    bench_func_display(bench)

data/dfsioe_r/


Unnamed: 0,全采样,限制采样,令牌桶采样,概率采样(0.1),概率采样(0.01)
BlockReaderRemote2#readNextPacket,5184.0,131.0,73.0,501.0,38.0
ClientNamenodeProtocol#addBlock,35.0,35.0,35.0,4.0,1.0
ClientNamenodeProtocol#complete,37.0,37.0,37.0,3.0,1.0
ClientNamenodeProtocol#create,37.0,37.0,37.0,2.0,
ClientNamenodeProtocol#getBlockLocations,70.0,70.0,70.0,6.0,1.0
ClientNamenodeProtocol#getFileInfo,81.0,81.0,81.0,8.0,
DFSInputStream#fetchBlockAt,68.0,11.0,5.0,5.0,
DFSInputStream#openInfo,69.0,69.0,69.0,6.0,1.0
DFSOutputStream#close,37.0,37.0,37.0,3.0,1.0
DFSOutputStream#flushInternal,37.0,37.0,37.0,3.0,1.0


data/dfsioe_w/


Unnamed: 0,全采样,限制采样,令牌桶采样,概率采样(0.1),概率采样(0.01)
BlockReaderRemote2#readNextPacket,64.0,45.0,64.0,6.0,1.0
ClientNamenodeProtocol#addBlock,67.0,40.0,36.0,9.0,2.0
ClientNamenodeProtocol#complete,69.0,69.0,69.0,9.0,2.0
ClientNamenodeProtocol#create,69.0,69.0,69.0,6.0,
ClientNamenodeProtocol#getBlockLocations,38.0,38.0,38.0,3.0,
ClientNamenodeProtocol#getFileInfo,81.0,81.0,81.0,9.0,1.0
DFSInputStream#byteArrayRead,1139.0,826.0,1136.0,96.0,17.0
DFSInputStream#fetchBlockAt,36.0,34.0,36.0,3.0,1.0
DFSInputStream#openInfo,37.0,37.0,37.0,3.0,
DFSInputStream#readWithStrategy,1139.0,826.0,1136.0,96.0,17.0


data/kmeans/


Unnamed: 0,全采样,限制采样,令牌桶采样,概率采样(0.1),概率采样(0.01)
BlockReaderRemote2#readNextPacket,251.0,42.0,22.0,21.0,3.0
ClientNamenodeProtocol#addBlock,22.0,22.0,22.0,4.0,
ClientNamenodeProtocol#complete,28.0,28.0,28.0,3.0,
ClientNamenodeProtocol#create,28.0,28.0,28.0,4.0,
ClientNamenodeProtocol#getBlockLocations,131.0,131.0,131.0,16.0,1.0
ClientNamenodeProtocol#getFileInfo,219.0,219.0,219.0,19.0,4.0
ClientNamenodeProtocol#getListing,73.0,73.0,73.0,10.0,1.0
ClientNamenodeProtocol#rename,21.0,21.0,21.0,3.0,
DFSClient#rename,21.0,21.0,21.0,3.0,
DFSInputStream#fetchBlockAt,131.0,41.0,22.0,11.0,1.0


data/pagerank/


Unnamed: 0,全采样,限制采样,令牌桶采样,概率采样(0.1),概率采样(0.01)
BlockReaderRemote2#readNextPacket,3749.0,133.0,114.0,364.0,37.0
ClientNamenodeProtocol#addBlock,241.0,129.0,127.0,33.0,2.0
ClientNamenodeProtocol#complete,247.0,247.0,247.0,32.0,2.0
ClientNamenodeProtocol#create,247.0,247.0,247.0,31.0,4.0
ClientNamenodeProtocol#delete,12.0,12.0,12.0,1.0,
ClientNamenodeProtocol#getBlockLocations,321.0,301.0,321.0,27.0,3.0
ClientNamenodeProtocol#getFileInfo,1537.0,1024.0,1537.0,157.0,7.0
ClientNamenodeProtocol#getListing,255.0,91.0,255.0,28.0,5.0
ClientNamenodeProtocol#rename,483.0,483.0,483.0,52.0,2.0
DFSClient#delete,12.0,12.0,12.0,1.0,


data/terasort/


Unnamed: 0,全采样,限制采样,令牌桶采样,概率采样(0.1),概率采样(0.01)
BlockReaderRemote2#readNextPacket,643.0,12.0,3.0,72.0,8.0
ClientNamenodeProtocol#addBlock,41.0,7.0,11.0,3.0,
ClientNamenodeProtocol#complete,42.0,42.0,42.0,6.0,1.0
ClientNamenodeProtocol#create,42.0,42.0,42.0,4.0,1.0
ClientNamenodeProtocol#fsync,40.0,40.0,40.0,3.0,1.0
ClientNamenodeProtocol#getFileInfo,215.0,213.0,215.0,24.0,4.0
ClientNamenodeProtocol#getListing,43.0,43.0,43.0,3.0,
ClientNamenodeProtocol#rename,80.0,80.0,80.0,9.0,1.0
DFSClient#rename,80.0,80.0,80.0,9.0,1.0
DFSOutputStream#close,42.0,42.0,42.0,6.0,1.0


data/wordcount/


Unnamed: 0,全采样,限制采样,令牌桶采样,概率采样(0.1),概率采样(0.01)
BlockReaderRemote2#readNextPacket,126.0,5.0,2.0,8.0,1.0
ClientNamenodeProtocol#addBlock,40.0,40.0,40.0,2.0,
ClientNamenodeProtocol#complete,41.0,41.0,41.0,2.0,
ClientNamenodeProtocol#create,41.0,41.0,41.0,5.0,1.0
ClientNamenodeProtocol#getFileInfo,204.0,204.0,204.0,21.0,4.0
ClientNamenodeProtocol#getListing,42.0,42.0,42.0,1.0,
ClientNamenodeProtocol#rename,80.0,80.0,80.0,5.0,2.0
DFSClient#rename,80.0,80.0,80.0,5.0,2.0
DFSOutputStream#close,41.0,41.0,41.0,2.0,
DFSOutputStream#flushInternal,41.0,41.0,41.0,2.0,
