Merge pull request #1106 from biddisco/benchmark
network_storage benchmark improvements, adding legends to plots and tidying layout
hkaiser committed Apr 4, 2014
2 parents e9ca618 + 262273d commit e58ed9b
Showing 2 changed files with 151 additions and 89 deletions.
17 changes: 13 additions & 4 deletions tests/performance/network/network_storage/instructions.md
@@ -52,26 +52,35 @@ The network_storage executable will produce several lines of output, but one line
contains the condensed information needed by the plotting script.
This line begins with the text "CSVData" to indicate comma separated data values.
Whilst jobs are running or when they have completed, you can execute a command
from the test root dir, such as
find . -name slurm.out -exec grep CSV {} \;
and a list of results generated from the jobs will be produced.
For plotting of results, the output should be directed into a file using
find . -name slurm.out -exec grep CSV {} \; >results-bgq-1a-2014-04-01.csv
where you use a file name applicable to your current experiment.
The generated file will be suitable for use by the plotting script.
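If the find/grep pipeline is not convenient, the same collection step can be done with a
small Python helper. This is only a sketch, assuming the job output files are named
slurm.out as above; the output file name is just an example:

```python
# Sketch: walk the test tree, pull the CSVData lines out of every slurm.out
# and append them to a single results file for the plotting script.
import os

def collect_csv(root='.', outfile='results.csv'):
    with open(outfile, 'w') as out:
        for dirpath, _, filenames in os.walk(root):
            if 'slurm.out' in filenames:
                with open(os.path.join(dirpath, 'slurm.out')) as f:
                    for line in f:
                        if 'CSVData' in line:
                            out.write(line)

collect_csv('.', 'results-bgq-1a-2014-04-01.csv')
```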

Important Note:
By default, the slurm jobs generated by the script have the 'exclusive' flag set.
This is because the jobs are intended to test BW of the network and it is
often desirable to have only a single job running at a time.
The 'exclusive' flag only works (under slurm) if all jobs have the same name,
so in your queue you will see many identical jobs and the queue will take a long time to drain.
If you can afford to run many jobs simultaneously, the job name may be set more
appropriately (details in script) and the exclusive flag removed.
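For reference, the relevant directives in a generated job script look roughly like the
following (a sketch only; the script sets the actual values and the job name shown here
is purely illustrative):
#SBATCH --exclusive
#SBATCH --job-name=network_storage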

------------------------------
plot-results.py
------------------------------
This is a Python script which takes results generated by the test program as
described above and plots a number of graphs for different parameter studies.
The results.csv file generated contains BW measurements, timing, thread, parcelport,
block size, etc. information for the plots.
The Python script parses the results and generates arrays (maps) of the data which
can be plotted in various ways. The script can be invoked as
plot-results.py results.csv
Optional arguments such as the figure size can be found by looking at the script.
The output from the script will be a series of svg and png files for each of the plots
created.
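For example, running
plot-results.py results-bgq-1a-2014-04-01.csv
will (going by the save loop at the end of the script) write one svg and one png per
group of plots next to the input file, with names such as
results-bgq-1a-2014-04-01.Read-by-block.png
results-bgq-1a-2014-04-01.Read-by-thread.png
results-bgq-1a-2014-04-01.Read-by-NodeBlock.png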


223 changes: 138 additions & 85 deletions tests/performance/network/network_storage/plot-results.py
@@ -31,10 +31,10 @@

#----------------------------------------------------------------------------
# convenience definitions to loop over all marker/colour styles
# if we have a lot of lines on the same graph
colours = ('r','g','b','c','y','m','k')
markers = ('+', '.', 'o', '*', '^', 's', 'v', ',', '<', '>', '8', 's', 'p', 'h', 'H', 'D', 'd')
filled_markers = ('o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd')
#----------------------------------------------------------------------------

if (not options.show_graph) :
@@ -51,30 +51,31 @@
elif len(size) == 6 :
options.fig_size = (size[0:2], size[2:6])
else :
raise ValueError("--fig-size must be a string of 2 of 6 numbers")
raise ValueError("--fig-size must be a string of 2 or 6 numbers")
except :
options.fig_size = ([12, 9], [0.08, 0.14, 0.91, 0.83])
# options.fig_size = ([6, 8], [0.16, 0.22, 0.79, 0.77])

#----------------------------------------------------------------------------
def maximum(iterable, default):
'''Like max(), but returns a default value if xs is empty.'''
try:
return max(iterable)
except ValueError:
return default

#----------------------------------------------------------------------------
def minimum(iterable, default):
'''Like min(), but returns a default value if xs is empty.'''
try:
return min(iterable)
except ValueError:
return default

#----------------------------------------------------------------------------
def sizeof_bytes(num):
'''Output a number as human readable bytes.'''
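# e.g. sizeof_bytes(512) returns "512 bytes" and sizeof_bytes(2097152) returns "2 MB"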
for x in ['bytes','KB','MB','GB','TB']:
if num < 1024.0:
return "%.0f %s" % (num, x)
num /= 1024.0
@@ -83,7 +84,7 @@ def sizeof_bytes(num):
# plot N series of data onto a single graph
# each series is an array, there are N arrays in the supplied map
# graph_map, a map of arrays of {x,y,other} data
# labelstrings, {xaxis, yaxis, series_variable}
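# an illustrative (hypothetical) example of the expected shapes:
#   graph_map    = {4: [[2, 1.5], [4, 2.9]], 8: [[2, 1.8], [4, 3.4]]}  # one series per thread count, entries are [nodes, BW]
#   labelstrings = ["Nodes", "BW GB/s", "Block size 64 KB"]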
def plot_one_collection(graph_map, labelstrings, axes) :
print "Plotting %i graphs of '%s'" % (len(graph_map), labelstrings[2])
# need to find min and max values for x-axis
@@ -93,6 +94,9 @@ def plot_one_collection(graph_map, labelstrings, axes) :
# need to find min and max values for y-axis
y1 = 0
y2 = 5
# restart markers and colours from beginning of list for each new graph
localmarkers = itertools.cycle(markers)
localcolours = itertools.cycle(colours)
series_keys = sorted(graph_map.keys())
num_series = len(series_keys)
for index in range(len(series_keys)):
@@ -104,7 +108,7 @@ def plot_one_collection(graph_map, labelstrings, axes) :
# the values for plotting manually.
values = [[v[0],v[1]] for v in series]
#print "the values are ", values
axes.loglog(*zip(*values), basex=2, basey=2, markersize=8, marker=localmarkers.next(), color=localcolours.next())
# track max x value for scaling of axes nicely
xvalues = sorted([x[0] for x in values])
# we want a nice factor of 2 for our axes limits
@@ -117,13 +121,15 @@ def plot_one_collection(graph_map, labelstrings, axes) :
axes.set_xlim(minimum(xlabels,1), maximum(xlabels,3)*1.5)
axes.set_xticklabels(xlabels)
axes.set_xlabel(labelstrings[0])
# ylabels should also be automatic, but for now, do them by hand
# these should be GB/s or MB/s etc etc
ylabels = [0.25, 0.5,1,2,4,8,16]
#ylabels = [0.125, 0.25, 0.5,1,2,4,8,16]
# setup the yaxis parameters
axes.set_yscale('log', basey=2)
axes.set_ylim(0.01, 16 )
#axes.set_yticklabels(ylabels)
axes.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, pos: str('%.2f' % x)))
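# e.g. the tick at 0.25 is rendered as '0.25' and the tick at 8 as '8.00'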
axes.set_ylabel(labelstrings[1])
axes.tick_params(axis='x', which='major', labelsize=9)
#
@@ -138,70 +144,84 @@ def plot_one_collection(graph_map, labelstrings, axes) :
axes.set_title(labelstrings[2], fontsize=10)

#----------------------------------------------------------------------------
def plot_configuration(graph_map, axesnames, mapnames, titlefunction, legendfunction) :

fig = plt.figure(figsize = options.fig_size[0])
axes = []

# the supplied graphs come as a 2D array of params
num_param1 = len(graph_map.keys())
num_param2 = len(graph_map[graph_map.keys()[0]].keys())

# All the graphs of param2 will be the same type,
# but we need one legend per param1 regardless
# so num_param2legend is used in places to add space for the extra legend plot
num_param2legend = num_param2+1
doLegend = True
numrows = num_param1
numcols = num_param2legend

# if the array of graphs is 1xN or Nx1, rearrange the num rows/cols
# to fit the page a little better instead of having one long row/column of plots
rearranged = False
if (num_param1==1) or (num_param2==1):
total = num_param1*num_param2legend
print "total is ", total
better = int(math.sqrt(total))
numrows = better
numcols = int(math.ceil(total/float(better)))
rearranged = True
print "Rearranged graphs from %i x %i using layout %i x %i" % (num_param1, num_param2, numrows, numcols)

# create an array of graphs for our parameter space
# grid cells are defined by {row, col} from top left and down
print "Creating array of graphs rows %i, cols %i" % (numrows, numcols)

# loop over input arrays/maps and create the actual plot data for each graph
row = 0
col = 0
graph_keys = sorted(graph_map.keys())
for param1_i in range(num_param1):
param1_key = graph_keys[param1_i]
param1_results = graph_map[param1_key]
param1_keys = sorted(param1_results.keys())
print "param1_ type ", param1_key
for param2_i in range(num_param2):
param2_key = param1_keys[param2_i]
param2_results = param1_results[param2_key]
param2_keys = sorted(param2_results.keys())
print "param2_ type ", param2_key
newplot = plt.subplot2grid((numrows, numcols), (row, col), colspan=1)
axes.append( newplot )
print "generating plot at {%i,%i}" % (row, col)
plot_one_collection(param2_results,
[axesnames[0], axesnames[1], mapnames[1] + " " + titlefunction(param2_key)],
newplot)
col += 1
if ((col % numcols)==0):
col = 0
row += 1
# at the end of each param2 group, there should be a legend
leg = plt.subplot2grid((numrows, numcols), (row, col), colspan=1)
leg.axis('off')
axes.append( leg )
# restart markers and colours from beginning of list for each new graph
localmarkers = itertools.cycle(markers)
localcolours = itertools.cycle(colours)
for line in range(len(param2_results)):
leg.plot([], label=mapnames[2] + " " + legendfunction(param2_keys[line]),
markersize=8,
marker=localmarkers.next(),
color=localcolours.next())
leg.legend(loc = 'upper left', ncol=(1,2)[len(param2_results)>5],
fontsize=8,
handlelength=3, borderpad=1.2, labelspacing=1.2,
shadow=True)
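# note: (1,2)[len(param2_results)>5] above indexes a tuple with a bool, giving
# 2 legend columns when there are more than 5 series and 1 column otherwise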
print "added legend at {%i,%i}" % (row, col)
col += 1
# if we reach the end of the graph row
if ((col % numcols)==0):
col = 0
row += 1

plt.tight_layout()
if options.show_graph :
@@ -227,15 +247,27 @@ def insert_safe(a_map, key1, key2, key3, value) :
# read results data in and generate arrays/maps of values
# for each parcelport, threadcount, blocksize, ...
for csvfile in args :

# output file path for svg/png
base = os.path.splitext(csvfile)[0]
# empty list of graphs we will fill for exporting
graphs_to_save = []

# open the CSV file
with open(csvfile) as f:
io = StringIO(f.read().replace(':', ','))
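# note: ':' is normalised to ',' above so that csv.reader below sees each CSVData
# line as one flat comma-separated record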
reader = csv.reader(io)

# to plot something not already included, add it to this list
Read_Net_Blocksize_Thread = {}
Write_Net_Blocksize_Thread = {}
Read_Net_Thread_Blocksize = {}
Write_Net_Thread_Blocksize = {}
Read_Net_Nodes_Blocksize_T = {}
Write_Net_Nodes_Blocksize_T = {}

# loop over the CSV file lines,
# if the CSV output is changed for the test, these offsets will need to be corrected
rownum = 0
for row in reader:
readflag = row[1].strip() in ("read")
@@ -249,37 +281,58 @@ def insert_safe(a_map, key1, key2, key3, value) :
BW = 1.0
#print "read=%i Network=%s Nodes=%4i Threads=%3i IOPsize=%9i IOPs=%6.1f BW=%6.1f" % (readflag, Network, Nodes, Threads, IOPsize, IOPs, BW)

# we use a map structure 3 deep with an array at the leaf,
# this allows us to store param1, param2, param3, {x,y}
# combinations of params can be plotted against each other
# by rearranging the map levels and {x,y} vars.
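# e.g. (hypothetical values) Read_Net_Blocksize_Thread['mpi'][65536][8] would hold
# [[1, 0.9], [2, 1.7], [4, 3.1]]: one [Nodes, BW] point per node count for that
# network, block size and thread count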
if (readflag):
#print "Adding Read data ", [Nodes,BW]
insert_safe(Read_Net_Blocksize_Thread, Network, IOPsize, Threads, [Nodes,BW])
insert_safe(Read_Net_Thread_Blocksize, Network, Threads, IOPsize, [Nodes,BW])
insert_safe(Read_Net_Nodes_Blocksize_T, Network, Nodes, IOPsize, [Threads,BW])
else :
#print "Adding Write data ", [Nodes,BW]
insert_safe(Write_Net_Blocksize_Thread, Network, IOPsize, Threads, [Nodes,BW])
insert_safe(Write_Net_Thread_Blocksize, Network, Threads, IOPsize, [Nodes,BW])
insert_safe(Write_Net_Nodes_Blocksize_T, Network, Nodes, IOPsize, [Threads,BW])
rownum += 1


# PLOT
# x-axis{Nodes}, y-axis{BW}, generate one graph per blocksize with series for each threadcount
fig_Read1 = plot_configuration(
Read_Net_Blocksize_Thread,
["Nodes", "BW GB/s"],
[Network, "Block size", "Threads"],
sizeof_bytes, # convert block size to KB/MB/TB etc
lambda x: str(x) # just print threads with no formatting
)
graphs_to_save.append([fig_Read1,"Read-by-block"])
#fig_Write1 = plot_configuration(Write_Net_Blocksize_Thread, ["Write", Network, "Block size", "Threads"])
#graphs_to_print.append([fig_Write1,"Write-by-block"])

# PLOT
# generate one graph per threadcount for each blocksize
fig_Read2 = plot_configuration(
Read_Net_Thread_Blocksize,
["Nodes", "BW GB/s"],
[Network, "Threads", "Block size"],
lambda x: str(x), # just print threads with no formatting
sizeof_bytes, # convert block size to KB/MB/TB etc
)
graphs_to_save.append([fig_Read2,"Read-by-thread"])

# PLOT
# generate one graph per node count for each blocksize
fig_Read3 = plot_configuration(
Read_Net_Nodes_Blocksize_T,
["Threads", "BW GB/s"],
[Network, "Nodes", "Block size"],
lambda x: str(x), # just print node counts with no formatting
sizeof_bytes, # convert block size to KB/MB/TB etc
)
graphs_to_save.append([fig_Read3,"Read-by-NodeBlock"])


# save plots to png and svg
for fig in graphs_to_save:
svg_name = base + "." + fig[1] + ".svg"
png_name = base + "." + fig[1] + ".png"
