Merge pull request #1106 from biddisco/benchmark
network_storage benchmark improvements, adding legends to plots and tidying layout
hkaiser committed Apr 4, 2014
2 parents e9ca618 + 262273d commit e58ed9b
Showing 2 changed files with 151 additions and 89 deletions.
17 changes: 13 additions & 4 deletions tests/performance/network/network_storage/instructions.md
@@ -52,26 +52,35 @@ The network_storage executable will produce several lines of output, but one line
contains the condensed information needed by the plotting script.
This line begins with the text "CSVData" to indicate comma separated data values.
Whilst jobs are running or when they have completed, you can execute a command
from the test root dir, such as
find . -name slurm.out -exec grep CSV {} \;
and a list of results generated from the jobs will be produced.
For plotting of results, the output should be directed into a file using
find . -name slurm.out -exec grep CSV {} \; >results-bgq-1a-2014-04-01.csv
where you use a file name applicable to your current experiment.
The generated file will be suitable for use by the plotting script.
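If the find/grep pipeline is not convenient, the same collection step can be done with a
small Python helper. This is only a sketch, assuming the job output files are named
slurm.out as above; the output file name is just an example:

```python
# Sketch: walk the test tree, pull the CSVData lines out of every slurm.out
# and append them to a single results file for the plotting script.
import os

def collect_csv(root='.', outfile='results.csv'):
    with open(outfile, 'w') as out:
        for dirpath, _, filenames in os.walk(root):
            if 'slurm.out' in filenames:
                with open(os.path.join(dirpath, 'slurm.out')) as f:
                    for line in f:
                        if 'CSVData' in line:
                            out.write(line)

collect_csv('.', 'results-bgq-1a-2014-04-01.csv')
```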

Important Note:
By default, the slurm jobs generated by the script have the 'exclusive' flag set.
This is because the jobs are intended to test BW of the network and it is
often desirable to have only a single job running at a time.
The 'exclusive' flag only works (under slurm) if all jobs have the same name,
so in your queue you will see many identical jobs and the queue will take a long time to drain.
If you can afford to run many jobs simultaneously, the job name may be set more
appropriately (details in script) and the exclusive flag removed.
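For reference, the relevant directives in a generated job script look roughly like the
following (a sketch only; the script sets the actual values and the job name shown here
is purely illustrative):
#SBATCH --exclusive
#SBATCH --job-name=network_storage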

------------------------------
plot-results.py
------------------------------
This is a Python script which takes results generated by the test program as
described above and plots a number of graphs for different parameter studies.
The results.csv file generated contains BW measurements, timing, thread, parcelport,
block size, etc. information for the plots.
The Python script parses the results and generates arrays (maps) of the data which
can be plotted in various ways. The script can be invoked as
plot-results.py results.csv
Optional arguments such as the figure size can be found by looking at the script.
The output from the script will be a series of svg and png files for each of the plots
created.
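For example, running
plot-results.py results-bgq-1a-2014-04-01.csv
will (going by the save loop at the end of the script) write one svg and one png per
group of plots next to the input file, with names such as
results-bgq-1a-2014-04-01.Read-by-block.png
results-bgq-1a-2014-04-01.Read-by-thread.png
results-bgq-1a-2014-04-01.Read-by-NodeBlock.png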


223 changes: 138 additions & 85 deletions tests/performance/network/network_storage/plot-results.py
@@ -31,10 +31,10 @@

#----------------------------------------------------------------------------
# convenience definitions to loop over all marker/colour styles
# if we have a lot of lines on the same graph
colours = ('r','g','b','c','y','m','k')
markers = ('+', '.', 'o', '*', '^', 's', 'v', ',', '<', '>', '8', 's', 'p', 'h', 'H', 'D', 'd')
filled_markers = ('o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h', 'H', 'D', 'd')
#----------------------------------------------------------------------------

if (not options.show_graph) :
@@ -51,30 +51,31 @@
elif len(size) == 6 :
options.fig_size = (size[0:2], size[2:6])
else :
raise ValueError("--fig-size must be a string of 2 of 6 numbers")
raise ValueError("--fig-size must be a string of 2 or 6 numbers")
except :
options.fig_size = ([12, 9], [0.08, 0.14, 0.91, 0.83])
# options.fig_size = ([6, 8], [0.16, 0.22, 0.79, 0.77])

#----------------------------------------------------------------------------
def maximum(iterable, default):
'''Like max(), but returns a default value if xs is empty.'''
try:
return max(iterable)
except ValueError:
return default

#----------------------------------------------------------------------------
def minimum(iterable, default):
'''Like min(), but returns a default value if xs is empty.'''
try:
return min(iterable)
except ValueError:
return default

#----------------------------------------------------------------------------
def sizeof_bytes(num):
'''Output a number as human readable bytes.'''
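# e.g. sizeof_bytes(512) returns "512 bytes" and sizeof_bytes(2097152) returns "2 MB"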
for x in ['bytes','KB','MB','GB','TB']:
if num < 1024.0:
return "%.0f %s" % (num, x)
num /= 1024.0
@@ -83,7 +84,7 @@ def sizeof_bytes(num):
# plot N series of data onto a single graph
# each series is an array, there are N arrays in the supplied map
# graph_map, a map of arrays of {x,y,other} data
# labelstrings, {xaxis, yaxis, series_variable}
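# an illustrative (hypothetical) example of the expected shapes:
#   graph_map    = {4: [[2, 1.5], [4, 2.9]], 8: [[2, 1.8], [4, 3.4]]}  # one series per thread count, entries are [nodes, BW]
#   labelstrings = ["Nodes", "BW GB/s", "Block size 64 KB"]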
def plot_one_collection(graph_map, labelstrings, axes) :
print "Plotting %i graphs of '%s'" % (len(graph_map), labelstrings[2])
# need to find min and max values for x-axis
@@ -93,6 +94,9 @@ def plot_one_collection(graph_map, labelstrings, axes) :
# need to find min and max values for y-axis
y1 = 0
y2 = 5
# restart markers and colours from beginning of list for each new graph
localmarkers = itertools.cycle(markers)
localcolours = itertools.cycle(colours)
series_keys = sorted(graph_map.keys())
num_series = len(series_keys)
for index in range(len(series_keys)):
@@ -104,7 +108,7 @@ def plot_one_collection(graph_map, labelstrings, axes) :
# the values for plotting manually.
values = [[v[0],v[1]] for v in series]
#print "the values are ", values
axes.loglog(*zip(*values), basex=2, basey=2, markersize=8, marker=localmarkers.next(), color=localcolours.next())
# track max x value for scaling of axes nicely
xvalues = sorted([x[0] for x in values])
# we want a nice factor of 2 for our axes limits
@@ -117,13 +121,15 @@ def plot_one_collection(graph_map, labelstrings, axes) :
axes.set_xlim(minimum(xlabels,1), maximum(xlabels,3)*1.5)
axes.set_xticklabels(xlabels)
axes.set_xlabel(labelstrings[0])
# ylabels should also be automatic, but for now, do them by hand
# these should be GB/s or MB/s etc etc
ylabels = [0.25, 0.5,1,2,4,8,16]
#ylabels = [0.125, 0.25, 0.5,1,2,4,8,16]
# setup the yaxis parameters
axes.set_yscale('log', basey=2)
axes.set_ylim(0.01, 16 )
#axes.set_yticklabels(ylabels)
axes.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, pos: str('%.2f' % x)))
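# e.g. the tick at 0.25 is rendered as '0.25' and the tick at 8 as '8.00'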
axes.set_ylabel(labelstrings[1])
axes.tick_params(axis='x', which='major', labelsize=9)
#
@@ -138,70 +144,84 @@ def plot_one_collection(graph_map, labelstrings, axes) :
axes.set_title(labelstrings[2], fontsize=10)

#----------------------------------------------------------------------------
def plot_configuration(graph_map, axesnames, mapnames, titlefunction, legendfunction) :

fig = plt.figure(figsize = options.fig_size[0])
axes = []

# the supplied graphs come as a 2D array of params
num_param1 = len(graph_map.keys())
num_param2 = len(graph_map[graph_map.keys()[0]].keys())

# All the graphs of param2 will be the same type,
# but we need one legend per param1 regardless
# so num_param2legend is used in places to add space for the extra legend plot
num_param2legend = num_param2+1
doLegend = True
numrows = num_param1
numcols = num_param2legend

# if the array of graphs is 1xN or Nx1, rearrange the num rows/cols
# to fit the page a little better instead of having one long row/column of plots
rearranged = False
if (num_param1==1) or (num_param2==1):
total = num_param1*num_param2legend
print "total is ", total
better = int(math.sqrt(total))
numrows = better
numcols = int(math.ceil(total/float(better)))
rearranged = True
print "Rearranged graphs from %i x %i using layout %i x %i" % (num_param1, num_param2, numrows, numcols)

# create an array of graphs for our parameter space
# grid cells are defined by {row, col} from top left and down
print "Creating array of graphs rows %i, cols %i" % (numrows, numcols)

# loop over input arrays/maps and create the actual plot data for each graph
row = 0
col = 0
graph_keys = sorted(graph_map.keys())
for param1_i in range(num_param1):
param1_key = graph_keys[param1_i]
param1_results = graph_map[param1_key]
param1_keys = sorted(param1_results.keys())
print "param1_ type ", param1_key
for param2_i in range(num_param2):
param2_key = param1_keys[param2_i]
param2_results = param1_results[param2_key]
param2_keys = sorted(param2_results.keys())
print "param2_ type ", param2_key
newplot = plt.subplot2grid((numrows, numcols), (row, col), colspan=1)
axes.append( newplot )
print "generating plot at {%i,%i}" % (row, col)
plot_one_collection(param2_results,
[axesnames[0], axesnames[1], mapnames[1] + " " + titlefunction(param2_key)],
newplot)
col += 1
if ((col % numcols)==0):
col = 0
row += 1
# at the end of each param2 group, there should be a legend
leg = plt.subplot2grid((numrows, numcols), (row, col), colspan=1)
leg.axis('off')
axes.append( leg )
# restart markers and colours from beginning of list for each new graph
localmarkers = itertools.cycle(markers)
localcolours = itertools.cycle(colours)
for line in range(len(param2_results)):
leg.plot([], label=mapnames[2] + " " + legendfunction(param2_keys[line]),
markersize=8,
marker=localmarkers.next(),
color=localcolours.next())
leg.legend(loc = 'upper left', ncol=(1,2)[len(param2_results)>5],
fontsize=8,
handlelength=3, borderpad=1.2, labelspacing=1.2,
shadow=True)
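# note: (1,2)[len(param2_results)>5] above indexes a tuple with a bool, giving
# 2 legend columns when there are more than 5 series and 1 column otherwise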
print "added legend at {%i,%i}" % (row, col)
col += 1
# if we reach the end of the graph row
if ((col % numcols)==0):
col = 0
row += 1

plt.tight_layout()
if options.show_graph :
@@ -227,15 +247,27 @@ def insert_safe(a_map, key1, key2, key3, value) :
# read results data in and generate arrays/maps of values
# for each parcelport, threadcount, blocksize, ...
for csvfile in args :

# output file path for svg/png
base = os.path.splitext(csvfile)[0]
# empty list of graphs we will fill for exporting
graphs_to_save = []

# open the CSV file
with open(csvfile) as f:
io = StringIO(f.read().replace(':', ','))
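# note: ':' is normalised to ',' above so that csv.reader below sees each CSVData
# line as one flat comma-separated record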
reader = csv.reader(io)

# to plot something not already included, add it to this list
Read_Net_Blocksize_Thread = {}
Write_Net_Blocksize_Thread = {}
Read_Net_Thread_Blocksize = {}
Write_Net_Thread_Blocksize = {}
Read_Net_Nodes_Blocksize_T = {}
Write_Net_Nodes_Blocksize_T = {}

# loop over the CSV file lines,
# if the CSV output is changed for the test, these offsets will need to be corrected
rownum = 0
for row in reader:
readflag = row[1].strip() in ("read")
@@ -249,37 +281,58 @@ def insert_safe(a_map, key1, key2, key3, value) :
BW = 1.0
#print "read=%i Network=%s Nodes=%4i Threads=%3i IOPsize=%9i IOPs=%6.1f BW=%6.1f" % (readflag, Network, Nodes, Threads, IOPsize, IOPs, BW)

# we use a map structure 3 deep with an array at the leaf,
# this allows us to store param1, param2, param3, {x,y}
# combinations of params can be plotted against each other
# by rearranging the map levels and {x,y} vars.
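# e.g. (hypothetical values) Read_Net_Blocksize_Thread['mpi'][65536][8] would hold
# [[1, 0.9], [2, 1.7], [4, 3.1]]: one [Nodes, BW] point per node count for that
# network, block size and thread count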
if (readflag):
#print "Adding Read data ", [Nodes,BW]
insert_safe(Read_Net_Blocksize_Thread, Network, IOPsize, Threads, [Nodes,BW])
insert_safe(Read_Net_Thread_Blocksize, Network, Threads, IOPsize, [Nodes,BW])
insert_safe(Read_Net_Nodes_Blocksize_T, Network, Nodes, IOPsize, [Threads,BW])
else :
#print "Adding Write data ", [Nodes,BW]
insert_safe(Write_Net_Blocksize_Thread, Network, IOPsize, Threads, [Nodes,BW])
insert_safe(Write_Net_Thread_Blocksize, Network, Threads, IOPsize, [Nodes,BW])
insert_safe(Write_Net_Nodes_Blocksize_T, Network, Nodes, IOPsize, [Threads,BW])
rownum += 1


# PLOT
# x-axis{Nodes}, y-axis{BW}, generate one graph per blocksize with series for each threadcount
fig_Read1 = plot_configuration(
Read_Net_Blocksize_Thread,
["Nodes", "BW GB/s"],
[Network, "Block size", "Threads"],
sizeof_bytes, # convert block size to KB/MB/TB etc
lambda x: str(x) # just print threads with no formatting
)
graphs_to_save.append([fig_Read1,"Read-by-block"])
#fig_Write1 = plot_configuration(Write_Net_Blocksize_Thread, ["Write", Network, "Block size", "Threads"])
#graphs_to_print.append([fig_Write1,"Write-by-block"])

# PLOT
# generate one graph per threadcount for each blocksize
fig_Read2 = plot_configuration(
Read_Net_Thread_Blocksize,
["Nodes", "BW GB/s"],
[Network, "Threads", "Block size"],
lambda x: str(x), # just print threads with no formatting
sizeof_bytes, # convert block size to KB/MB/TB etc
)
graphs_to_save.append([fig_Read2,"Read-by-thread"])

# PLOT
# generate one graph per node count for each blocksize
fig_Read3 = plot_configuration(
Read_Net_Nodes_Blocksize_T,
["Threads", "BW GB/s"],
[Network, "Nodes", "Block size"],
lambda x: str(x), # just print node counts with no formatting
sizeof_bytes, # convert block size to KB/MB/TB etc
)
graphs_to_save.append([fig_Read3,"Read-by-NodeBlock"])


# save plots to png and svg
for fig in graphs_to_save:
svg_name = base + "." + fig[1] + ".svg"
png_name = base + "." + fig[1] + ".png"
