In [1]:
import numpy as np
import pandas as pd
import pylab as pl

In [2]:
import re

def extract_serial_data(moves, path = "../data/"):
    """Extract master time, elapsed time and CPU percentage from a serial run file.

    The file read is ``path + "serial.1" + "0"*moves`` (for example
    ``../data/serial.100000000`` when ``moves`` is 8).

    Parameters
    ----------
    moves : int
        Order of magnitude of the number of moves of the file.
    path : str
        Prefix (directory, including the trailing separator) of the text files.

    Returns
    -------
    tuple of lists
        ``(master_time, elapsed_time, cpu_percentage)`` with all the times
        and CPU percentages found in the file, in file order.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError, IndexError
        If a matching line does not have the expected layout.
    """
    fname = path + "serial.1" + "0"*moves
    master_time = []
    elapsed_time = []
    cpu_percentage = []

    # The context manager closes the file even when a malformed line makes
    # one of the float() conversions below raise.
    with open(fname, "r") as f:
        for line in f:
            # find() > 0: the keyword has to appear after the first character
            # of the line (kept as in the original so existing files parse
            # identically).
            if line.find("walltime") > 0:
                # The time is the last whitespace-separated token of the line.
                master_time.append(float(line.split()[-1]))
            if line.find("elapsed") > 0:
                # Fields 2 and 3 are combined as field2*60 + field3
                # (presumably the "m:ss.ss" elapsed field printed by the
                # `time` command -- confirm against the data files);
                # field 4 is the number in front of "%CPU".
                fields = re.split(' |elapsed |:|%CPU', line)
                elapsed_time.append(float(fields[2])*60 + float(fields[3]))
                cpu_percentage.append(float(fields[4]))

    return (master_time, elapsed_time, cpu_percentage)


def extract_parallel_data(runtype, procs, moves, path = "../data/"):
    """Extract master time, elapsed time and CPU percentage from a parallel run file.

    The file read is ``path + runtype + "." + str(procs) + ".1" + "0"*moves``
    (for example ``../data/strong.4.100000000``).

    Parameters
    ----------
    runtype : str
        "weak" or "strong", type of the run to extract the data from.
    procs : int
        Number of processors of the file.
    moves : int
        Order of magnitude of the number of moves of the file.
    path : str
        Prefix (directory, including the trailing separator) of the text files.

    Returns
    -------
    tuple of lists
        ``(master_time, elapsed_time, cpu_percentage)``, one entry per
        "elapsed" line found in the file.  Each ``master_time`` entry is the
        largest of the values read from the "walltime on" lines located
        between the previous "elapsed" line (or the start of the file) and
        the current one.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If an "elapsed" line is not preceded by any "walltime on" line, or a
        number cannot be parsed.
    IndexError
        If an "elapsed" line has fewer fields than expected.
    """
    fname = path+runtype+"."+str(procs)+".1"+"0"*moves
    master_time = []
    elapsed_time = []
    cpu_percentage = []
    times = []  # walltimes collected for the run currently being read

    # The context manager closes the file even when parsing raises.
    with open(fname, "r") as f:
        for line in f:
            # find() > 0: the keyword has to appear after the first character
            # of the line (kept as in the original so existing files parse
            # identically).
            if line.find("walltime on") > 0:
                times.append(float(line.split()[-1]))
            if line.find("elapsed") > 0:
                # Fields 2 and 3 are combined as field2*60 + field3
                # (presumably the "m:ss.ss" elapsed field printed by the
                # `time` command -- confirm against the data files);
                # field 4 is the number in front of "%CPU".
                fields = re.split(' |elapsed |:|%CPU', line)
                elapsed_time.append(float(fields[2])*60 + float(fields[3]))
                cpu_percentage.append(float(fields[4]))
                # The "elapsed" line ends a run: keep the slowest walltime
                # and start collecting afresh for the next run.
                master_time.append(max(times))
                times = []

    return (master_time, elapsed_time, cpu_percentage)

In [3]:
def csv_print(f, data):
    """Write the items of ``data`` to ``f`` as one comma-separated line.

    Parameters
    ----------
    f : writable text file object
        Anything with a ``write(str)`` method.
    data : tuple or list
        Values to write; each one is converted with ``str``.  An empty
        sequence produces an empty line instead of raising ``IndexError``.
    """
    f.write(",".join(str(d) for d in data) + "\n")

# Create the csv files containing the runtimes of the weak and strong
# scalability tests: one file per (run type, number of moves) and one row per
# processor count.
runtypes = ["strong", "weak"]
procs = [1]
procs.extend(list(range(4, 52, 4)))
moves = list(range(8, 12))

for run in runtypes:
    for move in moves:
        # The exponent is zero-padded to two digits ("10to08" ... "10to11").
        csv_name = run+"-scalability-10to"+str(move).zfill(2)+".csv"

        # The context manager closes the csv even if one of the data files is
        # missing or malformed.
        with open(csv_name, "w") as f:
            f.write("#header line: GPU processors,avg,error_bar,run1,run2,run3\n")

            for proc in procs:
                # In case of weak with 1e11 moves we only have runs with 1,12,24,48 processors
                if(run=="weak" and move==11 and (proc not in [1,12,24,48])):
                    continue

                master_time = extract_parallel_data(run, proc, move)[0]
                mean = np.mean(master_time)
                # Error bar: half of the spread between slowest and fastest run.
                err = (max(master_time)-min(master_time))*0.5
                # The header declares the processor count as first column, so
                # write it; without it the rows cannot be attributed to a
                # processor count (notably for the reduced weak/1e11 set).
                csv_print(f, [proc, mean, err] + master_time)

In [4]:
# Speedup of the sum of N numbers

def Ts(N, Tread=1e-4, Tcomp=2e-9):
    """Serial-time model for the sum of N numbers: ``Tread + N*Tcomp``.

    Parameters
    ----------
    N : number or array
        Problem size.
    Tread : float
        Constant term of the model (presumably the time to read the input --
        confirm).
    Tcomp : float
        Coefficient multiplying ``N`` (presumably the time of one
        computation -- confirm).
    """
    compute_time = N*Tcomp
    return Tread + compute_time

def Tp_naive(N, p, Tread=1e-4, Tcomp=2e-9, Tcomm=1e-6):
    """Parallel-time model of the naive sum of N numbers on p processors.

    Returns ``Tread + 2*(p-1)*Tcomm + (N/p + p-1)*Tcomp``: the communication
    term grows linearly with ``p``.  ``N`` and ``p`` may be scalars or numpy
    arrays (the expression only uses arithmetic operators).
    """
    communication = 2*(p-1)*Tcomm
    computation = (N/p + p-1)*Tcomp
    return Tread + communication + computation

def Tp_enhanced(N, p, Tread=1e-4, Tcomp=2e-9, Tcomm=1e-6):
    """Parallel-time model of the enhanced sum of N numbers on p processors.

    Returns ``Tread + 2*ceil(log2(p))*Tcomm + (N/p + p-1)*Tcomp``: compared
    with ``Tp_naive`` the communication term grows with ``ceil(log2(p))``
    instead of ``p-1``.  ``N`` and ``p`` may be scalars or numpy arrays.
    """
    communication_steps = np.ceil(np.log2(p))
    communication = 2*communication_steps*Tcomm
    computation = (N/p + p-1)*Tcomp
    return Tread + communication + computation
    
# For every problem size N, the processor count in 1..100 giving the largest
# speedup Ts/Tp of the enhanced algorithm (argmax is 0-based, hence the +1).
p = np.arange(1, 101, dtype=float)
N = np.array([2e4, 1e5, 2e5, 1e6, 2e7])
max_speedup = [np.argmax(Ts(n)/Tp_enhanced(n, p)) + 1 for n in N]

max_speedup

[16, 64, 100, 100, 100]

In [5]:
# File of the speedup of the sum: for every problem size N, the processor
# count in 1..100 that maximises the speedup of the naive and of the enhanced
# algorithm.

p = np.arange(1, 101, dtype=float)
N = np.array([2e4, 1e5, 2e5, 1e6, 2e7])

# The context manager closes the file even if one of the model calls raises.
with open("performance-model.csv", "w") as f:
    f.write("#header: N, best P naive algorithm , best P for enhanced algorithm\n")
    for n in N:
        # argmax is 0-based while p starts at 1, hence the +1.
        best_naive = np.argmax(Ts(n)/Tp_naive(n, p)) + 1
        best_enhanced = np.argmax(Ts(n)/Tp_enhanced(n, p)) + 1
        csv_print(f, [int(n), best_naive, best_enhanced])

In [32]:
# Plot of the elapsed and internal time of strong scaling

%matplotlib qt
import menzalib as mz

def plot_times(speedup=False, name="", Ts=2.63, dTs=0.01):
    """Plot internal and elapsed times (or speedups) of the strong-scaling runs.

    Two side-by-side log-log panels are drawn: "Internal time" (first list
    returned by ``extract_parallel_data``) and "Elapsed time" (second list).
    Each panel has one curve per number of moves (10^8 ... 10^11) and one
    point per processor count (1, 4, 8, ..., 48).  A point is the mean over
    the runs found in the data file; its error bar is half of the max-min
    spread of those runs.

    NOTE: this notebook defines ``plot_times`` in two different cells; the
    definition executed last is the one later calls will use.

    Parameters
    ----------
    speedup : bool
        When true, ``Ts/time`` is plotted instead of the time and the error
        bars come from ``mz.drapp(Ts, Ts*0.003, time, dtime)``.
    name : str
        File name handed to ``pl.savefig``; nothing is saved when empty.
    Ts : float
        Reference time for the first number of moves; it is multiplied by 10
        for every following one.  Only influences the plot when ``speedup``
        is true.
    dTs : float
        Accepted but not used by the body (the reference uncertainty is
        hard-coded as ``Ts*0.003``).
    """
    if speedup:
        def to_y(x, Ts):
            return Ts/x

        def to_dy(x, dx, Ts):
            return mz.drapp(Ts, Ts*0.003, x, dx)
    else:
        def to_y(x, Ts):
            return x

        def to_dy(x, dx, Ts):
            return dx

    procs = [1, *range(4, 52, 4)]
    moves = list(range(8, 12))

    # Column 0: internal time, column 1: elapsed time.
    time = np.empty((len(procs), 2))
    dtime = np.empty((len(procs), 2))

    fig, axs = pl.subplots(1, 2)
    ax1, ax2 = axs

    for move in moves:
        for j, proc in enumerate(procs):
            runs = extract_parallel_data("strong", proc, move)[0:2]
            time[j] = np.mean(runs, 1)
            dtime[j] = (np.max(runs, 1) - np.min(runs, 1))/2
        for column, ax in enumerate((ax1, ax2)):
            ax.errorbar(procs,
                        to_y(time[:, column], Ts),
                        to_dy(time[:, column], dtime[:, column], Ts),
                        fmt=".-", label="Moves=10to"+str(move))
        # The reference time is scaled by 10 for the next number of moves.
        Ts *= 10

    ylabel = "Speedup S(P)" if speedup else "Time [s]"
    for ax, title in ((ax1, "Internal time"), (ax2, "Elapsed time")):
        ax.set_xscale("log")
        ax.set_yscale("log")
        ax.set_xlabel("processors P")
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend()

    pl.tight_layout()
    if name != "":
        pl.savefig(name, dpi=300)
    fig.show()


# Draw the raw times (no speedup) and save the figure.
plot_times(speedup=False, name="strong_scaling_times.png")

In [29]:
# Plot of the strong scalability

%matplotlib qt
import menzalib as mz

def plot_times(speedup=False, name=""):
    """Plot the strong-scaling elapsed time (or speedup) against processor count.

    One log-log panel titled "Strong scaling" with one curve per number of
    moves (10^8 ... 10^11) and one point per processor count
    (1, 4, 8, ..., 48), built from the elapsed times returned by
    ``extract_parallel_data``.  A point is the mean over the runs in the data
    file; its error bar is half of the max-min spread.  For every number of
    moves the reference time is the smaller of the mean serial elapsed time
    and the mean one-processor parallel elapsed time.  A "Linear scaling"
    curve (y = P) is always added, whatever the value of ``speedup``.

    NOTE: this notebook defines ``plot_times`` in two different cells; the
    definition executed last is the one later calls will use.

    Parameters
    ----------
    speedup : bool
        When true, ``reference/time`` is plotted instead of the time and the
        error bars come from ``mz.drapp(Ts, Ts*0.003, time, dtime)``.
    name : str
        File name handed to ``pl.savefig``; nothing is saved when empty.
    """
    if speedup:
        def to_y(x, Ts):
            return Ts/x

        def to_dy(x, dx, Ts):
            return mz.drapp(Ts, Ts*0.003, x, dx)
    else:
        def to_y(x, Ts):
            return x

        def to_dy(x, dx, Ts):
            return dx

    procs = [1, *range(4, 52, 4)]
    moves = list(range(8, 12))

    # Column 0: internal time (computed but not plotted), column 1: elapsed time.
    time = np.empty((len(procs), 2))
    dtime = np.empty((len(procs), 2))

    fig, ax1 = pl.subplots(1, 1)

    for move in moves:
        serial_elapsed = np.mean(extract_serial_data(move)[1])
        one_proc_elapsed = np.mean(extract_parallel_data("strong", 1, move)[1])
        Ts = np.min([serial_elapsed, one_proc_elapsed])

        for j, proc in enumerate(procs):
            runs = extract_parallel_data("strong", proc, move)[0:2]
            time[j] = np.mean(runs, 1)
            dtime[j] = (np.max(runs, 1) - np.min(runs, 1))/2
        elapsed, delapsed = time[:, 1], dtime[:, 1]
        ax1.errorbar(procs, to_y(elapsed, Ts), to_dy(elapsed, delapsed, Ts),
                     fmt=".-", label="Moves=10to"+str(move))

    ax1.errorbar(procs, procs, fmt=".-", label="Linear scaling")
    ax1.set_xscale("log")
    ax1.set_yscale("log")
    ax1.set_xlabel("processors P")
    ax1.set_ylabel("Speedup S(P)" if speedup else "Time [s]")
    ax1.set_title("Strong scaling")
    ax1.legend()

    pl.tight_layout()
    if name != "":
        pl.savefig(name, dpi=300)
    fig.show()


# Draw the speedup curves and save the figure.
plot_times(speedup=True, name="strong_scaling_speedup.png")

In [8]:
# For every number of moves, print the processor count giving the highest
# strong-scaling speedup together with that speedup.
procs = [1]
procs.extend(list(range(4, 52, 4)))
moves = list(range(8, 12))

speedup = np.empty(len(procs))

for move in moves:
    # Reference time: the faster of the serial run and the one-processor
    # parallel run (mean elapsed times).
    serial_elapsed = np.mean(extract_serial_data(move)[1])
    one_proc_elapsed = np.mean(extract_parallel_data("strong", 1, move)[1])
    # Deliberately not called `Ts`: a module-level `Ts` would overwrite the
    # model function Ts(N) defined in the performance-model cell and make
    # re-running those cells fail.
    t_ref = np.min([serial_elapsed, one_proc_elapsed])

    for (j, proc) in enumerate(procs):
        data = extract_parallel_data("strong", proc, move)[1]
        speedup[j] = t_ref/np.mean(data)
    print(procs[np.argmax(speedup)], np.max(speedup))

plot_times(True, "strong_scaling_speedup.png", )

16 4.2430939226519335
36 15.189453125
48 33.203185535944904
48 39.06597134322313


In [33]:
# Model for the parallel overhead: for every number of moves and processor
# count P, overhead = mean elapsed time on P processors - serial time / P.

procs = [1]
procs.extend(list(range(4, 52, 4)))
moves = list(range(8, 12))
time = np.empty((len(moves),len(procs)))
dtime = np.empty((len(moves),len(procs)))

pl.figure()
for (i, move) in enumerate(moves):
    # Deliberately not called `Ts`: a module-level `Ts` would overwrite the
    # model function Ts(N) defined in the performance-model cell.
    t_serial = np.mean(extract_serial_data(move)[1])
    for (j, proc) in enumerate(procs):
        data = extract_parallel_data("strong", proc, move)[1]
        time[i, j] = np.mean(data) - t_serial/proc
        # Uncertainty of a difference: the two contributions add up.
        # Subtracting them (as was done before) can give a negative error bar.
        dtime[i, j] = (max(data)-min(data))/2 + t_serial*0.003/proc
    # NOTE(review): hand-set uncertainty for the last number of moves on one
    # processor; its origin is not documented -- confirm it is still wanted.
    dtime[-1,0] = 0.3
    pl.errorbar(procs, time[i, :], dtime[i, :],
                fmt=".-", label="Moves=10to"+str(move))

pl.errorbar(procs, [0]*len(procs),
            fmt=".-", label="Linear scaling", color="black")
pl.xlabel("Processors P")
pl.ylabel("T overhead [s]")
pl.yscale("log")
pl.title("Parallel overhead")
pl.legend()
# Lay out and save before showing: with a blocking backend the figure is gone
# once show() returns and savefig would write an empty image.
pl.tight_layout()
pl.savefig("parallel_overhead_time.png", dpi=300)
pl.show()
print(time)

[[ 0.28        0.30333333  0.34        0.39666667  0.44333333  0.50866667
   0.57333333  0.7252381   0.78333333  0.90555556  1.036       1.15515152
   1.31333333]
 [ 0.06        0.3525      0.32958333  0.44638889  0.46979167  0.54383333
   0.62986111  0.83083333  0.90989583  0.98657407  1.11525     1.35416667
   1.39659722]
 [ 1.15666667  0.4875      1.14208333  0.98472222  0.844375    1.08816667
   1.42402778  2.07440476  2.1621875   2.16824074  3.34241667  2.35007576
   2.38701389]
 [ 1.05333333  1.92        2.50666667  2.46888889  3.585       6.718
   5.66111111 13.80714286 14.3925     13.87296296 13.83066667 13.10272727
  12.25388889]]


In [70]:
from scipy.optimize import curve_fit

def f(x, Tmpi, k):
    """Linear model ``Tmpi + k*(x-1)`` passed to ``curve_fit``.

    ``x`` is the independent variable (the fit below passes the processor
    counts); ``Tmpi`` is the value at ``x = 1`` and ``k`` the slope.
    """
    linear_term = k*(x-1)
    return Tmpi + linear_term

# Fit the linear model f to the third-from-last row of `time` (as filled by
# the parallel-overhead cell), with both parameters constrained to [0, 1],
# then draw the data and the fitted line.
x = np.array(procs)
y, dy = time[-3, :], dtime[-3, :]
popt, pcov = curve_fit(f, x, y, p0=[0.3, 0.1], bounds=(0, 1))
print(popt, pcov)

pl.errorbar(x, y, dy)
pl.plot(x, f(x, *popt))

[1.20467865 0.03301379] [[ 9.49379775e-02 -2.36257952e-03]
 [-2.36257952e-03  6.47519979e-05]]


[<matplotlib.lines.Line2D at 0x7fc699bcd5e0>]

In [68]:
def model1(x, N=1e11):
    """Fixed linear overhead model ``0.28 + (x-1)*0.03``.

    ``x`` may be a scalar or a numpy array.  ``N`` is accepted but not used
    by the body.
    """
    slope, offset = 0.03, 0.28
    return offset + slope*(x-1)


# Compare the last row of `time` (as filled by the parallel-overhead cell)
# with model1 on a logarithmic y axis.
x = np.array(procs)
y, dy = time[-1, :], dtime[-1, :]
pl.errorbar(x, y, dy)
pl.plot(procs, model1(x, 1e11))
pl.yscale("log")

In [27]:
# Weak scaling runtime and efficiency plot

# Plot of the weak scalability

%matplotlib qt
import menzalib as mz
        
procs = [1]
procs.extend(list(range(4, 52, 4)))
moves = list(range(8, 12))

# For 1e11 moves only the runs with 1, 12, 24 and 48 processors exist.
weak_procs = [1,12,24,48]

fig, axs = pl.subplots(1, 2)
ax1, ax2 = axs

for move in moves:
    run_procs = weak_procs if move == 11 else procs

    # Reference time: the faster of the serial run and the one-processor
    # strong-scaling run (mean elapsed times).  Deliberately not called `Ts`:
    # a module-level `Ts` would overwrite the model function Ts(N) defined in
    # the performance-model cell.
    serial_elapsed = np.mean(extract_serial_data(move)[1])
    one_proc_elapsed = np.mean(extract_parallel_data("strong", 1, move)[1])
    t_ref = np.min([serial_elapsed, one_proc_elapsed])

    time = np.empty(len(run_procs))
    dtime = np.empty(len(run_procs))
    for (j, proc) in enumerate(run_procs):
        data = extract_parallel_data("weak", proc, move)[1]
        time[j] = np.mean(data)
        # Error bar: half of the spread between slowest and fastest run.
        dtime[j] = (np.max(data)-np.min(data))/2

    # The label is built inside the loop so that every curve, including the
    # 1e11 one, carries its own number of moves.
    label = "Moves=10to"+str(move)
    ax1.errorbar(run_procs, time, dtime, fmt=".-", label=label)
    ax2.errorbar(run_procs, t_ref/time, mz.drapp(t_ref, 0.003*t_ref, time, dtime),
                 fmt=".-", label=label)

ax1.set_yscale("log")
ax1.set_xlabel("processors P")
ax1.set_ylabel("Time [s]")
ax1.set_title("Weak runtime")
ax1.legend()


ax2.set_xlabel("processors P")
# Only one y label is set: a previous "Efficency ..." label was overwritten
# right away by this one and contained an invalid "\e" escape sequence.
ax2.set_ylabel("$T_S/T_P$")
ax2.set_title("Weak efficency")
ax2.legend()

pl.tight_layout()
pl.savefig("weak_scaling.png", dpi=300)
fig.show()