In [293]:
import numpy as np
import plotly as py
import pandas as pd 
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [294]:
# Load the semicolon-separated timing tables, one per application (bt, sp, lu)
bt, sp, lu = (
    pd.read_csv(path, sep=';')
    for path in ('Data/bt.csv', 'Data/sp.csv', 'Data/lu.csv')
)
# count for number of plots
c = 1

# Data treatment

In [295]:
bt.head()

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3
0,4,8,24,bt-mz.D.8,00:02:25,00:02:24,00:02:25
1,4,12,16,bt-mz.D.12,00:01:52,00:01:52,00:01:52
2,4,16,12,bt-mz.D.16,00:01:51,00:01:50,00:01:50
3,4,24,8,bt-mz.D.24,00:01:39,00:01:39,00:01:39
4,4,32,6,bt-mz.D.32,00:01:46,00:01:45,00:01:45


In [296]:
sp.head()

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3
0,4,8,24,sp-mz.D.8,00:01:29,00:01:30,00:01:30
1,4,12,16,sp-mz.D.12,00:01:22,00:01:20,00:01:22
2,4,16,12,sp-mz.D.16,00:01:06,00:01:07,00:01:08
3,4,24,8,sp-mz.D.24,00:00:59,00:00:57,00:00:57
4,4,32,6,sp-mz.D.32,00:00:58,00:00:58,00:00:59


In [297]:
lu.head()

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3
0,4,8,24,lu-mz.D.8,00:02:31,00:02:30,00:02:32
1,4,12,16,lu-mz.D.12,00:23:33,00:23:40,00:23:25
2,4,16,12,lu-mz.D.16,00:02:36,00:02:37,00:02:38
3,6,12,24,lu-mz.D.12,00:18:19,00:18:27,00:18:37
4,8,16,24,lu-mz.D.16,00:01:17,00:01:16,00:01:16


In [298]:
# Make sure the 'partition' labels are stored as strings in every timing table
for frame in (bt, sp, lu):
    frame['partition'] = frame['partition'].astype(str)

In [299]:
# Convert elapsed1, elapsed2 and elapsed3 from 'HH:MM:SS' text to whole seconds (int)
def _hms_to_seconds(value):
    """Return the number of seconds encoded by an 'HH:MM:SS' string.

    Non-string values are returned unchanged, so running this cell a second
    time (when the columns already hold integers) is a no-op instead of an
    AttributeError on ``int.split``.
    """
    if not isinstance(value, str):
        return value
    # Field j (0 = hours, 1 = minutes, 2 = seconds) is weighted by 60 ** (2 - j).
    return sum(int(part) * 60 ** (2 - pos) for pos, part in enumerate(value.split(':')))


for frame in (bt, sp, lu):
    for column in ('elapsed1', 'elapsed2', 'elapsed3'):
        frame[column] = frame[column].apply(_hms_to_seconds)


In [300]:
bt.head()

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3
0,4,8,24,bt-mz.D.8,145,144,145
1,4,12,16,bt-mz.D.12,112,112,112
2,4,16,12,bt-mz.D.16,111,110,110
3,4,24,8,bt-mz.D.24,99,99,99
4,4,32,6,bt-mz.D.32,106,105,105


In [301]:
sp.head()

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3
0,4,8,24,sp-mz.D.8,89,90,90
1,4,12,16,sp-mz.D.12,82,80,82
2,4,16,12,sp-mz.D.16,66,67,68
3,4,24,8,sp-mz.D.24,59,57,57
4,4,32,6,sp-mz.D.32,58,58,59


In [302]:
lu.head()

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3
0,4,8,24,lu-mz.D.8,151,150,152
1,4,12,16,lu-mz.D.12,1413,1420,1405
2,4,16,12,lu-mz.D.16,156,157,158
3,6,12,24,lu-mz.D.12,1099,1107,1117
4,8,16,24,lu-mz.D.16,77,76,76


# Mean and standard deviation of elapsed time

In [303]:
# For every timing table, summarise the three repetitions ('elapsed1'..'elapsed3') row by row:
#   'mean elapsed t' -> arithmetic mean of the three runs
#   'sd elapsed t'   -> standard deviation of the three runs (pandas default, i.e. sample std)
elapsed_columns = ['elapsed1', 'elapsed2', 'elapsed3']
for frame in (bt, sp, lu):
    repetitions = frame[elapsed_columns]
    frame['mean elapsed t'] = repetitions.mean(axis=1)
    frame['sd elapsed t'] = repetitions.std(axis=1)

In [304]:
bt.head()

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3,mean elapsed t,sd elapsed t
0,4,8,24,bt-mz.D.8,145,144,145,144.666667,0.57735
1,4,12,16,bt-mz.D.12,112,112,112,112.0,0.0
2,4,16,12,bt-mz.D.16,111,110,110,110.333333,0.57735
3,4,24,8,bt-mz.D.24,99,99,99,99.0,0.0
4,4,32,6,bt-mz.D.32,106,105,105,105.333333,0.57735


In [305]:
sp.head()

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3,mean elapsed t,sd elapsed t
0,4,8,24,sp-mz.D.8,89,90,90,89.666667,0.57735
1,4,12,16,sp-mz.D.12,82,80,82,81.333333,1.154701
2,4,16,12,sp-mz.D.16,66,67,68,67.0,1.0
3,4,24,8,sp-mz.D.24,59,57,57,57.666667,1.154701
4,4,32,6,sp-mz.D.32,58,58,59,58.333333,0.57735


In [306]:
lu.head()

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3,mean elapsed t,sd elapsed t
0,4,8,24,lu-mz.D.8,151,150,152,151.0,1.0
1,4,12,16,lu-mz.D.12,1413,1420,1405,1412.666667,7.505553
2,4,16,12,lu-mz.D.16,156,157,158,157.0,1.0
3,6,12,24,lu-mz.D.12,1099,1107,1117,1107.666667,9.0185
4,8,16,24,lu-mz.D.16,77,76,76,76.333333,0.57735


# Theoretical speedup data

In [307]:
def get_speed_up(mode):
    """Collect iteration counts and speed-up values from the benchmark logs.

    Reads ``./Out/{mode}-{node}-1.out`` for node counts 4, 6 and 8, in that
    order. The parser is purely positional: it relies on fixed line offsets
    inside each log, so any change in the log layout silently breaks it.

    Layout expected for every run recorded in a log:
      * 6 lines to skip for the first run of the file, 3 for each later run;
      * one line holding the iteration count (token 2 for 'bt'/'lu', token 7
        otherwise, after splitting on single spaces) -- only parsed for the
        first run of a file and reused for the remaining runs of that file;
      * 5 lines to skip;
      * one line whose last space-separated token is the speed-up;
      * 63 lines to skip, plus 13 more for 'sp' or 6 more for 'lu'.

    Parameters
    ----------
    mode : str
        Application tag used in the file name. 'bt' and 'sp' logs are read as
        8 runs per file; for any other value the 4-node log is read as 3 runs
        and the 6- and 8-node logs as 1 run each.

    Returns
    -------
    tuple[list[int], list[float]]
        ``(it, speed_up)``: one entry per run, in file order.
    """
    def skip(handle, count):
        # Consume `count` lines without looking at them.
        for _ in range(count):
            handle.readline()

    eight_runs_per_file = mode == 'bt' or mode == 'sp'
    iterations_token = 2 if mode == 'bt' or mode == 'lu' else 7
    if mode == 'sp':
        extra_trailing_lines = 13
    elif mode == 'lu':
        extra_trailing_lines = 6
    else:
        extra_trailing_lines = 0

    iterations = []
    speed_ups = []
    for position, node in enumerate([4, 6, 8]):
        if eight_runs_per_file:
            run_count = 8
        else:
            # Only the first (4-node) log holds three runs; later logs hold one.
            run_count = 3 if position == 0 else 1

        with open(f'./Out/{mode}-{node}-1.out', 'r') as log:
            file_iterations = None
            for run in range(run_count):
                first_run = run == 0
                # The file header makes the first preamble longer (6 lines vs 3).
                skip(log, 6 if first_run else 3)
                iterations_line = log.readline()
                if first_run:
                    file_iterations = int(iterations_line.split(' ')[iterations_token])
                iterations.append(file_iterations)
                skip(log, 5)
                speed_up_line = log.readline()
                speed_ups.append(float(speed_up_line.split(' ')[-1]))
                # Rest of the run's report: 63 lines plus the mode-specific tail.
                skip(log, 63 + extra_trailing_lines)
        print(f'end mode {mode} with {node} nodes')
    return iterations, speed_ups

In [308]:
# Parse the logs of each application; every call returns (iterations, speed-ups),
# one entry per recorded run, and prints a progress line per node count
it_bt, speed_up_bt = get_speed_up(mode='bt')
it_sp, speed_up_sp = get_speed_up(mode='sp')
it_lu, speed_up_lu = get_speed_up(mode='lu')

end mode bt with 4 nodes
end mode bt with 6 nodes
end mode bt with 8 nodes
end mode sp with 4 nodes
end mode sp with 6 nodes
end mode sp with 8 nodes
end mode lu with 4 nodes
end mode lu with 6 nodes
end mode lu with 8 nodes


In [309]:
# Attach the parsed log values to the matching timing table.
# The lists must have exactly one entry per row, otherwise pandas raises a ValueError.
parsed_per_table = (
    (bt, speed_up_bt, it_bt),
    (sp, speed_up_sp, it_sp),
    (lu, speed_up_lu, it_lu),
)
for frame, speed_ups, iterations in parsed_per_table:
    frame['theoretical speedup'] = speed_ups
    frame['iterations'] = iterations

In [310]:
bt.head()

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3,mean elapsed t,sd elapsed t,theoretical speedup,iterations
0,4,8,24,bt-mz.D.8,145,144,145,144.666667,0.57735,191.98,250
1,4,12,16,bt-mz.D.12,112,112,112,112.0,0.0,191.71,250
2,4,16,12,bt-mz.D.16,111,110,110,110.333333,0.57735,191.96,250
3,4,24,8,bt-mz.D.24,99,99,99,99.0,0.0,191.63,250
4,4,32,6,bt-mz.D.32,106,105,105,105.333333,0.57735,191.88,250


In [311]:
sp.head()

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3,mean elapsed t,sd elapsed t,theoretical speedup,iterations
0,4,8,24,sp-mz.D.8,89,90,90,89.666667,0.57735,192.0,500
1,4,12,16,sp-mz.D.12,82,80,82,81.333333,1.154701,190.51,500
2,4,16,12,sp-mz.D.16,66,67,68,67.0,1.0,192.0,500
3,4,24,8,sp-mz.D.24,59,57,57,57.666667,1.154701,190.51,500
4,4,32,6,sp-mz.D.32,58,58,59,58.333333,0.57735,192.0,500


In [312]:
lu.head()

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3,mean elapsed t,sd elapsed t,theoretical speedup,iterations
0,4,8,24,lu-mz.D.8,151,150,152,151.0,1.0,192.0,300
1,4,12,16,lu-mz.D.12,1413,1420,1405,1412.666667,7.505553,192.0,300
2,4,16,12,lu-mz.D.16,156,157,158,157.0,1.0,192.0,300
3,6,12,24,lu-mz.D.12,1099,1107,1117,1107.666667,9.0185,288.0,300
4,8,16,24,lu-mz.D.16,77,76,76,76.333333,0.57735,384.0,300


# Speedup and efficiency data

In [313]:
# Reference times used as the numerator of the empirical speedup, one per application.
# NOTE(review): presumably baseline elapsed times in seconds measured separately — confirm the source.
t1_bt, t1_sp, t1_lu = 2206, 1151.58, 1818

In [314]:
# Derive two columns per timing table:
#   'empirical speedup' = reference time t1 / mean elapsed time
#   'efficiency'        = empirical speedup * 8 / (cpus-per-task * ntasks)
# NOTE(review): the factor 8 is not explained in the notebook — presumably the t1 baseline
# was measured on 8 cores; confirm before reusing the formula.
for frame, t1 in ((bt, t1_bt), (sp, t1_sp), (lu, t1_lu)):
    frame['empirical speedup'] = t1 / frame['mean elapsed t']
    total_cpus = frame['cpus-per-task'] * frame['ntasks']
    frame['efficiency'] = frame['empirical speedup'] * 8 / total_cpus

In [315]:
bt

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3,mean elapsed t,sd elapsed t,theoretical speedup,iterations,empirical speedup,efficiency
0,4,8,24,bt-mz.D.8,145,144,145,144.666667,0.57735,191.98,250,15.248848,0.635369
1,4,12,16,bt-mz.D.12,112,112,112,112.0,0.0,191.71,250,19.696429,0.820685
2,4,16,12,bt-mz.D.16,111,110,110,110.333333,0.57735,191.96,250,19.993958,0.833082
3,4,24,8,bt-mz.D.24,99,99,99,99.0,0.0,191.63,250,22.282828,0.928451
4,4,32,6,bt-mz.D.32,106,105,105,105.333333,0.57735,191.88,250,20.943038,0.872627
5,4,48,4,bt-mz.D.48,99,98,98,98.333333,0.57735,190.73,250,22.433898,0.934746
6,4,96,2,bt-mz.D.96,97,97,97,97.0,0.0,190.36,250,22.742268,0.947595
7,4,192,1,bt-mz.D.192,100,100,99,99.666667,0.57735,187.17,250,22.133779,0.922241
8,6,12,24,bt-mz.D.12,97,96,97,96.666667,0.57735,287.57,250,22.82069,0.633908
9,6,18,16,bt-mz.D.18,77,77,77,77.0,0.0,287.82,250,28.649351,0.795815


In [316]:
sp

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3,mean elapsed t,sd elapsed t,theoretical speedup,iterations,empirical speedup,efficiency
0,4,8,24,sp-mz.D.8,89,90,90,89.666667,0.57735,192.0,500,12.8429,0.535121
1,4,12,16,sp-mz.D.12,82,80,82,81.333333,1.154701,190.51,500,14.15877,0.589949
2,4,16,12,sp-mz.D.16,66,67,68,67.0,1.0,192.0,500,17.187761,0.716157
3,4,24,8,sp-mz.D.24,59,57,57,57.666667,1.154701,190.51,500,19.969595,0.832066
4,4,32,6,sp-mz.D.32,58,58,59,58.333333,0.57735,192.0,500,19.741371,0.822557
5,4,48,4,sp-mz.D.48,54,55,55,54.666667,0.57735,186.18,500,21.065488,0.877729
6,4,96,2,sp-mz.D.96,59,59,59,59.0,0.0,186.18,500,19.518305,0.813263
7,4,192,1,sp-mz.D.192,77,77,75,76.333333,1.154701,170.67,500,15.086201,0.628592
8,6,12,24,sp-mz.D.12,60,61,61,60.666667,0.57735,285.77,500,18.982088,0.52728
9,6,18,16,sp-mz.D.18,57,57,56,56.666667,0.57735,287.44,500,20.322,0.5645


In [317]:
lu

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3,mean elapsed t,sd elapsed t,theoretical speedup,iterations,empirical speedup,efficiency
0,4,8,24,lu-mz.D.8,151,150,152,151.0,1.0,192.0,300,12.039735,0.501656
1,4,12,16,lu-mz.D.12,1413,1420,1405,1412.666667,7.505553,192.0,300,1.286928,0.053622
2,4,16,12,lu-mz.D.16,156,157,158,157.0,1.0,192.0,300,11.579618,0.482484
3,6,12,24,lu-mz.D.12,1099,1107,1117,1107.666667,9.0185,288.0,300,1.641288,0.045591
4,8,16,24,lu-mz.D.16,77,76,76,76.333333,0.57735,384.0,300,23.816594,0.496179


In [318]:
# # sort each dataframe by 'empirical speedup' and 'ntasks' in descending order
# bt = bt.sort_values(by=['empirical speedup', 'ntasks'], ascending=False)
# sp = sp.sort_values(by=['empirical speedup', 'ntasks'], ascending=False)
# lu = lu.sort_values(by=['empirical speedup', 'ntasks'], ascending=False)

In [319]:
bt.head()

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3,mean elapsed t,sd elapsed t,theoretical speedup,iterations,empirical speedup,efficiency
0,4,8,24,bt-mz.D.8,145,144,145,144.666667,0.57735,191.98,250,15.248848,0.635369
1,4,12,16,bt-mz.D.12,112,112,112,112.0,0.0,191.71,250,19.696429,0.820685
2,4,16,12,bt-mz.D.16,111,110,110,110.333333,0.57735,191.96,250,19.993958,0.833082
3,4,24,8,bt-mz.D.24,99,99,99,99.0,0.0,191.63,250,22.282828,0.928451
4,4,32,6,bt-mz.D.32,106,105,105,105.333333,0.57735,191.88,250,20.943038,0.872627


In [320]:
sp.head()

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3,mean elapsed t,sd elapsed t,theoretical speedup,iterations,empirical speedup,efficiency
0,4,8,24,sp-mz.D.8,89,90,90,89.666667,0.57735,192.0,500,12.8429,0.535121
1,4,12,16,sp-mz.D.12,82,80,82,81.333333,1.154701,190.51,500,14.15877,0.589949
2,4,16,12,sp-mz.D.16,66,67,68,67.0,1.0,192.0,500,17.187761,0.716157
3,4,24,8,sp-mz.D.24,59,57,57,57.666667,1.154701,190.51,500,19.969595,0.832066
4,4,32,6,sp-mz.D.32,58,58,59,58.333333,0.57735,192.0,500,19.741371,0.822557


In [321]:
lu.head()

Unnamed: 0,nodes,ntasks,cpus-per-task,partition,elapsed1,elapsed2,elapsed3,mean elapsed t,sd elapsed t,theoretical speedup,iterations,empirical speedup,efficiency
0,4,8,24,lu-mz.D.8,151,150,152,151.0,1.0,192.0,300,12.039735,0.501656
1,4,12,16,lu-mz.D.12,1413,1420,1405,1412.666667,7.505553,192.0,300,1.286928,0.053622
2,4,16,12,lu-mz.D.16,156,157,158,157.0,1.0,192.0,300,11.579618,0.482484
3,6,12,24,lu-mz.D.12,1099,1107,1117,1107.666667,9.0185,288.0,300,1.641288,0.045591
4,8,16,24,lu-mz.D.16,77,76,76,76.333333,0.57735,384.0,300,23.816594,0.496179


# bt application analysis

## Elapsed time vs ntasks (MPIs)

In [322]:
# Mean elapsed time of bt against the number of MPI tasks, one line per node count
fig = px.line(
    x=bt['ntasks'],
    y=bt['mean elapsed t'],
    color=bt['nodes'],
    markers=True,
    color_discrete_sequence=['#440154', '#21918c', '#fde725'],
)

# Titles and axis labels (the y-axis label used to read 'Elapsed ime (s)')
fig.update_layout(
    title='Mean elapsed time for bt application',
    legend_title_text='Nodes',
    xaxis_title='Number of tasks (MPIs)',
    yaxis_title='Elapsed time (s)',
)

fig.show()
# save the figure as a png file
fig.write_image("Figures/mean_elapsed_time_bt.png")

## Speedup vs ntasks (MPIs)

In [323]:
# Empirical speedup of bt against the number of MPI tasks, one line per node count
fig = px.line(
    x=bt['ntasks'],
    y=bt['empirical speedup'],
    color=bt['nodes'],
    color_discrete_sequence=['#440154', '#21918c', '#fde725'],
    markers=True,
)

# Figure title, legend title and axis labels
fig.update_layout(
    xaxis_title='Number of tasks (MPIs)',
    yaxis_title='Speedup',
    legend_title_text='Nodes',
    title='Mean speedup for bt application',
)

fig.show()
# Export the rendered figure as a PNG
fig.write_image("Figures/mean_speedup_bt.png")

## Efficiency vs ntasks (MPIs)

In [324]:
# Efficiency of bt against the number of MPI tasks, one line per node count
fig = px.line(
    x=bt['ntasks'],
    y=bt['efficiency'],
    color=bt['nodes'],
    color_discrete_sequence=['#440154', '#21918c', '#fde725'],
    markers=True,
)

# Figure title, legend title and axis labels
fig.update_layout(
    xaxis_title='Number of tasks (MPIs)',
    yaxis_title='Efficiency',
    legend_title_text='Nodes',
    title='Mean efficiency for bt application',
)

fig.show()
# Export the rendered figure as a PNG
fig.write_image("Figures/mean_efficiency_bt.png")

# sp application analysis

## Speedup vs ntasks (MPIs)

In [325]:
# Empirical speedup of sp against the number of MPI tasks, one line per node count
fig = px.line(
    x=sp['ntasks'],
    y=sp['empirical speedup'],
    color=sp['nodes'],
    color_discrete_sequence=['#440154', '#21918c', '#fde725'],
    markers=True,
)

# Figure title, legend title and axis labels
fig.update_layout(
    xaxis_title='Number of tasks (MPIs)',
    yaxis_title='Speedup',
    legend_title_text='Nodes',
    title='Mean speedup for sp application',
)

fig.show()
# Export the rendered figure as a PNG
fig.write_image("Figures/mean_speedup_sp.png")

## Efficiency vs ntasks (MPIs)

In [326]:
# Efficiency of sp against the number of MPI tasks, one line per node count
fig = px.line(
    x=sp['ntasks'],
    y=sp['efficiency'],
    color=sp['nodes'],
    color_discrete_sequence=['#440154', '#21918c', '#fde725'],
    markers=True,
)

# Figure title, legend title and axis labels
fig.update_layout(
    xaxis_title='Number of tasks (MPIs)',
    yaxis_title='Efficiency',
    legend_title_text='Nodes',
    title='Mean efficiency for sp application',
)

fig.show()
# Export the rendered figure as a PNG
fig.write_image("Figures/mean_efficiency_sp.png")

# lu application analysis

## Elapsed time vs ntasks (MPIs)

In [327]:
# Mean elapsed time of lu against the number of MPI tasks, one line per node count
fig = px.line(
    x=lu['ntasks'],
    y=lu['mean elapsed t'],
    color=lu['nodes'],
    markers=True,
    color_discrete_sequence=['#440154', '#21918c', '#fde725'],
)

# Titles and axis labels (the y-axis label used to read 'Elapsed ime (s)')
fig.update_layout(
    title='Mean elapsed time for lu application',
    legend_title_text='Nodes',
    xaxis_title='Number of tasks (MPIs)',
    yaxis_title='Elapsed time (s)',
)

fig.show()
# save the figure as a png file
fig.write_image("Figures/mean_elapsed_time_lu.png")

## Speedup vs ntasks (MPIs)

In [328]:
# Empirical speedup of lu against the number of MPI tasks, one line per node count
fig = px.line(
    x=lu['ntasks'],
    y=lu['empirical speedup'],
    color=lu['nodes'],
    color_discrete_sequence=['#440154', '#21918c', '#fde725'],
    markers=True,
)

# Figure title, legend title and axis labels
fig.update_layout(
    xaxis_title='Number of tasks (MPIs)',
    yaxis_title='Speedup',
    legend_title_text='Nodes',
    title='Mean speedup for lu application',
)

fig.show()
# Export the rendered figure as a PNG
fig.write_image("Figures/mean_speedup_lu.png")

## Efficiency vs ntasks (MPIs)

In [329]:
# Efficiency of lu against the number of MPI tasks, one line per node count
fig = px.line(
    x=lu['ntasks'],
    y=lu['efficiency'],
    color=lu['nodes'],
    color_discrete_sequence=['#440154', '#21918c', '#fde725'],
    markers=True,
)

# Figure title, legend title and axis labels
fig.update_layout(
    xaxis_title='Number of tasks (MPIs)',
    yaxis_title='Efficiency',
    legend_title_text='Nodes',
    title='Mean efficiency for lu application',
)

fig.show()
# Export the rendered figure as a PNG
fig.write_image("Figures/mean_efficiency_lu.png")

# Economic analysis

In [330]:
# Load the job accounting table ('|'-separated, one row per job).
# Each row was obtained with:
#   sacct -j <jobid> --format=JobID,CPUTime,NCPUS,ReqCPUS,ConsumedEnergy,AveCPUFreq,JobName,Elapsed -p
cost = pd.read_csv(filepath_or_buffer='Data/cost.csv', sep='|')
cost.head()

Unnamed: 0,Nodes,JobID,CPUTime,NCPUS,ReqCPUS,ConsumedEnergy,AveCPUFreq,JobName,Elapsed
0,4,28076954.0,07:44:00,192,192,170.18K,1.23M,bt-mz.D.8,00:02:25
1,4,28077461.0,05:20:00,192,192,144.33K,277.44M,bt-mz.D.192,00:01:40
2,8,28024862.0,07:53:36,384,384,171.94K,28.29M,bt-mz.D.16,00:01:14
3,8,28024867.0,07:21:36,384,384,182.28K,5.26G,bt-mz.D.384,00:01:09
4,4,28024930.0,04:48:00,192,192,112.23K,900K,sp-mz.D.8,00:01:30


In [331]:
# Cast the JobName column to str so the .str accessors used in later cells
# (splitting on '.') operate on string values
cost['JobName'] = cost['JobName'].astype(str)

In [332]:
# Add an integer 'ntasks' column: the text after the last '.' of JobName
# (e.g. 'bt-mz.D.192' -> 192)
job_name_suffix = cost['JobName'].str.rsplit('.', n=1).str[-1]
cost['ntasks'] = job_name_suffix.astype(int)

In [333]:
# print columns types
cost.dtypes

Nodes               int64
JobID             float64
CPUTime            object
NCPUS               int64
ReqCPUS             int64
ConsumedEnergy     object
AveCPUFreq         object
JobName            object
Elapsed            object
ntasks              int32
dtype: object

In [334]:
# Convert 'Elapsed' from 'HH:MM:SS' text to whole seconds. Values that are no longer strings
# (the cell was already run) are kept unchanged, so re-running the cell does not raise.
cost['Elapsed'] = cost['Elapsed'].apply(
    lambda x: sum(int(part) * 60 ** (2 - pos) for pos, part in enumerate(x.split(':')))
    if isinstance(x, str) else x
)

In [335]:
# Convert 'ConsumedEnergy' from text such as '170.18K' to a float expressed in thousands of
# the reported unit, so 'K' values keep their numeric part (170.18K -> 170.18).
# The 'M' and 'G' suffixes (both appear in this table's AveCPUFreq column) are scaled to the
# same unit instead of raising a ValueError.
_ENERGY_SUFFIX_TO_KILO = {'K': 1.0, 'M': 1e3, 'G': 1e6}


def _energy_to_kilo(value):
    """Parse one ConsumedEnergy cell into a float in kilo-units.

    Non-string values (already-converted floats on a re-run, or NaN for a
    missing cell) are returned as floats unchanged.
    """
    if not isinstance(value, str):
        return float(value)
    text = value.strip()
    scale = _ENERGY_SUFFIX_TO_KILO.get(text[-1:])
    if scale is None:
        # NOTE(review): unsuffixed values are kept as-is, exactly as before; if they are in
        # base units they would be 1000x too large next to the 'K' rows — confirm.
        return float(text)
    return float(text[:-1]) * scale


cost['ConsumedEnergy'] = cost['ConsumedEnergy'].apply(_energy_to_kilo)

In [336]:
cost.dtypes

Nodes               int64
JobID             float64
CPUTime            object
NCPUS               int64
ReqCPUS             int64
ConsumedEnergy    float64
AveCPUFreq         object
JobName            object
Elapsed             int64
ntasks              int32
dtype: object

In [337]:
cost.head()

Unnamed: 0,Nodes,JobID,CPUTime,NCPUS,ReqCPUS,ConsumedEnergy,AveCPUFreq,JobName,Elapsed,ntasks
0,4,28076954.0,07:44:00,192,192,170.18,1.23M,bt-mz.D.8,145,8
1,4,28077461.0,05:20:00,192,192,144.33,277.44M,bt-mz.D.192,100,192
2,8,28024862.0,07:53:36,384,384,171.94,28.29M,bt-mz.D.16,74,16
3,8,28024867.0,07:21:36,384,384,182.28,5.26G,bt-mz.D.384,69,384
4,4,28024930.0,04:48:00,192,192,112.23,900K,sp-mz.D.8,90,8


In [338]:
# 'Mean power' = energy consumed by the job divided by its elapsed time (element-wise per row)
cost['Mean power'] = cost['ConsumedEnergy'].div(cost['Elapsed'])

In [339]:
cost.head()

Unnamed: 0,Nodes,JobID,CPUTime,NCPUS,ReqCPUS,ConsumedEnergy,AveCPUFreq,JobName,Elapsed,ntasks,Mean power
0,4,28076954.0,07:44:00,192,192,170.18,1.23M,bt-mz.D.8,145,8,1.173655
1,4,28077461.0,05:20:00,192,192,144.33,277.44M,bt-mz.D.192,100,192,1.4433
2,8,28024862.0,07:53:36,384,384,171.94,28.29M,bt-mz.D.16,74,16,2.323514
3,8,28024867.0,07:21:36,384,384,182.28,5.26G,bt-mz.D.384,69,384,2.641739
4,4,28024930.0,04:48:00,192,192,112.23,900K,sp-mz.D.8,90,8,1.247


In [340]:
# 'Cost' = mean power * elapsed seconds (i.e. the job's energy), times the energy price (0.2),
# divided by 3600 to go from seconds to hours.
# NOTE(review): 0.2 is presumably the price in € per kWh — confirm the tariff.
energy_price = 0.2
seconds_per_hour = 3600
energy_used = cost['Mean power'] * cost['Elapsed']
cost['Cost'] = (energy_used * energy_price) / seconds_per_hour

In [341]:
cost

Unnamed: 0,Nodes,JobID,CPUTime,NCPUS,ReqCPUS,ConsumedEnergy,AveCPUFreq,JobName,Elapsed,ntasks,Mean power,Cost
0,4,28076954.0,07:44:00,192,192,170.18,1.23M,bt-mz.D.8,145,8,1.173655,0.009454
1,4,28077461.0,05:20:00,192,192,144.33,277.44M,bt-mz.D.192,100,192,1.4433,0.008018
2,8,28024862.0,07:53:36,384,384,171.94,28.29M,bt-mz.D.16,74,16,2.323514,0.009552
3,8,28024867.0,07:21:36,384,384,182.28,5.26G,bt-mz.D.384,69,384,2.641739,0.010127
4,4,28024930.0,04:48:00,192,192,112.23,900K,sp-mz.D.8,90,8,1.247,0.006235
5,4,28024931.0,04:03:12,192,192,124.08,974.15M,sp-mz.D.192,76,192,1.632632,0.006893
6,8,28024934.0,05:00:48,384,384,112.56,44.75M,sp-mz.D.16,47,16,2.394894,0.006253
7,8,28024939.0,04:28:48,384,384,130.08,14.94G,sp-mz.D.384,42,384,3.097143,0.007227


## bt application

In [342]:
# Keep only the bt jobs: rows whose JobName starts with the 'bt-mz' tag (text before the first '.')
is_bt_job = cost['JobName'].str.split('.').str[0] == 'bt-mz'
bt_cost = cost[is_bt_job]

# Mean power of the bt jobs against the number of MPI tasks, one line per node count
fig = px.line(
    x=bt_cost['ntasks'],
    y=bt_cost['Mean power'],
    color=bt_cost['Nodes'],
    color_discrete_sequence=['#440154', '#21918c', '#fde725'],
    markers=True,
)

# Figure title, legend title and axis labels
fig.update_layout(
    xaxis_title='Number of tasks (MPIs)',
    yaxis_title='Mean power (kW)',
    legend_title_text='Nodes',
    title='Mean power for bt application',
)

fig.show()
# Export the rendered figure as a PNG
fig.write_image("Figures/mean_power_bt.png")

In [343]:
# Cost of the bt jobs against the number of MPI tasks, one line per node count
fig = px.line(
    x=bt_cost['ntasks'],
    y=bt_cost['Cost'],
    color=bt_cost['Nodes'],
    color_discrete_sequence=['#440154', '#21918c', '#fde725'],
    markers=True,
)

# Figure title, legend title and axis labels
fig.update_layout(
    xaxis_title='Number of tasks (MPIs)',
    yaxis_title='Cost (€)',
    legend_title_text='Nodes',
    title='Mean cost for bt application',
)

fig.show()
# Export the rendered figure as a PNG
fig.write_image("Figures/mean_cost_bt.png")

## sp application

In [344]:
# Keep only the sp jobs: rows whose JobName starts with the 'sp-mz' tag (text before the first '.')
is_sp_job = cost['JobName'].str.split('.').str[0] == 'sp-mz'
sp_cost = cost[is_sp_job]

# Mean power of the sp jobs against the number of MPI tasks, one line per node count
fig = px.line(
    x=sp_cost['ntasks'],
    y=sp_cost['Mean power'],
    color=sp_cost['Nodes'],
    color_discrete_sequence=['#440154', '#21918c', '#fde725'],
    markers=True,
)

# Figure title, legend title and axis labels
fig.update_layout(
    xaxis_title='Number of tasks (MPIs)',
    yaxis_title='Mean power (kW)',
    legend_title_text='Nodes',
    title='Mean power for sp application',
)

fig.show()
# Export the rendered figure as a PNG
fig.write_image("Figures/mean_power_sp.png")

In [345]:
# Cost of the sp jobs against the number of MPI tasks, one line per node count
fig = px.line(
    x=sp_cost['ntasks'],
    y=sp_cost['Cost'],
    color=sp_cost['Nodes'],
    color_discrete_sequence=['#440154', '#21918c', '#fde725'],
    markers=True,
)

# Figure title, legend title and axis labels
fig.update_layout(
    xaxis_title='Number of tasks (MPIs)',
    yaxis_title='Cost (€)',
    legend_title_text='Nodes',
    title='Mean cost for sp application',
)

fig.show()
# Export the rendered figure as a PNG
fig.write_image("Figures/mean_cost_sp.png")

In [349]:
# Add up the 'Cost' column: over every job, then over the bt-mz and sp-mz subsets
total_cost = cost['Cost'].sum()
total_cost_bt = bt_cost['Cost'].sum()
total_cost_sp = sp_cost['Cost'].sum()

# Report the three totals, one line each
cost_report = (
    ('Total cost: ', total_cost),
    ('Total cost for bt application: ', total_cost_bt),
    ('Total cost for sp application: ', total_cost_sp),
)
for label, amount in cost_report:
    print(label, amount, '€')

Total cost:  0.06376000000000001 €
Total cost for bt application:  0.037151666666666666 €
Total cost for sp application:  0.026608333333333338 €


# Schedule lu application analysis