In [49]:
import os
import numpy as np
import pandas as pd
from pandas._libs.tslibs import dtypes, timestamps
import subprocess
from functools import reduce
import matplotlib
matplotlib.use('Qt5Agg')
import matplotlib.pyplot as plt
# Using matplotlib backend: QtAgg

In [None]:
# --- Run configuration: identifies which benchmark run to fetch -------------
system = "aurora01"      # remote host alias used by scp; also selects the path layout
model = "resnet50"
n_nodes = "4"            # only part of the run name on "slurm"
n_epochs = "2"
batch_size = "64"
save_every = "1"
log = "true"             # only part of the run name on "aurora01"

filename = "dstat.csv"
gpu_filename = "gpu.csv"
stat_test = "eBPFs_subset_t2"

# Each system stores its statistics under a different root and encodes the
# run parameters differently in the directory name.
if system == "aurora01":
    stat_dir = "/home/gsd/andrelucena/statistics"
    test_name = "_".join([model, n_epochs, batch_size, save_every, log])
elif system == "slurm":
    stat_dir = "/projects/a97485/statistics"
    test_name = "_".join([model, n_nodes, n_epochs, batch_size, save_every])
else:
    # Fail here with a clear message instead of a NameError further down.
    raise ValueError(f"Unknown system {system!r}; expected 'aurora01' or 'slurm'")

full_test_path = f"{stat_dir}/{stat_test}/{test_name}"
if system == "slurm":
    full_test_path += "/aurora03"

# Copy both CSVs locally. check=True raises CalledProcessError when scp fails,
# so a failed transfer can never be masked by a stale local file from an
# earlier run being read below.
for remote_name in (filename, gpu_filename):
    subprocess.run(
        ["scp", f"{system}:{full_test_path}/{remote_name}", f"./{remote_name}"],
        check=True,
    )

# The first five lines of the dstat CSV are skipped so that the sixth line
# becomes the header row.
df = pd.read_csv(f"./{filename}", skiprows=[0, 1, 2, 3, 4], index_col=False)

# NOTE(review): the dtype key "utilization.gpu" does not match the
# ' utilization.gpu [%]' header that is renamed later, so it appears to have no
# effect (the column shows up as int64 in the dtypes output) — confirm intent.
df_gpu = pd.read_csv(f"./{gpu_filename}", dtype={"utilization.gpu": float}, index_col=False)

In [51]:

# Preview the first rows of the dstat frame, then of the GPU frame.
for frame in (df, df_gpu):
    print(frame.head())

             time    usr     sys     idl    wai  stl          read  \
0  06-11 05:18:05  2.878   1.421  91.804  3.897    0  1.857222e+07   
1  06-11 05:18:06  6.429  37.328  54.744  1.498    0  1.403699e+07   
2  06-11 05:18:07  3.558  88.639   7.054  0.749    0  1.843200e+07   
3  06-11 05:18:08  3.061  91.943   4.435  0.562    0  9.797632e+06   
4  06-11 05:18:09  3.868  84.903  10.231  0.998    0  9.482240e+06   

         writ    read.1  writ.1  recv  send        used        free   buff  \
0  227154.528   508.406   3.295     0     0  2169704448  5260066816  32768   
1  180224.000   587.000  18.000  1680     0  2521677824  4899565568  32768   
2       0.000  1084.000   0.000  1440     0  2721652736  4689772544  32768   
3   12288.000   490.000   1.000  1200     0  2805960704  4599197696  32768   
4   20480.000   790.000   2.000  1549    93  2868367360  4531073024  32768   

         cach         in        out  
0  5992116224  29569.202  48517.869  
1  6000644096      0.000      0.00

In [52]:

# df_gpu.drop(columns=['name', 'pci.bus_id'], inplace=True)
# rename columns
# Raw dstat header -> descriptive column name, grouped by metric family.
# NOTE(review): in the captured output 'read'/'writ' hold values around 1e7
# while 'read.1'/'writ.1' are in the hundreds, so the io/dsk names below may be
# swapped — confirm against the dstat header lines skipped on load.
dstat_column_names = {
    # timestamp
    'time': 'system_time',
    # CPU usage
    'usr': 'usr_cpu_usage',
    'sys': 'sys_cpu_usage',
    'idl': 'idl_cpu_usage',
    'wai': 'wai_cpu_usage',
    'stl': 'stl_cpu_usage',
    # IO / disk
    'read': 'read_io_total_nops',
    'writ': 'write_io_total_nops',
    'read.1': 'read_dsk_total_bytes',
    'writ.1': 'writ_dsk_total_bytes',
    # network
    'recv': 'recv_net_total',
    'send': 'send_net_total',
    # memory
    'used': 'used_memory',
    'free': 'free_memory',
    'buff': 'buff_memory',
    'cach': 'cach_memory',
    # paging
    'in': 'in_paging',
    'out': 'out_paging',
}
df.rename(columns=dstat_column_names, inplace=True)

# The GPU CSV headers carry a leading space and a unit suffix; both are
# dropped by mapping them onto plain identifiers.
gpu_column_names = {
    ' temperature.gpu': 'temperature_gpu',
    ' utilization.gpu [%]': 'utilization_gpu',
    ' utilization.memory [%]': 'utilization_memory',
    ' memory.total [MiB]': 'memory_total',
    ' memory.free [MiB]': 'memory_free',
    ' memory.used [MiB]': 'memory_used',
}
df_gpu.rename(columns=gpu_column_names, inplace=True)

In [53]:
# The dstat time column is parsed as day-month plus time of day (no year in
# the format); the GPU timestamp column is parsed as ISO 8601.
dstat_time_format = '%d-%m %H:%M:%S'
df['system_time'] = pd.to_datetime(df['system_time'], format=dstat_time_format)
df_gpu['timestamp'] = pd.to_datetime(df_gpu['timestamp'], format='ISO8601')

# Show the resulting dtypes of both frames.
for frame in (df, df_gpu):
    print(frame.dtypes)

system_time             datetime64[ns]
usr_cpu_usage                  float64
sys_cpu_usage                  float64
idl_cpu_usage                  float64
wai_cpu_usage                  float64
stl_cpu_usage                    int64
read_io_total_nops             float64
write_io_total_nops            float64
read_dsk_total_bytes           float64
writ_dsk_total_bytes           float64
recv_net_total                   int64
send_net_total                   int64
used_memory                      int64
free_memory                      int64
buff_memory                      int64
cach_memory                      int64
in_paging                      float64
out_paging                     float64
dtype: object
timestamp             datetime64[ns]
temperature_gpu                int64
utilization_gpu                int64
utilization_memory             int64
memory_total                   int64
memory_free                    int64
memory_used                    int64
dtype: object


In [54]:
# Totals
# NOTE(review): the memory and utilization columns are per-sample readings, so
# their sums scale with the number of samples — confirm a sum is the statistic
# that is wanted for them.

# (label, column) pairs, printed in this order.
system_total_rows = [
    ("Read IO Total", 'read_io_total_nops'),
    ("Write IO Total", 'write_io_total_nops'),
    ("Read Disk Total", 'read_dsk_total_bytes'),
    ("Write Disk Total", 'writ_dsk_total_bytes'),
    ("Used Memory", 'used_memory'),
    ("Free Memory", 'free_memory'),
    ("Buffer Memory", 'buff_memory'),
    ("Cache Memory", 'cach_memory'),
    ("Receive Net Total", 'recv_net_total'),
    ("Send Net Total", 'send_net_total'),
]
gpu_total_rows = [
    ("Total GPU Utilization Memory (%)", 'utilization_memory'),
    ("Total GPU Memory Total (MiB)", 'memory_total'),
    ("Total GPU Memory Free (MiB)", 'memory_free'),
    ("Total GPU Memory Used (MiB)", 'memory_used'),
]

for label, column in system_total_rows:
    print(f"{label}: {df[column].sum():,}")
print()
for label, column in gpu_total_rows:
    print(f"{label}: {df_gpu[column].sum():,}")

Read IO Total: 114,887,041,978.959
Write IO Total: 535,218,002.528
Read Disk Total: 3,036,236.406
Write Disk Total: 3,954.295
Used Memory: 22,268,702,240,768
Free Memory: 909,984,485,376
Buffer Memory: 109,707,264
Cache Memory: 24,057,547,681,792
Receive Net Total: 5,372,263
Send Net Total: 20,011

Total GPU Utilization Memory (%): 253,133
Total GPU Memory Total (MiB): 27,377,664
Total GPU Memory Free (MiB): 4,833,350
Total GPU Memory Used (MiB): 21,200,824


In [55]:
# Means

# (label, column) pairs, printed in this order.
system_mean_rows = [
    ("Read IO Average", 'read_io_total_nops'),
    ("Write IO Average", 'write_io_total_nops'),
    ("Read Disk Average", 'read_dsk_total_bytes'),
    ("Write Disk Average", 'writ_dsk_total_bytes'),
    ("Used Memory Average", 'used_memory'),
    ("Free Memory Average", 'free_memory'),
    ("Buffer Memory Average", 'buff_memory'),
    ("Cache Memory Average", 'cach_memory'),
    ("Receive Net Average", 'recv_net_total'),
    ("Send Net Average", 'send_net_total'),
    ("User CPU Usage Average", 'usr_cpu_usage'),
    ("System CPU Usage Average", 'sys_cpu_usage'),
    ("Idle CPU Usage Average", 'idl_cpu_usage'),
    ("Wait CPU Usage Average", 'wai_cpu_usage'),
]
gpu_mean_rows = [
    ("GPU Temperature Average", 'temperature_gpu'),
    ("GPU Utilization Average (%)", 'utilization_gpu'),
    ("GPU Utilization Memory Average (%)", 'utilization_memory'),
    ("GPU Memory Total Average (MiB)", 'memory_total'),
    ("GPU Memory Free Average (MiB)", 'memory_free'),
    ("GPU Memory Used Average (MiB)", 'memory_used'),
]

for label, column in system_mean_rows:
    print(f"{label}: {df[column].mean():,}")
print()
for label, column in gpu_mean_rows:
    print(f"{label}: {df_gpu[column].mean():,}")

Read IO Average: 34,315,126.03911559
Write IO Average: 159,862.00792353644
Read Disk Average: 906.8806469534051
Write Disk Average: 1.181091696535245
Used Memory Average: 6,651,344,755.307049
Free Memory Average: 271,799,428.12903225
Buffer Memory Average: 32,768.0
Cache Memory Average: 7,185,647,455.732378
Receive Net Average: 1,604.618578255675
Send Net Average: 5.9770011947431305
User CPU Usage Average: 6.572927718040622
System CPU Usage Average: 5.308352150537634
Idle CPU Usage Average: 83.9060394265233
Wait CPU Usage Average: 4.212676523297492

GPU Temperature Average: 83.84709754637942
GPU Utilization Average (%): 99.62477558348294
GPU Utilization Memory Average (%): 75.74296828246558
GPU Memory Total Average (MiB): 8,192.0
GPU Memory Free Average (MiB): 1,446.244763614602
GPU Memory Used Average (MiB): 6,343.753441053262


In [56]:
# Max and Min values

# (label, column) pairs; each is printed as "Max <label>: … ; Min <label>: …".
# Using one label for both halves keeps the unit suffix consistent.
system_extrema_rows = [
    ("Read IO", 'read_io_total_nops'),
    ("Write IO", 'write_io_total_nops'),
    ("Read Disk", 'read_dsk_total_bytes'),
    ("Write Disk", 'writ_dsk_total_bytes'),
    ("Used Memory", 'used_memory'),
    ("Free Memory", 'free_memory'),
    ("Buffer Memory", 'buff_memory'),
    ("Cache Memory", 'cach_memory'),
    ("Receive Net", 'recv_net_total'),
    ("Send Net", 'send_net_total'),
    ("User CPU Usage", 'usr_cpu_usage'),
    ("System CPU Usage", 'sys_cpu_usage'),
    ("Idle CPU Usage", 'idl_cpu_usage'),
    ("Wait CPU Usage", 'wai_cpu_usage'),
]
gpu_extrema_rows = [
    ("GPU Temperature", 'temperature_gpu'),
    ("GPU Utilization (%)", 'utilization_gpu'),
    # This column is renamed from ' utilization.memory [%]', so it is a
    # percentage; it was previously mislabelled as MiB.
    ("GPU Utilization Memory (%)", 'utilization_memory'),
    ("GPU Memory Total (MiB)", 'memory_total'),
    ("GPU Memory Free (MiB)", 'memory_free'),
    ("GPU Memory Used (MiB)", 'memory_used'),
]

for label, column in system_extrema_rows:
    print(f"Max {label}: {df[column].max():,} ; Min {label}: {df[column].min():,}")
print()
for label, column in gpu_extrema_rows:
    print(f"Max {label}: {df_gpu[column].max():,} ; Min {label}: {df_gpu[column].min():,}")

Max Read IO: 75,681,792.0 ; Min Read IO: 0.0
Max Write IO: 205,148,160.0 ; Min Write IO: 0.0
Max Read Disk: 1,903.0 ; Min Read Disk: 0.0
Max Write Disk: 566.0 ; Min Write Disk: 0.0
Max Used Memory: 6,866,055,168 ; Min Used Memory: 2,169,704,448
Max Free Memory: 5,260,066,816 ; Min Free Memory: 159,510,528
Max Buffer Memory: 32,768 ; Min Buffer Memory: 32,768
Max Cache Memory: 7,640,846,336 ; Min Cache Memory: 5,992,116,224
Max Receive Net: 2,302 ; Min Receive Net: 0
Max Send Net: 328 ; Min Send Net: 0
Max User CPU Usage: 18.848 ; Min User CPU Usage: 2.438
Max System CPU Usage: 91.943 ; Min System CPU Usage: 1.421
Max Idle CPU Usage: 93.625 ; Min Idle CPU Usage: 3.191
Max Wait CPU Usage: 24.593 ; Min Wait CPU Usage: 0.0

Max GPU Temperature: 85 ; Min GPU Temperature: 67
Max GPU Utilization (%): 100 ; Min GPU Utilization: 0
Max GPU Utilization Memory (MiB): 77 ; Min GPU Utilization Memory (MiB): 0
Max GPU Memory Total (MiB): 8,192 ; Min GPU Memory Total (MiB): 8,192
Max GPU Memory Free (

In [57]:
def plotAction(df_param: pd.DataFrame, X, Y):
    """Plot every column in ``Y`` against every column in ``X`` on one new figure.

    Parameters
    ----------
    df_param : pd.DataFrame
        Frame holding the columns to plot.
    X : str or sequence of column labels
        Column(s) used for the x-axis. A single name may be passed as a bare
        string.
    Y : str or sequence of column labels
        Column(s) plotted on the y-axis. A single name may be passed as a bare
        string.

    Raises
    ------
    ValueError
        If ``X`` or ``Y`` is empty.
    KeyError
        If a requested column is not present in ``df_param``.
    """
    # A bare string would otherwise be iterated character by character and
    # fail with a KeyError on its first letter.
    x_cols = [X] if isinstance(X, str) else list(X)
    y_cols = [Y] if isinstance(Y, str) else list(Y)
    if not x_cols or not y_cols:
        # Checked before plt.figure() so no empty figure is left behind.
        raise ValueError("plotAction needs at least one X column and one Y column")

    plt.figure()
    for x in x_cols:
        for y in y_cols:
            # With several X columns the legend must say which one a curve uses.
            label = y if len(x_cols) == 1 else f"{y} vs {x}"
            plt.plot(df_param[x], df_param[y], label=label, marker="+")
    plt.xlabel(x_cols[0])
    plt.ylabel(y_cols[0])
    # Join the names so the title does not show raw Python list reprs.
    plt.title(f"{', '.join(map(str, y_cols))} per {', '.join(map(str, x_cols))}")
    plt.legend()
    plt.show()

In [58]:
# plots IO with time
plotAction(
    df_param=df,
    X=['system_time'],
    Y=['read_io_total_nops', 'write_io_total_nops'],
)

In [59]:
# plots Disk with time
disk_columns = ['read_dsk_total_bytes', 'writ_dsk_total_bytes']
plotAction(df_param=df, X=['system_time'], Y=disk_columns)

In [60]:
# plots Memory with time
# One line per (column, legend label) pair, all against system_time.
memory_series = [
    ('used_memory', 'Used Memory'),
    ('free_memory', 'Free Memory'),
    ('buff_memory', 'Buffer Memory'),
    ('cach_memory', 'Cache Memory'),
]
fig, ax = plt.subplots()
for column, legend_label in memory_series:
    ax.plot(df['system_time'], df[column], label=legend_label)
ax.set_xlabel('Time ')
ax.set_ylabel('Memory (bytes)')
ax.set_title('Memory with time')
ax.legend()
plt.show()

In [61]:
# plots Network with time
# One line per (column, legend label) pair, all against system_time.
network_series = [
    ('recv_net_total', 'Received Net'),
    ('send_net_total', 'Send Net'),
]
fig, ax = plt.subplots()
for column, legend_label in network_series:
    ax.plot(df['system_time'], df[column], label=legend_label)
ax.set_xlabel('Time ')
ax.set_ylabel('Network (bytes)')
ax.set_title('Network with time')
ax.legend()
plt.show()

In [62]:
# plots CPU with time
# Each CPU state column is drawn with the legend label "<state> CPU usage".
fig, ax = plt.subplots()
for state in ('usr', 'sys', 'idl', 'wai'):
    ax.plot(df['system_time'], df[f'{state}_cpu_usage'], label=f'{state} CPU usage')
ax.set_xlabel('Time ')
ax.set_ylabel('CPU (%)')
ax.set_title('CPU with time')
ax.legend()
plt.show()

In [63]:
# plots gpu temperature with time
fig, ax = plt.subplots()
gpu_time = df_gpu['timestamp']
ax.plot(gpu_time, df_gpu['temperature_gpu'], label='Temperature')
ax.set_xlabel('Time ')
ax.set_ylabel('Temperature (°C)')
ax.set_title('GPU Temperature with time')
ax.legend()
plt.show()

In [64]:
# plots gpu utilization with time
fig, ax = plt.subplots()
gpu_time = df_gpu['timestamp']
ax.plot(gpu_time, df_gpu['utilization_gpu'], label='Utilization')
ax.set_xlabel('Time ')
ax.set_ylabel('Utilization (%)')
ax.set_title('GPU Utilization with time')
ax.legend()
plt.show()

In [65]:
# plots gpu memory with time
# One line per (column, legend label) pair, all against the GPU timestamp.
gpu_memory_series = [
    ('memory_total', 'Total Memory'),
    ('memory_free', 'Free Memory'),
    ('memory_used', 'Used Memory'),
]
fig, ax = plt.subplots()
for column, legend_label in gpu_memory_series:
    ax.plot(df_gpu['timestamp'], df_gpu[column], label=legend_label)
ax.set_xlabel('Time ')
ax.set_ylabel('Memory (MiB)')
ax.set_title('GPU Memory with time')
ax.legend()
plt.show()