In [3]:
import os
import numpy as np
import pandas as pd
from pandas._libs.tslibs import dtypes, timestamps
import subprocess
from functools import reduce
import matplotlib
matplotlib.use('Qt5Agg')
import matplotlib.pyplot as plt
# Using matplotlib backend: QtAgg

In [None]:
system = "slurm"
model = "alexnet"
n_nodes = "4"
n_epochs = "2"
batch_size = "32"
save_every = "0"
log = "true"

filename = "dstat.csv"

if system == "aurora01":
    stat_dir = "/home/gsd/andrelucena/statistics"
elif system == "slurm":
    stat_dir = "/projects/a97485/statistics"

stat_test = "eBPFs_subset_t2"
if system == "aurora01":
    test_name = model + "_" + n_epochs + "_" + batch_size + "_" + save_every + "_" + log
elif system == "slurm":
    test_name = model + "_" + n_nodes + "_" + n_epochs + "_" + batch_size + "_" + save_every + "_" + log

full_test_path = stat_dir + "/" + stat_test + "/" + test_name
if system == "slurm":
    full_test_path += "/aurora03"

p = subprocess.Popen(["scp", f"{system}:{full_test_path}/dstat.csv", "./dstat.csv"])
sts = os.waitpid(p.pid, 0)
p = subprocess.Popen(["scp", f"{system}:{full_test_path}/gpu.csv", "./gpu.csv"])
sts = os.waitpid(p.pid, 0)

df = pd.read_csv(f"./dstat.csv", skiprows=[0,1,2,3,4], index_col=False)

os.remove("./dstat.csv")

df_gpu = pd.read_csv(f"./gpu.csv", dtype={"utilization.gpu": float}, index_col=False)

os.remove("./gpu.csv")

In [5]:

print(df.head())
print(df_gpu.head())

             time    usr     sys     idl    wai  stl          read       writ  \
0  09-11 22:22:36  1.067   1.239  97.516  0.178    0  4.871991e+05  31035.651   
1  09-11 22:22:37  1.399  11.832  86.260  0.509    0  1.128038e+07  40960.000   
2  09-11 22:22:38  1.389   4.167  94.192  0.253    0  0.000000e+00      0.000   
3  09-11 22:22:39  1.646   4.177  94.051  0.127    0  0.000000e+00  24576.000   
4  09-11 22:22:40  1.267   4.056  94.423  0.253    0  0.000000e+00      0.000   

   read.1  writ.1     recv     send        used         free    buff  \
0  10.378   1.301        0        0  9917571072  23150911488  290816   
1  82.000   6.000  8711189  6610580  9939308544  23123722240  290816   
2   0.000   0.000  5045796  4288752  9939619840  23123206144  290816   
3   0.000   2.000  5111648  4348222  9939615744  23123206144  290816   
4   0.000   0.000  4986698  4245038  9939865600  23122948096  290816   

        cach         in       out  
0  126787584  11842.724  9666.075  
1  13223

In [6]:

# df_gpu.drop(columns=['name', 'pci.bus_id'], inplace=True)
# rename columns
df.rename(columns={
'read':'read_io_total_nops',
'writ':'write_io_total_nops',
'time':'system_time',
'usr':'usr_cpu_usage',
'sys':'sys_cpu_usage',
'idl':'idl_cpu_usage',
'wai':'wai_cpu_usage',
'stl':'stl_cpu_usage',
'read.1':'read_dsk_total_bytes',
'writ.1':'writ_dsk_total_bytes',
'used':'used_memory',
'free':'free_memory',
'buff':'buff_memory',
'cach':'cach_memory',
'recv':'recv_net_total',
'send':'send_net_total',
'in':'in_paging',
'out':'out_paging'}, inplace=True)

df_gpu.rename(columns={
' temperature.gpu':'temperature_gpu',
' utilization.gpu [%]':'utilization_gpu',
' utilization.memory [%]':'utilization_memory',
' memory.total [MiB]':'memory_total',
' memory.free [MiB]':'memory_free',
' memory.used [MiB]':'memory_used'}, inplace=True)

In [7]:
df['system_time'] = pd.to_datetime(df['system_time'], format='%d-%m %H:%M:%S')
df_gpu['timestamp'] = pd.to_datetime(df_gpu['timestamp'], format='ISO8601')
print(df.dtypes)
print(df_gpu.dtypes)

system_time             datetime64[ns]
usr_cpu_usage                  float64
sys_cpu_usage                  float64
idl_cpu_usage                  float64
wai_cpu_usage                  float64
stl_cpu_usage                    int64
read_io_total_nops             float64
write_io_total_nops            float64
read_dsk_total_bytes           float64
writ_dsk_total_bytes           float64
recv_net_total                   int64
send_net_total                   int64
used_memory                      int64
free_memory                      int64
buff_memory                      int64
cach_memory                      int64
in_paging                      float64
out_paging                     float64
dtype: object
timestamp             datetime64[ns]
temperature_gpu                int64
utilization_gpu                int64
utilization_memory             int64
memory_total                   int64
memory_free                    int64
memory_used                    int64
dtype: object


In [8]:
# Totals

print(f"Read IO Total: {df['read_io_total_nops'].sum():,}")
print(f"Write IO Total: {df['write_io_total_nops'].sum():,}")
print(f"Read Disk Total: {df['read_dsk_total_bytes'].sum():,}")
print(f"Write Disk Total: {df['writ_dsk_total_bytes'].sum():,}")
print(f"Used Memory: {df['used_memory'].sum():,}")
print(f"Free Memory: {df['free_memory'].sum():,}")
print(f"Buffer Memory: {df['buff_memory'].sum():,}")
print(f"Cache Memory: {df['cach_memory'].sum():,}")
print(f"Receive Net Total: {df['recv_net_total'].sum():,}")
print(f"Send Net Total: {df['send_net_total'].sum():,}")
print()
print (f"Total GPU Utilization Memory (%): {df_gpu['utilization_memory'].sum():,}")
print (f"Total GPU Memory Total (MiB): {df_gpu['memory_total'].sum():,}")
print (f"Total GPU Memory Free (MiB): {df_gpu['memory_free'].sum():,}")
print (f"Total GPU Memory Used (MiB): {df_gpu['memory_used'].sum():,}")

Read IO Total: 95,403,807.068
Write IO Total: 13,404,475.651
Read Disk Total: 1,078.378
Write Disk Total: 1,867.301
Used Memory: 33,297,138,544,640
Free Memory: 29,497,794,560,000
Buffer Memory: 1,936,863,232
Cache Memory: 25,722,577,383,424
Receive Net Total: 1,940,618,065,423
Send Net Total: 1,924,991,167,659

Total GPU Utilization Memory (%): 15,654
Total GPU Memory Total (MiB): 43,439,760
Total GPU Memory Free (MiB): 38,526,641
Total GPU Memory Used (MiB): 4,104,259


In [9]:
# Means

print(f"Read IO Average: {df['read_io_total_nops'].mean():,}")
print(f"Write IO Average: {df['write_io_total_nops'].mean():,}")
print(f"Read Disk Average: {df['read_dsk_total_bytes'].mean():,}")
print(f"Write Disk Average: {df['writ_dsk_total_bytes'].mean():,}")
print(f"Used Memory Average: {df['used_memory'].mean():,}")
print(f"Free Memory Average: {df['free_memory'].mean():,}")
print(f"Buffer Memory Average: {df['buff_memory'].mean():,}")
print(f"Cache Memory Average: {df['cach_memory'].mean():,}")
print(f"Receive Net Average: {df['recv_net_total'].mean():,}")
print(f"Send Net Average: {df['send_net_total'].mean():,}")
print(f"User CPU Usage Average: {df['usr_cpu_usage'].mean():,}")
print(f"System CPU Usage Average: {df['sys_cpu_usage'].mean():,}")
print(f"Idle CPU Usage Average: {df['idl_cpu_usage'].mean():,}")
print(f"Wait CPU Usage Average: {df['wai_cpu_usage'].mean():,}")
print()
print (f"GPU Temperature Average: {df_gpu['temperature_gpu'].mean():,}")
print (f"GPU Utilization Average (%): {df_gpu['utilization_gpu'].mean():,}")
print (f"GPU Utilization Memory Average (%): {df_gpu['utilization_memory'].mean():,}")
print (f"GPU Memory Total Average (MiB): {df_gpu['memory_total'].mean():,}")
print (f"GPU Memory Free Average (MiB): {df_gpu['memory_free'].mean():,}")
print (f"GPU Memory Used Average (MiB): {df_gpu['memory_used'].mean():,}")

Read IO Average: 35,947.17674001507
Write IO Average: 5,050.669047098719
Read Disk Average: 0.4063217784476262
Write Disk Average: 0.7035798794272795
Used Memory Average: 12,546,020,551.861341
Free Memory Average: 11,114,466,676.714394
Buffer Memory Average: 729,790.2155237377
Cache Memory Average: 9,692,003,535.577995
Receive Net Average: 731,204,998.2754333
Send Net Average: 725,316,943.353052
User CPU Usage Average: 9.35927128862095
System CPU Usage Average: 23.533714770158255
Idle CPU Usage Average: 59.227159758854555
Wait CPU Usage Average: 7.879871891484552

GPU Temperature Average: 54.542986425339365
GPU Utilization Average (%): 96.00565610859728
GPU Utilization Memory Average (%): 5.902714932126697
GPU Memory Total Average (MiB): 16,380.0
GPU Memory Free Average (MiB): 14,527.391025641025
GPU Memory Used Average (MiB): 1,547.6089743589744


In [10]:
# Max and Min values
print(f"Max Read IO: {df['read_io_total_nops'].max():,} ; Min Read IO: {df['read_io_total_nops'].min():,}")
print(f"Max Write IO: {df['write_io_total_nops'].max():,} ; Min Write IO: {df['write_io_total_nops'].min():,}")
print(f"Max Read Disk: {df['read_dsk_total_bytes'].max():,} ; Min Read Disk: {df['read_dsk_total_bytes'].min():,}")
print(f"Max Write Disk: {df['writ_dsk_total_bytes'].max():,} ; Min Write Disk: {df['writ_dsk_total_bytes'].min():,}")
print(f"Max Used Memory: {df['used_memory'].max():,} ; Min Used Memory: {df['used_memory'].min():,}")
print(f"Max Free Memory: {df['free_memory'].max():,} ; Min Free Memory: {df['free_memory'].min():,}")
print(f"Max Buffer Memory: {df['buff_memory'].max():,} ; Min Buffer Memory: {df['buff_memory'].min():,}")
print(f"Max Cache Memory: {df['cach_memory'].max():,} ; Min Cache Memory: {df['cach_memory'].min():,}")
print(f"Max Receive Net: {df['recv_net_total'].max():,} ; Min Receive Net: {df['recv_net_total'].min():,}")
print(f"Max Send Net: {df['send_net_total'].max():,} ; Min Send Net: {df['send_net_total'].min():,}")
print(f"Max User CPU Usage: {df['usr_cpu_usage'].max():,} ; Min User CPU Usage: {df['usr_cpu_usage'].min():,}")
print(f"Max System CPU Usage: {df['sys_cpu_usage'].max():,} ; Min System CPU Usage: {df['sys_cpu_usage'].min():,}")
print(f"Max Idle CPU Usage: {df['idl_cpu_usage'].max():,} ; Min Idle CPU Usage: {df['idl_cpu_usage'].min():,}")
print(f"Max Wait CPU Usage: {df['wai_cpu_usage'].max():,} ; Min Wait CPU Usage: {df['wai_cpu_usage'].min():,}")
print()
print (f"Max GPU Temperature: {df_gpu['temperature_gpu'].max():,} ; Min GPU Temperature: {df_gpu['temperature_gpu'].min():,}")
print (f"Max GPU Utilization (%): {df_gpu['utilization_gpu'].max():,} ; Min GPU Utilization: {df_gpu['utilization_gpu'].min():,}")
print (f"Max GPU Utilization Memory (MiB): {df_gpu['utilization_memory'].max():,} ; Min GPU Utilization Memory (MiB): {df_gpu['utilization_memory'].min():,}")
print (f"Max GPU Memory Total (MiB): {df_gpu['memory_total'].max():,} ; Min GPU Memory Total (MiB): {df_gpu['memory_total'].min():,}")
print (f"Max GPU Memory Free (MiB): {df_gpu['memory_free'].max():,} ; Min GPU Memory Free (MiB): {df_gpu['memory_free'].min():,}")
print (f"Max GPU Memory Used (MiB): {df_gpu['memory_used'].max():,} ; Min GPU Memory Used (MiB): {df_gpu['memory_used'].min():,}")

Max Read IO: 22,503,424.0 ; Min Read IO: 0.0
Max Write IO: 1,684,480.0 ; Min Write IO: 0.0
Max Read Disk: 184.0 ; Min Read Disk: 0.0
Max Write Disk: 108.0 ; Min Write Disk: 0.0
Max Used Memory: 12,893,880,320 ; Min Used Memory: 9,917,571,072
Max Free Memory: 23,150,911,488 ; Min Free Memory: 2,930,810,880
Max Buffer Memory: 847,872 ; Min Buffer Memory: 290,816
Max Cache Memory: 17,608,945,664 ; Min Cache Memory: 126,787,584
Max Receive Net: 1,821,575,356 ; Min Receive Net: 0
Max Send Net: 1,820,219,101 ; Min Send Net: 0
Max User CPU Usage: 21.178 ; Min User CPU Usage: 0.0
Max System CPU Usage: 38.258 ; Min System CPU Usage: 0.379
Max Idle CPU Usage: 99.494 ; Min Idle CPU Usage: 22.475
Max Wait CPU Usage: 52.02 ; Min Wait CPU Usage: 0.125

Max GPU Temperature: 58 ; Min GPU Temperature: 46
Max GPU Utilization (%): 100 ; Min GPU Utilization: 0
Max GPU Utilization Memory (MiB): 13 ; Min GPU Utilization Memory (MiB): 0
Max GPU Memory Total (MiB): 16,380 ; Min GPU Memory Total (MiB): 16,380


In [11]:
def plotAction(df_param: pd.DataFrame, X, Y):

    # plots action with time
    plt.figure()
    for x in X:
        for y in Y:
            plt.plot(df_param[x], df_param[y], label=y, marker = "+")
    plt.xlabel(X[0])
    plt.ylabel(Y[0])
    plt.title(f"{X} per {Y}")
    plt.legend()
    plt.show()

In [12]:
plotAction(df, ['system_time'], ['read_io_total_nops', 'write_io_total_nops'])

QStandardPaths: wrong permissions on runtime directory /run/user/0/, 0755 instead of 0700


In [13]:
# plots Disk with time
plotAction(df, ['system_time'], ['read_dsk_total_bytes', 'writ_dsk_total_bytes'])

In [14]:
# plots Memory with time
plt.figure()
plt.plot(df['system_time'], df['used_memory'], label='Used Memory')
plt.plot(df['system_time'], df['free_memory'], label='Free Memory')
plt.plot(df['system_time'], df['buff_memory'], label='Buffer Memory')
plt.plot(df['system_time'], df['cach_memory'], label='Cache Memory')
plt.xlabel('Time ')
plt.ylabel('Memory (bytes)')
plt.title('Memory with time')
plt.legend()
plt.show()

In [15]:
# plots Network with time
plt.figure()
plt.plot(df['system_time'], df['recv_net_total'], label='Received Net')
plt.plot(df['system_time'], df['send_net_total'], label='Send Net')
plt.xlabel('Time ')
plt.ylabel('Network (bytes)')
plt.title('Network with time')
plt.legend()
plt.show()

In [16]:
# plots CPU with time
plt.figure()
plt.plot(df['system_time'], df['usr_cpu_usage'], label='usr CPU usage')
plt.plot(df['system_time'], df['sys_cpu_usage'], label='sys CPU usage')
plt.plot(df['system_time'], df['idl_cpu_usage'], label='idl CPU usage')
plt.plot(df['system_time'], df['wai_cpu_usage'], label='wai CPU usage')
plt.xlabel('Time ')
plt.ylabel('CPU (%)')
plt.title('CPU with time')
plt.legend()
plt.show()

In [17]:
# plots gpu temperature with time
plt.figure()
plt.plot(df_gpu['timestamp'], df_gpu['temperature_gpu'], label='Temperature')
plt.xlabel('Time ')
plt.ylabel('Temperature (°C)')
plt.title('GPU Temperature with time')
plt.legend()
plt.show()

In [18]:
# plots gpu utilization with time
plt.figure()
plt.plot(df_gpu['timestamp'], df_gpu['utilization_gpu'], label='Utilization')
plt.xlabel('Time ')
plt.ylabel('Utilization (%)')
plt.title('GPU Utilization with time')
plt.legend()
plt.show()

In [19]:
# plots gpu memory with time
plt.figure()
plt.plot(df_gpu['timestamp'], df_gpu['memory_total'], label='Total Memory')
plt.plot(df_gpu['timestamp'], df_gpu['memory_free'], label='Free Memory')
plt.plot(df_gpu['timestamp'], df_gpu['memory_used'], label='Used Memory')
plt.xlabel('Time ')
plt.ylabel('Memory (MiB)')
plt.title('GPU Memory with time')
plt.legend()
plt.show()