# BUILDING THE jobs_info.csv FILE
This notebook combines the information about each job (from jobs_data.csv) with the measurements from the sensors (from the sensors_measures*.csv files) and writes the result to jobs_info.csv.

In [1]:
import glob
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from IPython.display import display
# Notebook display configuration: render every column and every row of a
# DataFrame, and leave float formatting at pandas' default.
# To restore the default row limit: pd.reset_option('display.max_rows', silent=True)
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.set_option('display.float_format', None)

def read_sensors_measures_files(path):
    """Load every ``sensors_measures*.csv`` file directly under ``path`` into one DataFrame.

    Files are read in sorted (lexicographic) path order so that the row order of
    the result is the same on every run and every filesystem.

    Parameters
    ----------
    path : str
        Directory that contains the ``sensors_measures*.csv`` files.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all files. The per-file row labels are kept,
        so index labels repeat across files.

    Raises
    ------
    ValueError
        If no file under ``path`` matches ``sensors_measures*.csv``.
    """
    pattern = os.path.join(path, "sensors_measures*.csv")
    # glob returns matches in arbitrary, filesystem-dependent order; sort them
    # so the concatenated frame is reproducible.
    all_files = sorted(glob.glob(pattern))
    if not all_files:
        # pd.concat([]) would raise a ValueError that does not say which files
        # were expected; fail early with the pattern that found nothing.
        raise ValueError("No files matching " + pattern + " were found")

    frames = []
    for filename in all_files:
        print("Working on: "+filename)
        frames.append(
            pd.read_csv(
                filename,
                index_col=None,
                header=0,
                # Pin the dtypes of these two columns instead of letting them be inferred per file.
                dtype={'notes': "string", 'delta_e': np.float64},
            )
        )
    return pd.concat(frames, axis=0)

# Load all sensor measurement files found in the notebook's working directory.
sdf = read_sensors_measures_files(r'.')

# Only the last expression of a cell is rendered automatically, so the preview
# has to go through display() to actually show up.
display(sdf.head(20))

# Sanity check: True when no (jobid, measure_ts, nodename) combination occurs
# more than once in the loaded measurements.
sdf.set_index(['jobid', 'measure_ts', 'nodename']).index.is_unique


Working on: ./sensors_measures_0.csv
Working on: ./sensors_measures_1.csv
Working on: ./sensors_measures_3.csv
Working on: ./sensors_measures_2.csv
Working on: ./sensors_measures_5.csv
Working on: ./sensors_measures_4.csv


True

In [2]:
##ONLY FOR TESTING
#sdf = sdf.loc[sdf['jobid'].isin([909611 ])] #944853
#energy=0
#for index, row in sdf.iterrows():
#    energy+=row['delta_e']
#print(energy)    
#display(sdf)

In [3]:
# Job descriptions: the first line of jobs_data.csv is the header, and no
# column is used as the index.
jdf = pd.read_csv(
    'jobs_data.csv',
    header=0,
    index_col=None,
)
jdf.head()


Unnamed: 0,jobid,jobname,start_ts,end_ts,nodes,condition_n,algorithm,fault_tolerance,fault_happened,computing_proc,total_proc,matrix_size,precision,balanced,repetition,req_whole_time,number_of_nodes,node1,node2,node3,node4,node5,node6,node7,node8,node9,node10,node11,node12,node13,node14,node15,node16,IMe_fault_level,IMe_fault_rank,SPK_checkpoint_iteration,blocking_factor,NxSOCK,R,rank_assignment,error_code,runtime,init_and_call_runtime,call_runtime
0,891576,SPKFT2_nf2_cp576_tnp578_ms42240_double_lbn_r1_...,1701589448,1701589519,48*cresco6x051:48*cresco6x114:48*cresco6x147:4...,1,SPK,2,2,576,578,42240,double,n,1,15,13,48,48,48,48,48,48,48,48,48,48,48,48,2,0,0,0,2,2,21120,22,-,1.0,f,-99,71,47,32
1,891577,IMeCOFT4_nf0_cp576_tnp672_ms42240_double_lbn_r...,1701589543,1701590227,48*cresco6x214:48*cresco6x113:48*cresco6x230:4...,1,IMeCO,4,0,576,672,42240,double,n,1,15,14,48,48,48,48,48,48,48,48,48,48,48,48,48,48,0,0,2,2,-,-,-,1.0,f,0,685,659,652
2,891578,SPKFT4_nf0_cp576_tnp580_ms42240_double_lbn_r1_...,1701590250,1701590319,48*cresco6x299:48*cresco6x246:48*cresco6x221:4...,1,SPK,4,0,576,580,42240,double,n,1,15,13,48,48,48,48,48,48,48,48,48,48,48,48,4,0,0,0,2,2,21120,22,-,1.0,f,0,69,47,32
3,891579,IMeCOFT4_nf4_cp576_tnp672_ms42240_double_lbn_r...,1701590345,1701590957,48*cresco6x229:48*cresco6x303:48*cresco6x113:4...,1,IMeCO,4,4,576,672,42240,double,n,1,15,14,48,48,48,48,48,48,48,48,48,48,48,48,48,48,0,0,2,2,-,-,-,1.0,f,0,612,588,581
4,891580,SPKFT4_nf4_cp576_tnp580_ms42240_double_lbn_r1_...,1701590976,1701591048,48*cresco6x188:48*cresco6x062:48*cresco6x081:4...,1,SPK,4,4,576,580,42240,double,n,1,15,13,48,48,48,48,48,48,48,48,48,48,48,48,4,0,0,0,2,2,21120,22,-,1.0,f,-99,72,47,32


In [4]:
# First energy estimate: for every job, add up all delta_e values found in the
# sensor measurements. The result is indexed by jobid.
energy_df = sdf.groupby('jobid').agg(energy_from_delta_e=('delta_e', 'sum'))
energy_df.head()


Unnamed: 0_level_0,energy_from_delta_e
jobid,Unnamed: 1_level_1
879962,0.03901
879963,0.00398
879964,0.03851
879965,0.00423
879966,0.03864


In [5]:
# Second energy estimate: for every (job, node) take the node_energy reading at
# the latest measurement minus the reading at the earliest one, then add the
# per-node differences up per job.
energy2_df = sdf.filter(['jobid','measure_ts','nodename','node_energy'], axis=1)

# GroupBy.min()/max() take `numeric_only` as their first positional argument, so
# .min('measure_ts') does NOT mean "the row with the smallest measure_ts": it
# takes the minimum of each column independently. Sorting by time and taking the
# first/last row per group picks the readings at the actual start and end.
# Note: first()/last() return the first/last non-null value of each column.
energy_by_time = energy2_df.sort_values('measure_ts', kind='stable').groupby(['jobid','nodename'])
energy_min_df = energy_by_time.first().rename(columns={"node_energy": "starting_node_energy"})
energy_max_df = energy_by_time.last().rename(columns={"node_energy": "ending_node_energy"})
display(energy_min_df.head())
display(energy_max_df.head())

# The timestamps were only needed for the preview above.
energy_min_df = energy_min_df.drop(columns=['measure_ts'])
energy_max_df = energy_max_df.drop(columns=['measure_ts'])

# Both frames share the (jobid, nodename) index; put start and end side by side.
energy3_df = pd.concat([energy_min_df, energy_max_df], axis=1, join="inner")
energy3_df['energy_from_subtraction'] = energy3_df['ending_node_energy'] - energy3_df['starting_node_energy']
display(energy3_df.head())

# jobid is an index level here, so group on the level and sum over the nodes.
energy4_df = energy3_df[['energy_from_subtraction']].groupby(level='jobid').sum()
display(energy4_df.head())


Unnamed: 0_level_0,Unnamed: 1_level_0,measure_ts,starting_node_energy
jobid,nodename,Unnamed: 2_level_1,Unnamed: 3_level_1
879962,114,1700602023,1858.41351
879962,184,1700602023,105.33058
879962,186,1700602023,114.13925
879963,149,1700602228,108.9023
879963,226,1700602227,116.11544


Unnamed: 0_level_0,Unnamed: 1_level_0,measure_ts,ending_node_energy
jobid,nodename,Unnamed: 2_level_1,Unnamed: 3_level_1
879962,114,1700602212,1858.43085
879962,184,1700602212,105.33758
879962,186,1700602212,114.15392
879963,149,1700602254,108.90397
879963,226,1700602254,116.11692


Unnamed: 0_level_0,Unnamed: 1_level_0,starting_node_energy,ending_node_energy,energy_from_subtraction
jobid,nodename,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
879962,114,1858.41351,1858.43085,0.01734
879962,184,105.33058,105.33758,0.007
879962,186,114.13925,114.15392,0.01467
879963,149,108.9023,108.90397,0.00167
879963,226,116.11544,116.11692,0.00148


Unnamed: 0_level_0,energy_from_subtraction
jobid,Unnamed: 1_level_1
879962,0.03901
879963,0.00398
879964,0.03851
879965,0.00423
879966,0.03864


In [6]:
# Power time series per job: sys_power summed over all measurement rows that
# share the same (jobid, measure_ts), i.e. over the nodes reporting at that timestamp.
power_series_df = (
    sdf
    .groupby(['jobid', 'measure_ts'])[['sys_power']]
    .sum()
)
power_series_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,sys_power
jobid,measure_ts,Unnamed: 2_level_1
879962,1700602023,380
879962,1700602024,340
879962,1700602025,360
879962,1700602026,350
879962,1700602027,360


In [7]:
# Per-job mean and peak of the summed power time series; named aggregation
# yields the final flat column names directly.
power_df = power_series_df.groupby('jobid').agg(
    mean_sys_power=('sys_power', 'mean'),
    max_sys_power=('sys_power', 'max'),
)
power_df.head()

Unnamed: 0_level_0,mean_sys_power,max_sys_power
jobid,Unnamed: 1_level_1,Unnamed: 2_level_1
879962,740.631579,860
879963,523.928571,800
879964,754.324324,880
879965,587.407407,810
879966,754.702703,880


In [8]:
# Bring both per-job energy estimates and the power statistics together, one
# row per jobid of energy_df; the other frames are matched on jobid.
info_df = (
    energy_df
    .join(energy4_df, on='jobid')
    .join(power_df, on='jobid')
)

# Flag the jobs whose two energy estimates agree within np.isclose's default tolerances.
info_df = info_df.assign(
    energy_equal=np.isclose(info_df['energy_from_delta_e'], info_df['energy_from_subtraction'])
)
# Disagreeing jobs can be inspected with info_df.loc[~info_df['energy_equal']].
info_df.head()

Unnamed: 0_level_0,energy_from_delta_e,energy_from_subtraction,mean_sys_power,max_sys_power,energy_equal
jobid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
879962,0.03901,0.03901,740.631579,860,True
879963,0.00398,0.00398,523.928571,800,True
879964,0.03851,0.03851,754.324324,880,True
879965,0.00423,0.00423,587.407407,810,True
879966,0.03864,0.03864,754.702703,880,True


In [9]:
# 3 600 000 J = 1 kWh.
# NOTE(review): multiplying by this and dividing by runtime gives watts only if
# energy_from_delta_e is in kWh and runtime is in seconds - confirm the units.
JOULES_PER_KWH = 3600000

# Job attributes to attach to the per-job energy/power summary.
job_columns = ['jobid','error_code','runtime','matrix_size', 'precision','balanced','rank_assignment','number_of_nodes', 'total_proc', 'algorithm']
filtered_jdf_df = jdf.filter(job_columns, axis=1)

# This cell extends info_df in place. Drop whatever a previous execution of the
# cell added, otherwise running it a second time fails because the joined
# column names overlap.
columns_added_here = [c for c in job_columns if c != 'jobid'] + ['mean_sys_power_from_delta_e']
info_df = info_df.drop(columns=columns_added_here, errors='ignore')

info_df = info_df.join(filtered_jdf_df.set_index('jobid'), on='jobid')
info_df['mean_sys_power_from_delta_e'] = info_df['energy_from_delta_e']*JOULES_PER_KWH/info_df['runtime']

# energy_equal was already computed when info_df was built and the join does not
# touch either energy column, so it is not recomputed here.
info_df.head()


In [10]:
# Persist the per-job summary, keeping the jobid index as the first column and
# writing floats in fixed-point '%f' notation.
info_df.to_csv(
    'jobs_info.csv',
    index=True,
    float_format='%f',
)
print("jobs_info.csv DONE!")

jobs_info.csv DONE!
