# Training power modelling

Power measurements are taken while training 16 NLP models (GPT-2, BERT, RoBERTa and variants of different sizes, i.e. number of layers and attention heads) with batch sizes 8, 16 and 32.


In [1]:
import os
from datetime import datetime
from datetime import timedelta

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Render figures inline in the notebook, using the high-resolution 'retina' format.
%matplotlib inline
%config InlineBackend.figure_format='retina'

# Figures written to disk use 300 dpi; default font size for plot text is 12.
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.size'] = 12

# Use matplotlib's 'tableau-colorblind10' style sheet for all plots.
plt.style.use('tableau-colorblind10')

In [3]:
# Load the power-monitor log and index it by its 'last_changed' timestamp.
# Values that cannot be parsed as datetimes become NaT and those rows are dropped.
df_reading = pd.read_csv("../results/bert-tiny-8-pm.csv")
df_reading = (
    df_reading
    .assign(last_changed=pd.to_datetime(df_reading['last_changed'], errors='coerce'))
    .dropna(subset=['last_changed'])
    .set_index('last_changed')
)

In [8]:
def read_data(url):
    """Load a GPU log CSV and keep only the samples taken while the GPU was busy.

    Parameters
    ----------
    url : str or file-like
        Anything accepted by ``pd.read_csv``. The file must contain a
        ``" utilization.gpu [%]"`` column (the header keeps its leading space).

    Returns
    -------
    pandas.DataFrame
        An independent copy of the rows whose GPU utilisation is greater
        than zero, with the original index labels preserved.
    """
    df = pd.read_csv(url)
    # Rows with zero utilisation are treated as idle time and discarded.
    is_training = df[" utilization.gpu [%]"] > 0
    # Return a real copy: callers assign to columns of the result, which
    # would otherwise trigger SettingWithCopyWarning on a filtered slice.
    return df.loc[is_training].copy()

In [9]:
def time_duration(df):
    """Return the time span covered by ``df`` in seconds.

    The ``timestamp`` column is parsed with the format
    ``"%Y/%m/%d %H:%M:%S.%f"`` and written back to ``df`` as datetimes, so
    the frame is modified in place (callers rely on this to read parsed
    timestamps afterwards). Calling the function again on an already
    converted frame is safe.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``timestamp`` column, ordered from first to last sample.

    Returns
    -------
    float
        Seconds between the first and the last row; ``0.0`` for an empty frame.
    """
    if df.empty:
        # Nothing was recorded, so there is no interval to measure.
        return 0.0
    # Vectorised parse; a column that already holds datetimes passes through unchanged.
    df["timestamp"] = pd.to_datetime(df["timestamp"], format="%Y/%m/%d %H:%M:%S.%f")
    elapsed = df["timestamp"].iloc[-1] - df["timestamp"].iloc[0]
    return elapsed.total_seconds()

In [13]:
def power_reading(url):
    """Summarise one GPU log file into a single row of statistics.

    Parameters
    ----------
    url : str
        Path of the log file. The file name (last path component) is split on
        ``_`` and ``.``; the third and fourth parts are used as the dataset
        label and the batch size (the batch size is returned as a string).

    Returns
    -------
    (pandas.DataFrame, list)
        The filtered log returned by ``read_data`` and a list with, in order:
        dataset, batchsize, mean_power, total_time, power, summed_power,
        gpu_util, gpu1_util, gpu2_util, gpu1_mean_power, gpu2_mean_power,
        memory_util, memory_used, start_time, end_time.
        The list is empty when the log contains no usable rows.
    """
    df = read_data(url)
    if df.empty:
        # No rows survived the filter: return a falsy summary so the caller
        # can skip this file instead of crashing on df.iloc[-1].
        return df, []

    time_interval = time_duration(df)  # also converts df.timestamp to datetimes

    # NOTE(review): rows are assumed to alternate GPU 1, GPU 2, GPU 1, ...
    # Confirm that this still holds after read_data has dropped rows.
    df_gpu1 = df.iloc[::2]
    df_gpu2 = df.iloc[1::2]

    watt_seconds_per_kwh = 1000 * 60 * 60
    mean_power = df[' power.draw [W]'].mean() # refer notebook on time series
    # Mean draw per row x duration x 2 (two GPUs), converted from W*s to kWh.
    total_power = mean_power * time_interval * 2 / watt_seconds_per_kwh
    # Sum of all samples scaled the same way; this only equals energy if each
    # sample stands for one second -- TODO confirm the logging interval.
    summed_power = df[' power.draw [W]'].sum() / watt_seconds_per_kwh

    gpu1_mean = df_gpu1[' power.draw [W]'].mean()
    gpu2_mean = df_gpu2[' power.draw [W]'].mean()
    gpu1_util = df_gpu1[' utilization.gpu [%]'].mean()
    gpu2_util = df_gpu2[' utilization.gpu [%]'].mean()
    gpu_util = (gpu1_util + gpu2_util) / 2
    memory_util = df[' utilization.memory [%]'].mean()
    memory_used = df[' memory.used [MiB]'].mean()

    # Use the last path component so nested folders work as well as 'folder/file'.
    extract = url.rsplit('/', 1)[-1].replace(".", "_").split('_')
    start_time = df.iloc[0].timestamp
    end_time = df.iloc[-1].timestamp

    data = [extract[2], extract[3],
            mean_power, time_interval, total_power, summed_power, gpu_util,
            gpu1_util, gpu2_util, gpu1_mean, gpu2_mean,
            memory_util, memory_used, start_time, end_time]
    return df, data

In [17]:
# Summarise every GPU log in `folder_name` into one row of `result`.
folder_name = 'nvidia'
result_columns = ['dataset','batchsize',
                  'mean_power','total_time','power','summed_power','gpu_util',
                  'gpu1_util','gpu2_util','gpu1_mean_power','gpu2_mean_power',
                  'memory_util','memory_used',
                  'start_time','end_time']

rows = []
# os.listdir returns entries in arbitrary order; sort so the table is reproducible.
for file_name in sorted(os.listdir(folder_name)):
    if file_name == '.DS_Store':
        # macOS Finder metadata, not a log file.
        continue
    url = folder_name + '/' + file_name
    df, data = power_reading(url)
    if data:
        rows.append(data)

# Build the frame once: DataFrame.append was removed in pandas 2.0 and
# growing a frame row by row is quadratic.
result = pd.DataFrame(rows, columns=result_columns)

['bert-tiny', '16', 125.7612258064516, 177.024, 0.012368197353978493, 0.010829438888888888, 42.412903225806446, 45.174193548387095, 39.651612903225804, 127.2341935483871, 124.28825806451613, 18.725806451612904, 4768.629032258064, Timestamp('2020-07-15 18:10:54.257000'), Timestamp('2020-07-15 18:13:51.281000')]
['bert-tiny', '8', 77.20702341137124, 322.841, 0.013847551469528058, 0.012824944444444446, 30.45484949832776, 32.87290969899666, 28.036789297658864, 79.74230769230769, 74.67173913043477, 9.879598662207357, 763.2993311036789, Timestamp('2020-07-16 18:11:22.381000'), Timestamp('2020-07-16 18:16:45.222000')]


In [18]:
# Display the per-run summary table built from the GPU logs.
result

Unnamed: 0,dataset,batchsize,mean_power,total_time,power,summed_power,gpu_util,gpu1_util,gpu2_util,gpu1_mean_power,gpu2_mean_power,memory_util,memory_used,start_time,end_time
0,bert-tiny,16,125.761226,177.024,0.012368,0.010829,42.412903,45.174194,39.651613,127.234194,124.288258,18.725806,4768.629032,2020-07-15 18:10:54.257,2020-07-15 18:13:51.281
1,bert-tiny,8,77.207023,322.841,0.013848,0.012825,30.454849,32.87291,28.036789,79.742308,74.671739,9.879599,763.299331,2020-07-16 18:11:22.381,2020-07-16 18:16:45.222


In [21]:
# 'batchsize' was taken from the file name, so it is text until converted here.
result["batchsize"] = pd.to_numeric(result["batchsize"])
# Doubled batch size -- presumably per-GPU batch size times the two GPUs; confirm.
result["total_batchsize"] = 2 * result["batchsize"]
result.head()

Unnamed: 0,dataset,batchsize,mean_power,total_time,power,summed_power,gpu_util,gpu1_util,gpu2_util,gpu1_mean_power,gpu2_mean_power,memory_util,memory_used,start_time,end_time,total_batchsize
0,bert-tiny,16,125.761226,177.024,0.012368,0.010829,42.412903,45.174194,39.651613,127.234194,124.288258,18.725806,4768.629032,2020-07-15 18:10:54.257,2020-07-15 18:13:51.281,32
1,bert-tiny,8,77.207023,322.841,0.013848,0.012825,30.454849,32.87291,28.036789,79.742308,74.671739,9.879599,763.299331,2020-07-16 18:11:22.381,2020-07-16 18:16:45.222,16


In [23]:
# Shift the run boundaries back by one hour.
# NOTE(review): presumably aligns the GPU-log clock with the power-monitor clock
# (time-zone offset) -- confirm. Despite the names, these columns hold timestamps.
result['start_seconds'] = pd.to_datetime(result['start_time']) - timedelta(hours=1, minutes=0)
result['end_seconds'] = pd.to_datetime(result['end_time']) - timedelta(hours=1, minutes=0)
result.head()

Unnamed: 0,dataset,batchsize,mean_power,total_time,power,summed_power,gpu_util,gpu1_util,gpu2_util,gpu1_mean_power,gpu2_mean_power,memory_util,memory_used,start_time,end_time,total_batchsize,start_seconds,end_seconds
0,bert-tiny,16,125.761226,177.024,0.012368,0.010829,42.412903,45.174194,39.651613,127.234194,124.288258,18.725806,4768.629032,2020-07-15 18:10:54.257,2020-07-15 18:13:51.281,32,2020-07-15 17:10:54.257,2020-07-15 17:13:51.281
1,bert-tiny,8,77.207023,322.841,0.013848,0.012825,30.454849,32.87291,28.036789,79.742308,74.671739,9.879599,763.299331,2020-07-16 18:11:22.381,2020-07-16 18:16:45.222,16,2020-07-16 17:11:22.381,2020-07-16 17:16:45.222


In [24]:
# Split the shifted start/end timestamps into separate date and time strings,
# which are the arguments power_mean expects.
for target, source, fmt in (
    ('s_date', "start_seconds", "%Y-%m-%d"),
    ('e_date', "end_seconds", "%Y-%m-%d"),
    ('s_time', "start_seconds", "%H:%M:%S"),
    ('e_time', "end_seconds", "%H:%M:%S"),
):
    result[target] = [stamp.strftime(fmt) for stamp in result[source]]

In [28]:
# test power consumption from power monitor in notebook power_monitor_analysis/power%20consumption.ipynb
def power_mean(s_date,e_date,s_time,e_time):
    """Mean of ``df_reading['power_consumption']`` between two instants.

    The window runs from ``s_date s_time`` to ``e_date e_time`` and includes
    both end points. It is evaluated as one continuous interval, so runs that
    cross midnight or last longer than a day are averaged over their whole
    duration rather than over the same time-of-day band on each day.

    Parameters
    ----------
    s_date, e_date : str
        Start and end dates, e.g. ``"2020-07-15"``.
    s_time, e_time : str
        Start and end times of day, e.g. ``"17:10:54"``.

    Returns
    -------
    float
        The mean reading inside the window; NaN when no reading falls in it.
    """
    stamps = df_reading.index
    start = pd.Timestamp(f"{s_date} {s_time}")
    end = pd.Timestamp(f"{e_date} {e_time}")
    # A tz-aware index cannot be compared with naive timestamps, so interpret
    # the bounds in the index's own time zone.
    tz = getattr(stamps, "tz", None)
    if tz is not None:
        start, end = start.tz_localize(tz), end.tz_localize(tz)
    # Boolean mask instead of label slicing: works on an unsorted index too.
    in_window = (stamps >= start) & (stamps <= end)
    return df_reading.loc[in_window, 'power_consumption'].mean()

In [26]:
# For every run, store the mean power-monitor reading over that run's time window.
for run in result.itertuples():
    result.at[run.Index, 'pm_mean_power'] = power_mean(
        run.s_date, run.e_date, run.s_time, run.e_time
    )

In [27]:
# Mean power-monitor reading x run duration in seconds, divided by 1000 * 3600
# (W*s -> kWh, assuming the monitor reports watts -- confirm).
result['pm_power'] = result['pm_mean_power'].mul(result['total_time']).div(1000 * 3600)
result.head()

Unnamed: 0,dataset,batchsize,mean_power,total_time,power,summed_power,gpu_util,gpu1_util,gpu2_util,gpu1_mean_power,...,end_time,total_batchsize,start_seconds,end_seconds,s_date,e_date,s_time,e_time,pm_mean_power,pm_power
0,bert-tiny,16,125.761226,177.024,0.012368,0.010829,42.412903,45.174194,39.651613,127.234194,...,2020-07-15 18:13:51.281,32,2020-07-15 17:10:54.257,2020-07-15 17:13:51.281,2020-07-15,2020-07-15,17:10:54,17:13:51,325.694271,0.016015
1,bert-tiny,8,77.207023,322.841,0.013848,0.012825,30.454849,32.87291,28.036789,79.742308,...,2020-07-16 18:16:45.222,16,2020-07-16 17:11:22.381,2020-07-16 17:16:45.222,2020-07-16,2020-07-16,17:11:22,17:16:45,233.42288,0.020933
