## Computational complexity (FLOPs) of algorithms, accuracy and energy consumption

Pytorch pre-trained models used for inference \
Energy consumption is measured using power monitor. \
10000 inferences on same image \
Code for running inference in <https://github.com/MScDisseration/model_hub/tree/master/Inference>

In order to get FLOP - used the pytorch module for profiling https://github.com/Lyken17/pytorch-OpCounter that returns the parameters and the MACs (multiply accumulate operation). \
MAC : a <- a + (b x c)
This module collects all the multiply operations and FLOPs is approximated to be double of MAC to account for other operations like add and divide. https://github.com/Lyken17/pytorch-OpCounter/tree/master/benchmark#macs-flops-what-is-the-difference  
Code in <https://github.com/MScDisseration/model_hub/blob/master/models.py>


In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from datetime import timedelta
from scipy import stats
from sklearn import metrics

from matplotlib import cm
from adjustText import adjust_text
%matplotlib inline
%config InlineBackend.figure_format='retina'

mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.size'] = 12

plt.style.use('tableau-colorblind10')

In [2]:
url = 'logs/models.txt'
df = pd.read_csv(url)

In [3]:
df.describe()

Unnamed: 0,macs,parameters
count,35.0,35.0
mean,6351510000.0,50521390.0
std,6685961000.0,55086210.0
min,42524260.0,1235496.0
25%,565904300.0,5332784.0
50%,4111515000.0,23834570.0
75%,11333000000.0,107839000.0
max,22795600000.0,143678200.0


In [4]:
df['FLOPS'] = df['macs'] * 2
df['GFLOPS'] = (df['macs'] * 2) / (10 ** 9) 
df['TFLOPS'] = (df['macs'] * 2) / (10 ** 12)
df['Params(Million)'] = df['parameters'] / (10 ** 6)
df['Log'] = np.log(df['FLOPS'])
df['GFLOPS_log'] = np.log(df['GFLOPS'])
df['Training 1 epoch(pfs-days)'] = df['GFLOPS'] * 2 * 3 * (1.2 * (10 ** 6))

In [5]:
print(df.GFLOPS.count())
df.head()

35


Unnamed: 0,model,macs,parameters,FLOPS,GFLOPS,TFLOPS,Params(Million),Log,GFLOPS_log,Training 1 epoch(pfs-days)
0,alexnet,714691900.0,61100840.0,1429384000.0,1.429384,0.001429,61.10084,21.080509,0.357243,10291560.0
1,densenet121,2865672000.0,7978856.0,5731344000.0,5.731344,0.005731,7.978856,22.469216,1.74595,41265680.0
2,densenet161,7787013000.0,28681000.0,15574030000.0,15.574026,0.015574,28.681,23.46887,2.745605,112133000.0
3,densenet169,3398071000.0,14149480.0,6796142000.0,6.796142,0.006796,14.14948,22.639621,1.916355,48932220.0
4,densenet201,4340971000.0,20013928.0,8681942000.0,8.681942,0.008682,20.013928,22.884511,2.161245,62509980.0


In [6]:
url = "logs/accuracy.csv" ##top 1 and top 5 error
df_accuracy = pd.read_csv(url)
print(df_accuracy.top1.count())

29


In [7]:
merge_df = df.merge(df_accuracy, on='model')

In [8]:
print(merge_df.top1.count())
merge_df.head()

29


Unnamed: 0,model,macs,parameters,FLOPS,GFLOPS,TFLOPS,Params(Million),Log,GFLOPS_log,Training 1 epoch(pfs-days),top1,top5
0,alexnet,714691900.0,61100840.0,1429384000.0,1.429384,0.001429,61.10084,21.080509,0.357243,10291560.0,43.45,20.91
1,densenet121,2865672000.0,7978856.0,5731344000.0,5.731344,0.005731,7.978856,22.469216,1.74595,41265680.0,25.35,7.83
2,densenet161,7787013000.0,28681000.0,15574030000.0,15.574026,0.015574,28.681,23.46887,2.745605,112133000.0,22.35,6.2
3,densenet169,3398071000.0,14149480.0,6796142000.0,6.796142,0.006796,14.14948,22.639621,1.916355,48932220.0,24.0,7.0
4,densenet201,4340971000.0,20013928.0,8681942000.0,8.681942,0.008682,20.013928,22.884511,2.161245,62509980.0,22.8,6.43


In [9]:
merge_df['top1score'] = 100 - merge_df['top1']
merge_df['top5score'] = 100 - merge_df['top5']

In [None]:
# needs citation https://github.com/Phlya/adjustText
fig = plt.figure(figsize=(12,7))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel("GFLOPs")
ax.set_ylabel("Top-1 Error (%)")
ax.set_title("GFLOPs vs Top-1 error")

plt.scatter(merge_df['GFLOPS'], merge_df['top1'])

#merge_df[['GFLOPS','top1','model']].apply(lambda row: ax.text(*row),axis=1);
texts = [ax.text(x, y, name,size='large') 
         for x, y, name in zip(merge_df['GFLOPS'],merge_df['top1'],merge_df['model'])]

adjust_text(texts)
plt.tight_layout()
ax.figure.savefig('viz/top1error.png',dpi=300)

In [None]:
# needs citation https://github.com/Phlya/adjustText
fig = plt.figure(figsize=(12,7))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel("GFLOPs")
ax.set_ylabel("Top-5 Error (%)")
ax.set_title("GFLOPs vs Top-5 error")
# cmap = cm.get_cmap('winter')
# c = merge_df['top5']
plt.scatter(merge_df['GFLOPS'], merge_df['top5'])

texts = [ax.text(x, y, name,size='large') 
         for x, y, name in zip(merge_df['GFLOPS'],merge_df['top5'],merge_df['model'])]
# cbar = plt.colorbar()
# cbar.set_label('Top-5 Error (%)')
adjust_text(texts)
plt.tight_layout()
ax.figure.savefig('viz/top5error.png',dpi=300)

In [None]:
## in log scale
fig = plt.figure(figsize=(12,7))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel("FLOPs in log scale")
ax.set_ylabel("Top-5 Error (%)")
ax.set_title("FLOPs vs Top-5 error")
# cmap = cm.get_cmap('winter')
# c = merge_df['top5']
plt.scatter(merge_df['Log'], merge_df['top5'])

texts = [ax.text(x, y, name,size='large') 
         for x, y, name in zip(merge_df['Log'],merge_df['top5'],merge_df['model'])]
# cbar = plt.colorbar()
# cbar.set_label('Top-5 Error (%)')
adjust_text(texts)
plt.tight_layout()
ax.figure.savefig('viz/logFLOPStop5error.png',dpi=300)

In [None]:
# cmap1= cm.get_cmap('winter')
# ax = merge_df.plot('GFLOPS', 'top5', kind='scatter',
#         c=range(len(merge_df)),colormap=cmap1, figsize=(12,7),
#                   title="GFLOPs vs Top-5 error")
# ax.set_xlabel("GFLOPs")
# ax.set_ylabel("Top-5 Error (%)")
# #merge_df[['GFLOPS','top5','model']].apply(lambda row: ax.text(*row),axis=1);
# texts = [ax.text(x, y, name,size='large') 
#          for x, y, name in zip(merge_df['GFLOPS'],merge_df['top5'],merge_df['model'])]
# adjust_text(texts)
# plt.tight_layout()
# ax.figure.savefig('viz/top5error.png',dpi=300)

In [None]:
x, y = merge_df['GFLOPS'],merge_df['top5'] 
slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)
print(f"slope {slope} \nintercept {intercept}\nr_value {r_value}\np_value {p_value}\nstd_err {std_err}")
print("R squared", r_value ** 2)
r,p = stats.pearsonr(x, y)
print(f"Pearson's coeff {r}\np-value {p}")

In [None]:
plt.plot(x, y, 'o', label='original data')
plt.plot(x, intercept + slope*x, 'r', label='fitted line')
plt.legend()
plt.show()

The complexity measured in FLOPs does not necessarily improve the accuracy (or decrease the error).

However, within the same architecture, e.g., VGG and resnet, the error decreases linearly with FLOPs

In [None]:
vggset = ['vgg11','vgg13','vgg16','vgg19','vgg11_bn','vgg13_bn','vgg16_bn','vgg19_bn']
vgg = merge_df.loc[merge_df.model.isin(vggset)]
ax = vgg.plot('GFLOPS', 'top5', kind='scatter',
         figsize=(7,5))
ax.set_xlim(5,50)
ax.set_ylim(5,15)
ax.set_xlabel("GFLOPs")
ax.set_ylabel("Top-5 Error (%)")
ax.set_title("Top-5 error for VGG architectures")
vgg[['GFLOPS','top5','model']].apply(lambda row: ax.text(*row,ha='left',va='bottom',size='large'),axis=1);
plt.tight_layout()
ax.figure.savefig("viz/vggtop5error.png",dpi=300)

In [None]:
x, y = vgg['GFLOPS'],vgg['top5'] 
slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)
print(f"slope {slope} \nintercept {intercept}\nr_value {r_value}\np_value {p_value}\nstd_err {std_err}")
print("R squared", r_value ** 2)

## Power monitor readings

In [None]:
df_reading = pd.read_csv("logs/power_reading_2.csv")

In [None]:
df_reading['last_changed'] = pd.to_datetime(df_reading['last_changed'], errors='coerce')
df_reading.dropna(subset=['last_changed'], inplace=True)
df_reading.set_index('last_changed', inplace=True)

In [None]:
df_reading.tail()

In [None]:
# time intervals
df_time_interval = pd.read_csv("logs/pm.csv")

In [None]:
print(df_time_interval.model.count())
df_time_interval.head()

In [None]:
df_time_interval['start_seconds']= [datetime.strptime(start_time, "%Y-%m-%d %H:%M:%S.%f") - timedelta(hours=1, minutes=0)
                                  for start_time in df_time_interval.start_time]
df_time_interval['end_seconds'] = [datetime.strptime(end_time, "%Y-%m-%d %H:%M:%S.%f") - timedelta(hours=1, minutes=0)
                                for end_time in df_time_interval.end_time]
df_time_interval['total_time'] = df_time_interval['end_seconds'] - df_time_interval['start_seconds']
df_time_interval['total_time'] = df_time_interval['total_time'].apply(lambda x: x.total_seconds())
df_time_interval.head()

In [None]:
df_time_interval['s_date'] = [x.date().strftime("%Y-%m-%d") for x in df_time_interval["start_seconds"]]
df_time_interval['e_date'] = [x.date().strftime("%Y-%m-%d") for x in df_time_interval["end_seconds"]]
df_time_interval['s_time'] = [x.time().strftime("%H:%M:%S") for x in df_time_interval["start_seconds"]]
df_time_interval['e_time'] = [x.time().strftime("%H:%M:%S") for x in df_time_interval["end_seconds"]]
df_time_interval.head()

In [None]:
# not used
# def power_mean(s_date,e_date,s_time,e_time):
#     subset_df = df_reading.loc[s_date:e_date].between_time(s_time, e_time)[['power_consumption']]
#     return subset_df['power_consumption'].mean()


In [None]:
# get total power consumed 
# since power monitor reads at intervals of 3 seconds hence multiplying by 3
def energy_consumption(s_date,e_date,s_time,e_time):
    subset_df = df_reading.loc[s_date:e_date].between_time(s_time, e_time)[['power_consumption']]
    return ((subset_df['power_consumption']).sum() * 3) #Joules 

In [None]:
for index, row in df_time_interval.iterrows(): 
    df_time_interval.at[index,'empirical'] = energy_consumption(row['s_date'],row['e_date'],
                                            row['s_time'],row['e_time'])

In [None]:
# for index, row in df_time_interval.iterrows(): 
#     df_time_interval.at[index,'mean_power'] = power_mean(row['s_date'],row['e_date'],
#                                             row['s_time'],row['e_time'])

In [None]:
# df_time_interval['power'] = df_time_interval['mean_power'] * df_time_interval['total_time']
df_time_interval.head()

In [None]:
new_df = df_time_interval.merge(merge_df, on='model')
new_df.model.count()

In [None]:
new_df['empirical'] = new_df['empirical'] / (1000 * 3600) # Joules to kWh

In [None]:
fig, ax = plt.subplots()
fig.set_size_inches(12, 7)
ax.set_xlabel("GFLOPs")
ax.set_ylabel("Energy consumption (kWh)")
ax.set_title("GFLOPs vs energy consumption")
cmap = cm.get_cmap('winter')
c = new_df['top5score']
plt.ylim(min(new_df['empirical'])- 0.005,max(new_df['empirical'])+0.005)
plt.scatter(new_df['GFLOPS'], new_df['empirical'],
        c=c, cmap=cm.get_cmap('winter'))

texts = [ax.text(x, y, name,size='large') 
         for x, y, name in zip(new_df['GFLOPS'],new_df['empirical'],new_df['model'])]
cbar = plt.colorbar()
cbar.set_label('Top-5 score')
adjust_text(texts)
plt.tight_layout()
ax.figure.savefig('viz/inference_gflops_power.png',dpi=300)

## Run a linear regression

In [None]:
from scipy import stats
slope, intercept, r_value, p_value, std_err = stats.linregress(new_df['GFLOPS'],new_df['empirical'] )
print(f"slope {slope} \nintercept {intercept}\nr_value {r_value}\np_value {p_value}\nstd_err {std_err}")
print('R squared ', r_value ** 2)
mse = metrics.mean_squared_error(new_df['GFLOPS'],new_df['empirical'])
rmse = mse ** (1/2)
mae = metrics.mean_absolute_error(new_df['GFLOPS'],new_df['empirical'])
print(f"rmse: {rmse} \nmae: {mae}")

In [None]:
x = new_df['GFLOPS']
y = new_df['empirical']
plt.figure(figsize=(7,5))
plt.plot(x, y, 'o', label='original data')
plt.plot(x, intercept + slope*x, 'r', label='fitted line')
plt.xlabel("GFLOPs")
plt.ylabel("Energy (J)")
plt.title("GFLOPs vs energy consumption during inference")
plt.legend()
plt.tight_layout()
plt.savefig('viz/inference_gflops_power_fit.png',dpi=300)

## removing DenseNet model from the set 

In [None]:
df_minus_densenet = new_df[new_df['model'].str.match(r'(^densenet.*)') == False]
df_minus_densenet.model.count()

In [None]:
from scipy import stats
x = df_minus_densenet['GFLOPS']
y = df_minus_densenet['empirical'] 
slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)
print(f"slope {slope} \nintercept {intercept}\nr_value {r_value}\np_value {p_value}\nstd_err {std_err}")
print('R squared ', r_value ** 2)
mse = metrics.mean_squared_error(x,y)
rmse = mse ** (1/2)
mae = metrics.mean_absolute_error(x, y)
print(f"rmse: {rmse} \nmae: {mae}")

In [None]:
x = df_minus_densenet['GFLOPS']
y = df_minus_densenet['empirical']
plt.figure(figsize=(7,5))
plt.ylim(min(df_minus_densenet['empirical'])- 0.005,max(df_minus_densenet['empirical'])+0.005)
plt.plot(x, y, 'o', label='original data')
plt.plot(x, intercept + slope*x, 'r', label='fitted line')
plt.xlabel("GFLOPs")
plt.ylabel("Energy (kWh)")
plt.title("GFLOPs vs Energy consumption")
plt.legend()
plt.tight_layout()
plt.savefig('viz/inference_fit_minus_densenet.png',dpi=300)

## Testing within same architecture
Selected VGG for testing 

In [None]:
vgg_df = new_df.merge(vgg, on='model')

In [None]:
vgg_df.columns

In [None]:
#ax = vgg_df.plot('GFLOPS', 'power', kind='scatter')
slope, intercept, r_value, p_value, std_err = stats.linregress(vgg_df['GFLOPS_y'],vgg_df['empirical'] )
print(f"slope {slope} \nintercept {intercept}\nr_value {r_value}\np_value {p_value}\nstd_err {std_err}")
print('R squared ', r_value ** 2)
mse = metrics.mean_squared_error(x,y)
rmse = mse ** (1/2)
mae = metrics.mean_absolute_error(x, y)
print(f"rmse: {rmse} \nmae: {mae}")

In [None]:
x = vgg_df['GFLOPS_y']
y = vgg_df['empirical']
plt.ylim(min(vgg_df['empirical'])- 0.0005,max(vgg_df['empirical'])+0.0005)
plt.plot(x, y, 'o', label='original data')
plt.plot(x, intercept + slope*x, 'r', label='fitted line')
plt.xlabel("GFLOPs")
plt.ylabel("Energy (kWh)")
plt.title("GFLOPs vs energy consumption for VGG architecture")
plt.legend()
plt.tight_layout()
plt.savefig('viz/vgg_gflops_power_fit.png',dpi=300)

## data for appendix

In [None]:
df_co2 = pd.read_csv("../BERT/results/co2data.csv")
uk_co2 = df_co2.loc[df_co2.country == 'United Kingdom'].impact.mean()

new_df['CO2e'] = uk_co2 * df_time_interval['empirical'] / (1000 * 3600)
new_df['kg CO2e'] = new_df['CO2e'] /1000

In [None]:
features = ['model','GFLOPS','empirical','kg CO2e','top1score','top5score']
cv_inference = new_df[features]
cv_inference.to_csv("results/cv_inference.csv", index=False)