# Use Case: Comparing Two Models
We use our regression-based model to predict energy consumption without requiring physical hardware execution. 

The method generalizes across configurations and highlights the contribution of different Transformer operations.

In [4]:
import pandas as pd
from UseCaseFunctions import *
# Peak throughput handed to both the energy model and the duration estimate.
v_max = 156e12  # 156 TFLOPs/s for A100

# Header of the energy column in the results table.
ENERGY_COLUMN = 'Energy Consumption (Joules)'


def summarize_model(label, params, v_max):
    """Build one results-table row for a single model configuration.

    Args:
        label: Value stored in the "Model" column (e.g. "A").
        params: Dict with the keys 'batch_size', 'sequence_length',
            'layers', 'heads' and 'd_model'. It is forwarded as keyword
            arguments to compute_total_energy.
        v_max: Peak throughput passed to compute_total_energy and
            calculate_duration.

    Returns:
        A dict containing, in this order: the label, the configuration,
        the per-operation FLOP counts scaled to TFLOPs, the two mappings
        returned by calculate_duration, and the estimated energy.
    """
    energy = compute_total_energy(**params, v_max=v_max)
    flops = calculate_flops(params['batch_size'], params['sequence_length'],
                            params['heads'], params['d_model'])
    durations, etas = calculate_duration(flops, v_max, params['layers'])

    # Report FLOPs in TFLOPs; the raw counts are only needed for the
    # duration estimate above.
    flops_tf = {name: count * 1e-12 for name, count in flops.items()}

    return {"Model": label, **params, **flops_tf, **durations, **etas,
            ENERGY_COLUMN: energy}


# --- Model A ---
model_a_params = {
    'batch_size': 64,
    'sequence_length': 320,
    'layers': 6,
    'heads': 8,
    'd_model': 512
}

# --- Model B ---
model_b_params = {
    'batch_size': 64,
    'sequence_length': 320,
    'layers': 12,
    'heads': 12,
    'd_model': 768
}

# One row per model, evaluated in the order A, B.
results_list = [
    summarize_model("A", model_a_params, v_max),
    summarize_model("B", model_b_params, v_max),
]
energy_model_a, energy_model_b = (row[ENERGY_COLUMN] for row in results_list)

# Create the DataFrame
df = pd.DataFrame(results_list)

print("\n--- Full Results ---")
df


--- Full Results ---


Unnamed: 0,Model,batch_size,sequence_length,layers,heads,d_model,t_qkv_projections_flops,t_score_flops,t_output_flops,t_final_projection_flops,t_qkv_projections,t_score,t_output,t_final_projection,t_qkv_projections_hef,t_score_hef,t_output_hef,t_final_projection_hef,Energy Consumption (Joules)
0,A,64,320,6,8,512,0.032212,0.006711,0.006711,0.010737,35.084269,33.28846,26.43845,33.87013,35.31306,7.753769,9.762714,12.192974,36.063681
1,B,64,320,12,12,768,0.072478,0.010066,0.010066,0.024159,108.906406,74.211373,59.048902,88.312207,51.192562,10.434156,13.11342,21.043516,78.963263


# Estimated energy consumption (in joules) as a function of Transformer layers and attention heads.

In [12]:
import pandas as pd

# Fixed part of the configuration for this sweep.
batch_size = 64
sequence_length = 320
d_model = 512

# Swept parameters: even layer counts 2..62 and even head counts 2..16.
# NOTE(review): the saved output under this cell lists layers only up to 20,
# although this range extends to 62 - confirm whether that output is stale
# or was truncated on export.
layers_range = range(2, 64, 2)
heads_range = range(2, 17, 2)

# NOTE(review): d_model = 512 is not divisible by 6, 10, 12 or 14 heads, and
# the saved output differs from the other columns only for those head
# counts - confirm how compute_total_energy treats a non-integer per-head
# dimension before reading the table as a genuine effect of `heads`.

# Enumerate every (layers, heads) combination, with layers varying slowest.
grid = [(n_layers, n_heads) for n_layers in layers_range for n_heads in heads_range]

# Evaluate the energy model once per combination; v_max comes from an
# earlier cell.
results = []
for layers, heads in grid:
    energy = compute_total_energy(batch_size, sequence_length, layers, heads, d_model, v_max)
    results.append(dict(layers=layers, heads=heads, energy_consumption_joules=energy))

# Long-format table: one row per (layers, heads) combination.
df = pd.DataFrame(results)

# Reshape to a layers x heads grid for display.
pivot_table = df.pivot(index='layers', columns='heads', values='energy_consumption_joules')
print("Energy Consumption (Joules) by Number of Layers and Heads:")
pivot_table


Energy Consumption (Joules) by Number of Layers and Heads:


heads,2,4,6,8,10,12,14,16
layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,14.440694,14.440694,14.434554,14.440694,14.434554,14.416051,14.416051,14.440694
4,25.252188,25.252188,25.239907,25.252188,25.239907,25.202903,25.202903,25.252188
6,36.063681,36.063681,36.045261,36.063681,36.045261,35.989754,35.989754,36.063681
8,46.875175,46.875175,46.850615,46.875175,46.850615,46.776606,46.776606,46.875175
10,57.686669,57.686669,57.655968,57.686669,57.655968,57.563457,57.563457,57.686669
12,68.498163,68.498163,68.461322,68.498163,68.461322,68.350309,68.350309,68.498163
14,79.309656,79.309656,79.266675,79.309656,79.266675,79.13716,79.13716,79.309656
16,90.12115,90.12115,90.072029,90.12115,90.072029,89.924011,89.924011,90.12115
18,100.932644,100.932644,100.877383,100.932644,100.877383,100.710863,100.710863,100.932644
20,111.744138,111.744138,111.682736,111.744138,111.682736,111.497714,111.497714,111.744138


# Estimated energy consumption (in joules) of the Transformer model as a function of the number of layers and the model dimensionality (d\_model).

In [14]:
import pandas as pd

# Swept parameters: even layer counts 2..62 and model widths 64..1280 in
# steps of 64. batch_size, sequence_length and v_max are taken from earlier
# cells, so those cells must have been run first.
# NOTE(review): the saved output under this cell lists layers only up to 20,
# although this range extends to 62 - confirm whether that output is stale
# or was truncated on export.
layers_range = range(2, 64, 2)
d_range = range(64, 64*21, 64)
heads = 6
# NOTE(review): 6 heads divide only the widths that are multiples of 192
# (192, 384, 576, ...); confirm how compute_total_energy treats the other
# widths, where the per-head dimension is not an integer.

# Evaluate the energy model once per (layers, width) combination, with
# layers varying slowest. The comprehension keeps its loop names local, so
# the notebook-level `d_model` set by an earlier cell is no longer
# overwritten with the last swept width.
results = [
    {
        "layers": n_layers,
        "d_model": width,
        "energy_consumption_joules": compute_total_energy(
            batch_size, sequence_length, n_layers, heads, width, v_max
        ),
    }
    for n_layers in layers_range
    for width in d_range
]

# Long-format table: one row per (layers, d_model) combination.
df = pd.DataFrame(results)

# Reshape to a layers x d_model grid for display.
pivot_table = df.pivot(index='layers', columns='d_model', values='energy_consumption_joules')
print("Energy Consumption (Joules) by Number of Layers and d_model")
pivot_table

Energy Consumption (Joules) by Number of Layers and d_model


d_model,64,128,192,256,320,384,448,512,576,640,704,768,832,896,960,1024,1088,1152,1216,1280
layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2,9.105961,10.425372,11.370815,12.115392,12.78894,13.391552,13.922438,14.434554,14.913794,15.349122,15.777669,16.184877,16.558509,16.929224,17.283464,17.609389,17.93433,18.245926,18.53313,18.821352
4,14.582722,17.221544,19.112431,20.601584,21.948679,23.153904,24.215675,25.239907,26.198387,27.069044,27.926138,28.740554,29.487818,30.229248,30.937727,31.589579,32.23946,32.862652,33.43706,34.013504
6,20.059484,24.017715,26.854046,29.087776,31.108419,32.916255,34.508913,36.045261,37.482981,38.788965,40.074607,41.296231,42.417128,43.529271,44.591991,45.569768,46.54459,47.479378,48.34099,49.205656
8,25.536245,30.813887,34.595662,37.573968,40.268159,42.678607,44.802151,46.850615,48.767575,50.508887,52.223076,53.851908,55.346437,56.829295,58.246255,59.549958,60.84972,62.096104,63.24492,64.397808
10,31.013006,37.610059,42.337277,46.06016,49.427898,52.440959,55.095388,57.655968,60.052169,62.228809,64.371545,66.407586,68.275746,70.129319,71.900519,73.530147,75.15485,76.71283,78.14885,79.58996
12,36.489767,44.406231,50.078893,54.546352,58.587638,62.203311,65.388626,68.461322,71.336762,73.948731,76.520014,78.963263,81.205055,83.429343,85.554782,87.510337,89.45998,91.329556,93.05278,94.782112
14,41.966529,51.202402,57.820508,63.032544,67.747378,71.965663,75.681864,79.266675,82.621356,85.668652,88.668483,91.51894,94.134364,96.729366,99.209046,101.490526,103.76511,105.946282,107.95671,109.974265
16,47.44329,57.998574,65.562123,71.518736,76.907117,81.728015,85.975101,90.072029,93.90595,97.388574,100.816951,104.074617,107.063674,110.02939,112.86331,115.470716,118.07024,120.563008,122.86064,125.166417
18,52.920051,64.794746,73.303739,80.004927,86.066857,91.490366,96.268339,100.877383,105.190544,109.108496,112.96542,116.630294,119.992983,123.329414,126.517573,129.450905,132.37537,135.179734,137.76457,140.358569
20,58.396812,71.590918,81.045354,88.491119,95.226596,101.252718,106.561577,111.682736,116.475137,120.828418,125.113889,129.185971,132.922292,136.629438,140.171837,143.431095,146.6805,149.79646,152.6685,155.550721
