## Aggregating and visualizing resource usage of all models 

In [None]:
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np

##### The resource usage readings were manually added to a CSV file after each run and are loaded below. 
##### The BERT+GPT readings were also created manually, by adding the readings of the additional GPT call to the BERTopic readings. 

In [None]:
# Row labels in the order the rows should appear after loading: the first two
# rows of the CSV are swapped, the remaining three keep their position.
# (Presumably this puts the runs in R1..R5 order; confirm against the CSV.)
order = [1, 0, 2, 3, 4]

# Load the BERTopic readings, apply the row order above and renumber the
# index from 0 so positions and labels agree again.
BERT = (
    pd.read_csv('Aggregated_Execution_Data.csv')
    .reindex(order)
    .reset_index(drop=True)
)
BERT.head()

In [None]:
# Load the GPT-only readings.
GPT = pd.read_csv('New_Execution_Data.csv')

# Replace the row labelled 0 with hand-entered readings for run R1. The four
# values are assigned to the frame's columns in order (presumably run label,
# execution time, CPU usage and peak memory; confirm against the CSV header).
GPT.loc[0] = [
    "R1",
    246.24772490002215,
    0.010311008968837672,
    1.0494308471679688,
]
GPT.head()

In [None]:
# BERT+GPT readings = baseline readings per run + readings of the additional
# GPT call for the same run.
#
# The frame is built from its own literal table rather than by wrapping the
# BERT frame: wrapping an existing DataFrame can share its data, so the
# in-place additions below could leak into BERT, and re-running the cell
# would add the GPT readings a second time. Built this way the cell is
# self-contained and gives the same result on every run.
BERTGPT = pd.DataFrame({
    "Run": ["R1", "R2", "R3", "R4", "R5"],
    "Total Execution Time (s)": [3083.537871, 2999.055829, 2315.157078, 3111.906329, 3176.367225],
    "Total CPU Usage (%)": [98.451443, 97.335971, 96.903467, 98.851763, 71.373458],
    "Total Peak Memory Usage (MB)": [692.627967, 667.752795, 593.163465, 684.064885, 628.145993],
})

# Readings of the additional GPT call, one entry per run in R1..R5 order.
execution_times = [23.94018610008061, 28.39324160013348, 25.714675500057638, 24.874196600168943, 31.00697570014745]
cpu_usages = [0.016316706911425585, 0.044712444879630504, 0.049369911356538586, 0.031408049576752715, 0.047242393588002654]
peak_memory_usages = [1.4425983428955078, 0.3029136657714844, 0.2512035369873047, 0.23195838928222656, 0.28528499603271484]

# Element-wise addition: entry i of each list is added to row i of the column.
BERTGPT["Total Execution Time (s)"] += execution_times
BERTGPT["Total CPU Usage (%)"] += cpu_usages
BERTGPT["Total Peak Memory Usage (MB)"] += peak_memory_usages

# Display the combined readings.
BERTGPT


## Visualizations 

In [None]:
# Grouped bar chart: execution time per run for the three pipelines.
fig, ax = plt.subplots(figsize=(12, 6))

# One group per run; each group holds three bars of this width.
width = 0.2
positions = [0, 1, 2, 3, 4]

# (horizontal offset from the group centre, values, colour, legend label)
series = [
    (-width, BERT["Total Execution Time (s)"], 'blue', "BERTopic Execution Time (s)"),
    (0, BERTGPT["Total Execution Time (s)"], 'orange', "BERT+GPT Execution Time (s)"),
    (width, GPT["Execution Time (s)"], 'red', "GPT Execution Time (s)"),
]
for offset, values, colour, label in series:
    ax.bar(
        [p + offset for p in positions],
        values,
        width=width,
        color=colour,
        alpha=0.7,
        label=label,
    )

# Axis labelling: one tick per run, labelled with the run names from BERT.
ax.set_ylabel("Execution Time (s)")
ax.set_title("Comparison of Execution Time")
ax.set_xticks(positions)
ax.set_xticklabels(BERT["Run"])

ax.legend(fontsize='small')

# Leave half a group of padding on either side of the first and last run.
ax.set_xlim(-0.5, len(positions) - 0.5)

plt.show()

In [None]:
# Line plot: CPU usage per run for the three pipelines.
plt.figure(figsize=(12, 6))
plt.plot(BERT.index, BERT["Total CPU Usage (%)"], marker='o', linestyle='-', color='blue', label='BERT Total CPU Usage (%)')
# The BERT+GPT series is read from BERTGPT (not BERT), so the curve shows the
# combined readings that its legend label announces.
plt.plot(BERTGPT.index, BERTGPT["Total CPU Usage (%)"], marker='o', linestyle='--', color='orange', label='BERT+GPT Total CPU Usage (%)')
plt.plot(GPT.index, GPT["CPU Usage (%)"], marker='o', linestyle='-', color='red', label='GPT CPU Usage (%)')
# Logarithmic y-axis so series of very different magnitude stay readable on
# the same figure.
plt.yscale('log')
plt.ylabel("CPU Usage (%)")
plt.title("Comparison of CPU Usage (Logarithmic Scale)")
plt.legend()
plt.show()

In [None]:

#
# Hard-coded peak-memory readings (MB) per run for each pipeline.
# NOTE(review): these assignments rebind GPT, BERT and BERTGPT to plain dicts,
# replacing the DataFrames the cells above store under the same names;
# re-running an earlier cell after this one will need the loads re-run first.
# NOTE(review): every BERT+GPT value below exceeds the matching BERT value by
# twice the GPT-call peak memory listed in the cell that builds BERTGPT; it
# looks as if that addition was applied twice. Confirm before publishing.
memory_col = "Total Peak Memory Usage (MB)"
run_labels = ["R1", "R2", "R3", "R4", "R5"]

GPT = {
    "Run": list(run_labels),
    memory_col: [1.049431, 0.802281, 0.531520, 0.591152, 0.774975],
}
BERT = {
    "Run": list(run_labels),
    memory_col: [692.627967, 667.752795, 593.163465, 684.064885, 628.145993],
}
BERTGPT = {
    "Run": list(run_labels),
    memory_col: [695.513164, 668.358623, 593.665873, 684.528802, 628.716563],
}

df1 = pd.DataFrame(GPT)
df2 = pd.DataFrame(BERT)
df3 = pd.DataFrame(BERTGPT)

# Tag every row with the pipeline it belongs to and stack the three frames.
df1['Dataset'] = 'GPT'
df2['Dataset'] = 'BERT'
df3['Dataset'] = 'BERT+GPT'
combined_df = pd.concat([df1, df2, df3])

# Grouped bar chart: one group per run, one bar per pipeline.
fig, ax = plt.subplots(figsize=(14, 8))
bar_width = 0.2
index = np.arange(len(df1))
bar1 = ax.bar(index - bar_width, df1[memory_col], bar_width, label='GPT')
bar2 = ax.bar(index, df2[memory_col], bar_width, label='BERT')
bar3 = ax.bar(index + bar_width, df3[memory_col], bar_width, label='BERT+GPT')


def annotate_bars(bars):
    """Label each bar in ``bars`` with its height, two decimals, on ``ax``.

    The text is centred horizontally on the bar and placed 5 points above
    its top edge. Uses the module-level ``ax`` created above.
    """
    for rect in bars:
        value = rect.get_height()
        x_centre = rect.get_x() + rect.get_width() / 2
        ax.annotate(
            f'{value:.2f}',
            xy=(x_centre, value),
            xytext=(0, 5),  # 5 points vertical offset
            textcoords="offset points",
            ha='center',
            va='bottom',
            fontsize=8,  # smaller font so neighbouring labels do not collide
        )


for container in (bar1, bar2, bar3):
    annotate_bars(container)

# Axis labelling: one tick per run, labelled with the run names.
ax.set_xlabel("Run")
ax.set_ylabel("Total Peak Memory Usage (MB)")
ax.set_title("Comparison of Peak Memory Usage (MB) Between All Models")
ax.set_xticks(index)
ax.set_xticklabels(df1["Run"])
ax.legend()

plt.show()
