In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:
# Figure styling for the "paper" variant of the plots.
# NOTE: the following cell applies the "talk" variant of the same settings,
# so on a top-to-bottom run those later settings are the ones in effect.
plt.style.use('seaborn-v0_8-paper')

pt = 1 / 72.27                      # size of one point, expressed in inches
fig_width = pt * 241                # figure width of 241 pt (presumably the target column width)
aspect_ratio = (8 ** 0.5 + 1) / 2   # width / height ratio used for every figure

plt.rcParams.update({
    "figure.figsize": (fig_width, fig_width / aspect_ratio),
    "savefig.bbox": "tight",
})
sns.set_theme(
    style='whitegrid',
    context='paper',
    rc={
        'axes.facecolor': 'white',
        'figure.figsize': (fig_width, fig_width / aspect_ratio),
    },
)

# Shared palette handed to the seaborn calls further down via `palette=colors`.
colors = ["#00a1de", "#009b3a", "#c60c30", "#f9461c", "#532A92"]

In [None]:
# Figure styling for the "talk" (presentation) variant of the plots.
# Running this cell replaces the settings applied by the preceding "paper" cell.
plt.style.use('seaborn-v0_8-paper')

pt = 1 / 72.27                      # size of one point, expressed in inches
fig_width = pt * 241                # figure width of 241 pt
aspect_ratio = (5 ** 0.5 + 1) / 2   # golden ratio, width / height

plt.rcParams.update({
    "figure.figsize": (fig_width, fig_width / aspect_ratio),
    "savefig.bbox": "tight",
})
sns.set_theme(
    style='whitegrid',
    context='talk',
    rc={
        'savefig.transparent': True,
        'figure.figsize': (fig_width, fig_width / aspect_ratio),
    },
)

# Shared palette handed to the seaborn calls further down via `palette=colors`.
colors = ["#00a1de", "#009b3a", "#c60c30", "#f9461c", "#532A92"]

In [None]:
# Load the raw survey export.
df = pd.read_csv("Energy Awareness in HPC_September 16, 2024_12.48.csv")

# Keep the first data row as a lookup keyed by column name
# (presumably the question wording for each column - confirm against the CSV).
questions = df.iloc[0]

# The first two data rows are not responses; remove them from the frame.
df = df.drop(index=[0, 1])
questions

In [None]:
# Filter out data recorded before we distributed the survey - this removes
# the internal responses that were collected beforehand.
df["RecordedDate"] = pd.to_datetime(df["RecordedDate"])
# Take an explicit copy of the filtered rows: later cells assign new/converted
# columns to `df`, and a boolean-mask selection without .copy() can raise
# SettingWithCopyWarning (or silently write to a temporary) on such assignments.
df = df[df["RecordedDate"] >= pd.to_datetime("2024-8-15")].copy()
df

In [None]:
# Cast Progress to float so it can be plotted and compared numerically,
# then draw its cumulative distribution in absolute counts.
df["Progress"] = df["Progress"].astype("float64")
sns.ecdfplot(df, y="Progress", stat="count")

In [None]:
# Number of responses whose Progress value is above 90.
df[df["Progress"].gt(90)].shape[0]

In [None]:
# Number of responses marked as finished (the column is compared against the string "True").
df[df["Finished"].eq("True")].shape[0]

In [None]:
# Show the `questions` entries from position 15 onwards.
questions.iloc[15:]

In [None]:
# Look up the `questions` entry stored for column Q25_3.
questions["Q25_3"]

In [None]:
# Bar chart of the number of responses per Q54 answer, followed by the same
# counts as a table (the table is the cell's displayed value).
sns.countplot(data=df, x="Q54")
df.groupby("Q54").size()

In [None]:
# Number of responses for each distinct "Career Stage" value.
df.groupby("Career Stage").size()

In [None]:
# Side-by-side response counts for Q4 (panel labelled "Available") and
# Q6 (panel labelled "Used"), binned by number of node types, sharing one y-axis.
node_type_bins = ["1-4", "5-9", "10-14", "15-19", "20+"]

fig, ax = plt.subplots(1, 2, sharey=True)
for panel, (column, panel_label) in zip(ax, [("Q4", "Available"), ("Q6", "Used")]):
    sns.countplot(x="value", data=pd.melt(df[[column]]), order=node_type_bins,
                  ax=panel, width=1, palette=colors)
    panel.set_xlabel(panel_label)
    # Rotate the tick labels, reading them from the same axis they are applied to
    # so each panel is always labelled with its own categories.
    panel.set_xticklabels(labels=panel.get_xticklabels(), rotation=45, ha="center", va="center")

ax[0].set_ylabel("Number of Respondents")
# Shared x label, pushed below the rotated tick labels.
fig.supxlabel("Number of Node Types", y=-0.25, fontsize="small")
plt.savefig("figures/node_types.pdf")
plt.savefig("figures/node_types_presentation.png")

In [None]:
# Difference, in bin positions, between the Q4 answer and the Q6 answer
# (0 means both answers fall in the same bin).
order = ["1-4", "5-9", "10-14", "15-19", "20+"]
bin_rank = {label: rank for rank, label in enumerate(order)}

# map() turns missing or unexpected answers into NaN instead of leaving raw strings
# behind, and the float cast keeps the dtype stable so the values always match the
# float categories passed to `order=` in the countplot below.
df["UsedDiff"] = df["Q4"].map(bin_rank).astype(float) - df["Q6"].map(bin_rank).astype(float)

sns.countplot(x="value", data=pd.melt(df[["UsedDiff"]]), order=[0.0, 1.0, 2.0, 3.0, 4.0])

# Show both the proportions and the raw counts; a bare expression in the middle
# of a cell is not displayed, so the proportions need an explicit display().
display(df["UsedDiff"].value_counts(normalize=True))
df["UsedDiff"].value_counts()

In [None]:
# 2x2 grid of stacked Yes/No histograms over the Q43 bins (x label: "Number of Node Hours").
# Rows are labelled "Runtime" (Q14, Q15) and "Energy" (Q17, Q19);
# columns are labelled "Aware" (Q14, Q17) and "Reduced" (Q15, Q19).
fig, ax = plt.subplots(2, 2, sharex=True, sharey=True)

# Make Q43 an ordered-by-declaration categorical so the bins plot in this order.
order = ["0-100", "100-500", "500-1000", "1000-5000", "5000-10000", "10000+"]
df['Q43'] = pd.Categorical(df['Q43'], categories=order)

# ax.flat walks the grid row by row: [0][0], [0][1], [1][0], [1][1].
for panel, answer_col in zip(ax.flat, ["Q14", "Q15", "Q17", "Q19"]):
    sns.histplot(df, x="Q43", hue=answer_col, multiple="stack", hue_order=["Yes", "No"], ax=panel)

# Keep a single legend (from the top-left panel) centred above the grid.
sns.move_legend(ax[0][0], "lower center", bbox_to_anchor=(1.05, 1.05), ncol=2, title=None, fontsize=7)
for panel in ax[1]:
    panel.set_xticklabels(labels=panel.get_xticklabels(), rotation=45, ha="right", va="center_baseline")
for panel in (ax[0][1], ax[1][0], ax[1][1]):
    panel.get_legend().remove()

ax[0][0].set_ylabel("Runtime")
ax[1][0].set_ylabel("Energy")
ax[1][0].set_xlabel("Aware")
ax[1][1].set_xlabel("Reduced")
fig.supxlabel("Number of Node Hours", y=-0.42)
fig.supylabel("Count", x=-0.1)

In [None]:
# 2x2 grid of stacked Yes/No histograms per "Career Stage" (x label: "Role").
# Rows are labelled "Runtime" (Q14, Q15) and "Energy" (Q17, Q19);
# columns are labelled "Aware" (Q14, Q17) and "Reduced" (Q15, Q19).
fig, ax = plt.subplots(2, 2, sharex=True, sharey=True)

panel_columns = {(0, 0): "Q14", (0, 1): "Q15", (1, 0): "Q17", (1, 1): "Q19"}
for (row, col), answer_col in panel_columns.items():
    sns.histplot(df, x="Career Stage", hue=answer_col, multiple="stack",
                 hue_order=["Yes", "No"], ax=ax[row][col])

# Keep a single legend (from the top-left panel) centred above the grid.
sns.move_legend(ax[0][0], "lower center", bbox_to_anchor=(1.05, 1.05), ncol=2, title=None, fontsize=7)
for bottom_panel in ax[1]:
    bottom_panel.set_xticklabels(labels=bottom_panel.get_xticklabels(), rotation=45,
                                 ha="right", va="center_baseline")
for extra_legend_panel in (ax[0][1], ax[1][0], ax[1][1]):
    extra_legend_panel.get_legend().remove()

ax[0][0].set_ylabel("Runtime")
ax[1][0].set_ylabel("Energy")
ax[1][0].set_xlabel("Aware")
ax[1][1].set_xlabel("Reduced")
fig.supxlabel("Role", y=-0.42)
fig.supylabel("Count", x=-0.1)

In [None]:
# Share of "Yes" among the Yes/No answers for each (action, metric) pair.
# Answers other than "Yes"/"No" (including missing ones) are left out of the denominator.
temp_data = []
for action, metric, answer_col in [
    ("Aware of Metric", "Node Hours", "Q14"),
    ("Reduced Metric", "Node Hours", "Q15"),
    ("Aware of Metric", "Energy", "Q17"),
    ("Reduced Metric", "Energy", "Q19"),
]:
    yes_count = (df[answer_col] == "Yes").sum()
    no_count = (df[answer_col] == "No").sum()
    temp_data.append([action, metric, yes_count / (yes_count + no_count)])

temp_df = pd.DataFrame(temp_data, columns=["Action", "Metric", "Proportion"])

fig, ax = plt.subplots(1, 1)
sns.barplot(temp_df, x="Metric", y="Proportion", hue="Action", ax=ax, palette=colors)
# Park the legend to the right of the axes and strip its title.
sns.move_legend(ax, "center left", bbox_to_anchor=(1, 0.5))
ax.set_xlabel(None)
ax.get_legend().set_title(None)
plt.savefig("figures/awareness_comparison.pdf")
plt.savefig("figures/awareness_comparison_talk.png")
print(temp_data)

In [None]:
# Raw answer counts for the four awareness / reduction questions.
for answer_col in ("Q14", "Q15", "Q17", "Q19"):
    display(df[answer_col].value_counts())

In [None]:
# Q19 answer shares among respondents who answered "Yes" to Q17, and vice versa.
display(df.loc[df["Q17"] == "Yes", "Q19"].value_counts(normalize=True))
display(df.loc[df["Q19"] == "Yes", "Q17"].value_counts(normalize=True))

In [None]:
# Per-Q54-group answer shares for Q14, plotted for "North America" and "Europe".
# The 'percent' column holds fractions in [0, 1] (it is not multiplied by 100).
df1 = (
    df.groupby("Q54")["Q14"]
    .value_counts(normalize=True)
    .rename('percent')
    .reset_index()
)

fig, ax = plt.subplots(1, 1)
sns.barplot(df1, x="Q54", y='percent', hue="Q14",
            order=["North America", "Europe"], hue_order=["No", "Yes"], ax=ax)
ax.set_xlabel("Aware of Node Hours")

# Same breakdown for Q17.
df1 = (
    df.groupby("Q54")["Q17"]
    .value_counts(normalize=True)
    .rename('percent')
    .reset_index()
)

fig, ax = plt.subplots(1, 1)
sns.barplot(df1, x="Q54", y='percent', hue="Q17",
            order=["North America", "Europe"], hue_order=["No", "Yes"], ax=ax)
ax.set_xlabel("Aware of Energy")

In [None]:
# Distribution of the Q16 answers among respondents who answered it:
# first as a percentage bar chart, then as a pie chart.
answered_q16 = df[df["Q16"].notna()]

fig, ax = plt.subplots(1, 1)
sns.countplot(data=answered_q16, x="Q16", stat="percent", ax=ax)
plt.show()

fig, ax = plt.subplots(1, 1)
data = answered_q16["Q16"].value_counts(normalize=True)
# Only the first three colours of the active seaborn palette are supplied for the wedges.
ax.pie(data, labels=data.index, colors=sns.color_palette()[:3])

In [None]:
# Raw counts for each Q16 answer (missing answers are not counted).
display(df["Q16"].value_counts())

In [None]:
# Among respondents whose Q16 answer is "Very concerned", the share of each Q15 answer.
filtered = df[df["Q16"].notna()]
filtered.loc[filtered["Q16"] == "Very concerned", "Q15"].value_counts(normalize=True)

In [None]:
# Q17 answers (as percentages) restricted to rows whose Q45 text contains "Energy".
# Q45 is stringified first so missing values do not break the substring test.
mentions_energy = df["Q45"].astype(str).str.contains("Energy")
sns.countplot(data=df[mentions_energy], x="Q17", order=["No", "Yes"], stat="percent")

In [None]:
# Q19 answers (as percentages) restricted to rows whose Q45 text contains "Energy".
# Q45 is stringified first so missing values do not break the substring test.
mentions_energy = df["Q45"].astype(str).str.contains("Energy")
sns.countplot(data=df[mentions_energy], x="Q19", stat="percent")

In [None]:
# Stacked answer counts for the six Q21 items, then the number and share of "Yes" per item.
df1 = df[[f"Q21_{item}" for item in range(1, 7)]].melt()

fig, ax = plt.subplots(1, 1)
sns.histplot(df1, x="variable", hue="value", multiple="stack", stat="count", ax=ax)
# Human-readable names, listed in the same order as the Q21_1..Q21_6 columns above.
ax.set_xticklabels(["Green500", "SPEC SERT", "Carbon Intensity", "PUE", "DVFS", "Vendor Fact Sheets"])

yes_per_item = df1[df1["value"] == "Yes"].groupby("variable").size()
display(yes_per_item)
# Share of "Yes" among the non-missing answers for each item.
yes_per_item / df1.dropna(subset="value").groupby("variable").size()

In [None]:
# Stacked answer counts for the four Q20 items, saved as a figure,
# then the number and share of "Yes" per item.
df1 = df[[f"Q20_{item}" for item in range(1, 5)]].melt()

fig, ax = plt.subplots(1, 1)
sns.histplot(df1, x="variable", hue="value", multiple="stack", stat="count",
             ax=ax, palette=colors, shrink=0.7)
# Human-readable names, listed in the same order as the Q20_1..Q20_4 columns above.
ax.set_xticklabels(["Green500", "SPEC SERT", "Carbon\nIntensity", "PUE"], rotation=20)
ax.set_xlabel(None)
ax.set_ylabel("Num. Responses")
ax.get_legend().set_title(None)
plt.savefig("figures/use_of_tools.pdf")
plt.savefig("figures/use_of_tools_talk.png")

yes_per_item = df1[df1["value"] == "Yes"].groupby("variable").size()
display(yes_per_item)
# Share of "Yes" among the non-missing answers for each item.
yes_per_item / df1.dropna(subset="value").groupby("variable").size()

In [None]:
# Display label -> Q22 sub-question column. The dict order defines the bar order
# and must stay aligned with the tick labels set below.
question_map = {
    "Hardware": "Q22_3",
    "Queue": "Q22_2",
    "Performance": "Q22_4",
    "Funding": "Q22_1",
    "Software": "Q22_6",
    "Ease of Use": "Q22_7",
    "Experience": "Q22_8",
    "Energy": "Q22_5",
}

df1 = df[list(question_map.values())].melt()
# Fold the differently-capitalised spelling of the lowest rating into one label.
df1["value"] = df1["value"].mask(df1["value"] == "1 (Not important)", "1 (Not Important)")

fig, ax = plt.subplots(1, 1)
sns.histplot(
    df1,
    x="variable",
    hue="value",
    hue_order=["1 (Not Important)", "2", "3 (Very Important)"],
    multiple="stack",
    stat="count",
    ax=ax,
    palette=colors,
    shrink=0.75,
)
ax.set_xticklabels(list(question_map), rotation=30, ha='right')
ax.set_xlabel(None)
ax.set_ylabel("Num. Responses")
ax.get_legend().set_title(None)
plt.savefig("figures/selection_priority.pdf")
plt.savefig("figures/selection_priority_talk.png")

# Number of "3 (Very Important)" ratings per Q22 column.
df1[df1["value"] == "3 (Very Important)"].groupby("variable").size()

In [None]:
# Share of each answer to Q22_5 (the column labelled "Energy" in question_map),
# computed over the non-missing answers.
df["Q22_5"].value_counts(normalize=True)

In [None]:
# Share of each answer to Q22_4 (the column labelled "Performance" in question_map),
# computed over the non-missing answers.
df["Q22_4"].value_counts(normalize=True)

In [None]:
# Stacked histogram of the Q43 bins, split by the Q22_5 rating.
# Relies on Q43 having been converted to a categorical in an earlier cell for the bin order.
fig, ax = plt.subplots()
sns.histplot(data=df, x="Q43", hue="Q22_5", multiple="stack", ax=ax)
ax.set_xticklabels(labels=ax.get_xticklabels(), rotation=45, ha="right", va="center_baseline")
ax.set(xlabel="Node Hours used Per Year", ylabel="Num. Responses")
ax.get_legend().set_title(None)

In [None]:
# Stacked histogram of "Career Stage", split by the Q22_5 rating.
fig, ax = plt.subplots()
sns.histplot(data=df, x="Career Stage", hue="Q22_5", multiple="stack", ax=ax)
ax.set_xticklabels(labels=ax.get_xticklabels(), rotation=45, ha="right", va="center_baseline")
ax.set_xlabel(None)
ax.set(ylabel="Num. Responses")
ax.get_legend().set_title(None)

In [None]:
# Histogram of Q48_1 after casting it to float; titled as the estimated CPU utilization.
df["Q48_1"] = df["Q48_1"].astype("float64")
sns.histplot(df, x="Q48_1").set_title("Estimated CPU Utilization")

In [None]:
# Histogram of Q50_1 after casting it to float; titled as the estimated GPU utilization.
df["Q50_1"] = df["Q50_1"].astype("float64")
sns.histplot(df, x="Q50_1").set_title("Estimated GPU Utilization")

In [None]:
# Bivariate histogram of Q48_1 against Q50_1 (both are cast to float in the
# preceding cells, which must have run first).
# The former legend="full" argument is dropped: displot documents `legend` as a
# boolean, and with no `hue` variable there is no legend to draw either way.
sns.displot(data=df, x="Q48_1", y="Q50_1")

In [None]:
# Histogram of Q37_4 after casting it to float; titled as the estimated memory utilization.
df["Q37_4"] = df["Q37_4"].astype("float64")
sns.histplot(df, x="Q37_4").set_title("Estimated Memory Utilization")

In [None]:
# Number of responses for each distinct Q24 answer (rows with a missing Q24 are not counted).
df.groupby("Q24").size()

In [None]:
# Share of each Q24 answer among the non-missing answers.
df["Q24"].value_counts(normalize=True)

In [None]:
# Number of responses for each distinct Q40 answer (rows with a missing Q40 are not counted).
df.groupby("Q40").size()

In [None]:
# Share of each Q40 answer among the non-missing answers.
df["Q40"].value_counts(normalize=True)

In [None]:
# Number of responses for each distinct Q25_1 answer.
df.groupby("Q25_1").size()

In [None]:
# Number of responses for each distinct Q25_2 answer.
df.groupby("Q25_2").size()

In [None]:
# Number of responses for each distinct Q25_3 answer.
df.groupby("Q25_3").size()

In [None]:
# Share of each (Q40, Q25_3) answer combination; rows missing either answer are
# excluded by value_counts' default dropna behaviour.
df.value_counts(subset=["Q40", "Q25_3"], normalize=True)

In [None]:
# Share of each Q25_3 answer among the non-missing answers.
df["Q25_3"].value_counts(normalize=True)

In [None]:
# Number of responses for each distinct Q25_4 answer.
df.groupby("Q25_4").size()