# In [3]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

def convert_to_numbers(x):
    """Convert a raw cell value to a number, expanding a trailing "K".

    Strings are stripped of surrounding whitespace and upper-cased first.
    A string ending in "K" is read as thousands (the numeric prefix is
    multiplied by 1000), so "1.5k" becomes 1500.0. Everything else is passed
    to ``pd.to_numeric`` with ``errors='coerce'``.

    Args:
        x: The value to convert; typically a string or a number.

    Returns:
        The numeric value, or NaN when the value cannot be parsed.
    """
    if isinstance(x, str):
        x = x.strip().upper()
        if x.endswith("K"):
            try:
                return float(x[:-1]) * 1000
            except ValueError:
                # A non-numeric prefix (e.g. "K" or "UNK") must yield NaN like
                # every other unparsable value instead of aborting the caller.
                return float("nan")
    return pd.to_numeric(x, errors='coerce')

# Load the CSV without treating any row as a header (header=None) and skip
# lines pandas cannot parse; column names are then assigned by hand.
data = pd.read_csv("../Data/Slurm-data.csv", on_bad_lines='skip', header=None)
data.columns = ["JobID", "UID", "ReqNodes", "TimeLimitRaw", "ElapsedRaw"]

# Only the two plotted columns are converted to numbers.
numeric_columns = ["ReqNodes", "ElapsedRaw"]
for column in numeric_columns:
    data[column] = data[column].apply(convert_to_numbers)

# Drop rows where either plotted value failed to convert, then order by
# elapsed time.
data_cleaned = data.dropna(subset=numeric_columns).sort_values(by="ElapsedRaw")

# Axis captions shared by both figures; each call receives its own copy.
axis_labels = {
    "ReqNodes": "Requested Nodes",
    "ElapsedRaw": "Elapsed Time (seconds)",
}

# Both axes logarithmic.
log_figure = px.scatter(
    data_cleaned,
    x="ReqNodes",
    y="ElapsedRaw",
    title="Elapsed Time vs Requested Nodes (Log scale)",
    labels=dict(axis_labels),
    log_x=True,
    log_y=True,
)

# NOTE(review): despite the name, this figure still uses a logarithmic x axis
# (only y is linear) -- confirm that a semi-log plot is what is intended.
linear_figure = px.scatter(
    data_cleaned,
    x="ReqNodes",
    y="ElapsedRaw",
    title="Elapsed Time vs Requested Nodes",
    labels=dict(axis_labels),
    log_x=True,
)

# Display both figures first, then export each one as a PNG.
for figure in (log_figure, linear_figure):
    figure.show()

log_figure.write_image("LogarithmicElapsedTimeVsRequestedNodes.png")
linear_figure.write_image("ElapsedTimeVsRequestedNodes.png")


