# In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

# Column names applied to the loaded table, in file order. They replace
# whatever header names the CSV itself provides.
SLURM_COLUMNS = [
    # identity and accounting
    'UID', 'GID', 'JobIDRaw', 'Group', 'Account', 'JobName',
    # timing and outcome
    'TimelimitRaw', 'Submit', 'Start', 'End', 'State', 'ExitCode',
    # scheduling
    'ReservationId', 'Reservation', 'Priority', 'Eligible', 'Constraints',
    'SystemCPU', 'CPUTimeRAW', 'ElapsedRaw',
    'Layout', 'NTasks', 'QOSREQ', 'QOS', 'Restarts', 'WorkDir',
    'ConsumedEnergyRaw', 'FailedNode',
    # disk I/O
    'AveDiskRead', 'AveDiskWrite', 'MaxDiskRead', 'MaxDiskWrite',
    'Partition', 'Reason', 'Suspended', 'AllocNodes',
    # memory
    'AveRSS', 'MaxRSS', 'DerivedExitCode', 'AveVMSize', 'MaxVMSize', 'ReqMem',
    # nodes and CPUs
    'ReqNodes', 'NNodes', 'Planned', 'PlannedCPURAW', 'NCPUS',
    'UserCPU', 'ReqCPUS', 'TotalCPU',
    # trackable resources (TRES)
    'TRESUsageInTot', 'TRESUsageOutTot', 'ReqTRES', 'AllocTRES',
    'TRESUsageInMax', 'TRESUsageOutMax',
    # flags and free-form comments
    'Flags', 'Comment', 'SystemComment', 'AdminComment',
]

# Load the job records, then overwrite the header with the names above.
# The assignment raises if the file does not have exactly len(SLURM_COLUMNS) columns.
data = pd.read_csv("../Data/Slurm-data-2.csv")
data.columns = SLURM_COLUMNS

# Drop rows that are missing any of the fields needed to compute a wait time.
data = data.dropna(subset=['JobIDRaw', 'Submit', 'Start'])

# To verify that no missing values remain in JobIDRaw, Submit, Start:
#print(data.shape)
#nan_columns = data.columns[data.isna().any()]
#print(data[nan_columns])

# Order the jobs by their raw job id.
data = data.sort_values(by=['JobIDRaw'], ascending=True)

# errors='coerce' does not remove unparseable timestamps: it turns them into
# NaT. Those rows passed the dropna above (the raw text was not NaN), so they
# have to be filtered out explicitly once parsing has been attempted.
timestamp_format = '%Y-%m-%dT%H:%M:%S'
start_dt = pd.to_datetime(data['Start'], format=timestamp_format, errors='coerce')
submit_dt = pd.to_datetime(data['Submit'], format=timestamp_format, errors='coerce')
parsed = start_dt.notna() & submit_dt.notna()

# Store both timestamps as whole seconds since 1970-01-01. With the NaT rows
# gone the floor division yields integer columns instead of floats with NaN.
epoch = pd.Timestamp('1970-01-01')
one_second = pd.Timedelta('1s')
data = data.loc[parsed].assign(
    Start=(start_dt[parsed] - epoch) // one_second,
    Submit=(submit_dt[parsed] - epoch) // one_second,
)

# Wait time per job in seconds: Start and Submit are both epoch seconds here,
# so a single column subtraction replaces the row-by-row loop.
# Kept as a plain list under the original name.
wait_times = (data['Start'] - data['Submit']).tolist()

# Build a small frame holding only what the figure needs, without modifying
# `data`. Referring to columns by name (instead of passing Series objects)
# lets the `labels` keys below match the plotted columns deterministically.
plot_data = data[['JobIDRaw', 'State']].assign(WaitTime=wait_times)

# NOTE(review): the colour map key is lower-case 'timeout'; confirm it matches
# the spelling/case of the values in the State column, otherwise it is ignored.
plot = px.scatter(
    plot_data,
    x='JobIDRaw',
    y='WaitTime',
    color='State',
    title='Wait Time vs JobID',
    labels={'JobIDRaw': 'JobID Time', 'WaitTime': 'Wait Time (seconds)'},
    color_discrete_map={'timeout': 'black'},
)

#Can add lots of color, opacity, size, shape info to the datapoints
plot.update_traces(marker=dict(size=6))

# Initial viewport only: job ids 2960-3165 and waits from -1 to 40 seconds.
# Points outside this window are still in the figure and appear on zoom-out.
plot.update_layout(
    xaxis=dict(range=[2960, 3165]),
    yaxis=dict(range=[-1, 40]),
)
plot.show()

# Save a standalone interactive copy of the figure next to the script.
plot.write_html("Slurm-Data-2-plot.html")

#Also, can click and drag on the plot to zoom in on a specific area of the graph