In [1]:
import subprocess
import time
import numpy as np
import random
import pandas as pd
from datetime import datetime
import re
random.seed(10)
np.random.seed(10)

In [2]:
def create_sbatch_script(job_name, script_content):
    script_path = f"tmp/{job_name}.sh"
    with open(script_path, 'w') as file:
        file.write(script_content)
    return script_path

def submit_job_as_user(script_path, user):
    try:
        cmd = f"sudo -u {user} sbatch {script_path}"
        subprocess.run(cmd, shell=True, check=True, universal_newlines=True, stdout=subprocess.PIPE)
        print(f"Job {script_path} submitted as {user}")
    except subprocess.CalledProcessError as e:
        print(f"Error submitting job {script_path} as {user}: {e}")
        

def submit_job_as_user_test_only(script_path, user):
    cmd = f"sudo -u {user} sbatch --test-only {script_path}"
    result = subprocess.run(cmd, shell=True, check=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return result.stderr

def seconds_to_minutes(seconds):
    seconds = int(seconds)
    minutes = seconds // 60
    remaining_seconds = seconds % 60
    return f"{minutes}:{remaining_seconds:02d}"

In [3]:
def time_difference_seconds(sbatch_output):
    # for sbatch --test-only (default estimator)
    time_match = re.search(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}', sbatch_output)
    if time_match:
        job_start_time_str = time_match.group()
        job_start_time = datetime.strptime(job_start_time_str, '%Y-%m-%dT%H:%M:%S')
    else:
        return None  # Return None if no date-time found

    # Get the current time
    current_time = datetime.now()

    # Calculate the difference in seconds
    return (job_start_time - current_time).total_seconds()

In [4]:
DURATION = 10000
USERS_NAMES = ["user1", "user2", "user3"]

JOB_SCRIPT_CONTENT = \
"""#!/bin/bash
#SBATCH --job-name={job_id}
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --output=/home/{user}/result-%j.out
#SBATCH --time={req_time}
#SBATCH --comment="logging"
#SBATCH --error=/home/{user}/error-%j.err

echo "Starting job $SLURM_JOB_ID"
sleep {sleep_time}
echo "Job $SLURM_JOB_ID completed"
"""

In [5]:
task_time_1 = np.abs(np.random.normal(10, 3, DURATION))
task_time_2 = np.abs(np.random.normal(50, 10, DURATION))
task_time_3 = np.abs(np.random.normal(100, 10, DURATION))

task_noise_1 = np.abs(np.random.normal(task_time_1 * 0.2, task_time_1 * 0.02, DURATION))
task_noise_2 = np.abs(np.random.normal(task_time_2 * 0.3, task_time_2 * 0.01, DURATION))
task_noise_3 = np.abs(np.random.normal(task_time_3 * 0.1, task_time_3 * 0.04, DURATION))

submit_time_1 = np.random.binomial(1, 0.05 / 2, DURATION)
submit_time_2 = np.random.binomial(1, 0.02 / 2, DURATION)
submit_time_3 = np.random.binomial(1, 0.01 / 2, DURATION)

In [6]:
sum(submit_time_3)

49

In [7]:
job_id = 0

In [8]:
log_slurm_estimator = True
jobid2preds = dict()

In [9]:
for i in range(DURATION):
    time.sleep(0.1)
    if submit_time_1[i]:        
        if log_slurm_estimator:
            script_path = create_sbatch_script(f"job_{job_id}", JOB_SCRIPT_CONTENT.format(
                user=USERS_NAMES[0], 
                sleep_time=task_time_1[i], 
                req_time=seconds_to_minutes(task_time_1[i] + task_noise_1[i]),
                job_id=f"job_{job_id}_test_only"
            ))
            sbatch_output = submit_job_as_user_test_only(script_path, USERS_NAMES[0])
            waittime_estimation = time_difference_seconds(sbatch_output)
            jobid2preds[job_id] = waittime_estimation
            
        script_path = create_sbatch_script(f"job_{job_id}", JOB_SCRIPT_CONTENT.format(
            user=USERS_NAMES[0], 
            sleep_time=task_time_1[i], 
            req_time=seconds_to_minutes(task_time_1[i] + task_noise_1[i]),
            job_id=f"job_{job_id}"
        ))
        
        submit_job_as_user(script_path, USERS_NAMES[0])
        job_id += 1
        seconds_to_minutes(task_time_1[i])
        print(f"Submit job_{job_id}; time={i}")

    if submit_time_2[i]:        
        if log_slurm_estimator:
            
            script_path = create_sbatch_script(f"job_{job_id}_test_only", JOB_SCRIPT_CONTENT.format(
                user=USERS_NAMES[1], 
                sleep_time=task_time_2[i], 
                req_time=seconds_to_minutes(task_time_2[i] + task_noise_2[i]),
                job_id=f"job_{job_id}_test_only"
            ))
            
            sbatch_output = submit_job_as_user_test_only(script_path, USERS_NAMES[1])
            waittime_estimation = time_difference_seconds(sbatch_output)
            jobid2preds[job_id] = waittime_estimation

        script_path = create_sbatch_script(f"job_{job_id}", JOB_SCRIPT_CONTENT.format(
            user=USERS_NAMES[1], 
            sleep_time=task_time_2[i], 
            req_time=seconds_to_minutes(task_time_2[i] + task_noise_2[i]),
            job_id=f"job_{job_id}"
        ))

        submit_job_as_user(script_path, USERS_NAMES[1])
        job_id += 1
        print(f"Submit job_{job_id}; time={i}")

    if submit_time_3[i]:    
        if log_slurm_estimator:
            script_path = create_sbatch_script(f"job_{job_id}_test_only", JOB_SCRIPT_CONTENT.format(
                user=USERS_NAMES[2], 
                sleep_time=task_time_3[i], 
                req_time=seconds_to_minutes(task_time_3[i] + task_noise_3[i]),
                job_id=f"job_{job_id}__test_only"
            ))
            
            sbatch_output = submit_job_as_user_test_only(script_path, USERS_NAMES[2])
            waittime_estimation = time_difference_seconds(sbatch_output)
            jobid2preds[job_id] = waittime_estimation

        script_path = create_sbatch_script(f"job_{job_id}", JOB_SCRIPT_CONTENT.format(
            user=USERS_NAMES[2], 
            sleep_time=task_time_3[i], 
            req_time=seconds_to_minutes(task_time_3[i] + task_noise_3[i]),
            job_id=f"job_{job_id}"
        ))
        
        submit_job_as_user(script_path, USERS_NAMES[2])
        job_id += 1
        print(f"Submit job_{job_id}; time={i}")

Job tmp/job_0.sh submitted as user2
Submit job_1; time=0
Job tmp/job_1.sh submitted as user2
Submit job_2; time=1
Job tmp/job_2.sh submitted as user1
Submit job_3; time=20
Job tmp/job_3.sh submitted as user2
Submit job_4; time=33
Job tmp/job_4.sh submitted as user3
Submit job_5; time=33
Job tmp/job_5.sh submitted as user1
Submit job_6; time=53
Job tmp/job_6.sh submitted as user1
Submit job_7; time=126
Job tmp/job_7.sh submitted as user1
Submit job_8; time=150
Job tmp/job_8.sh submitted as user1
Submit job_9; time=156
Job tmp/job_9.sh submitted as user1
Submit job_10; time=161
Job tmp/job_10.sh submitted as user1
Submit job_11; time=180
Job tmp/job_11.sh submitted as user1
Submit job_12; time=226
Job tmp/job_12.sh submitted as user3
Submit job_13; time=230
Job tmp/job_13.sh submitted as user3
Submit job_14; time=232
Job tmp/job_14.sh submitted as user1
Submit job_15; time=310
Job tmp/job_15.sh submitted as user1
Submit job_16; time=325
Job tmp/job_16.sh submitted as user1
Submit job_17;

In [10]:
if log_slurm_estimator:
    with open('slurm_estimations.csv', 'w') as f:
        print("job_id,slurm_estimation", file=f)
        for job_id, slurm_estimation in jobid2preds.items():
            slurm_estimation = slurm_estimation if slurm_estimation > 0 else 0
            print(f"{job_id},{slurm_estimation}", file=f)