In [25]:
import json
import math
import os

import numpy as np

In [26]:
class ProblemGenerator:
    """Random generator of synthetic job workloads.

    Each generated job is a dict with the keys ``job_id``, ``res``,
    ``subtime``, ``reqtime``, ``runtime``, ``profile`` and ``user_id``.

    * Submission times are the cumulative sum of exponentially distributed
      inter-arrival gaps with mean ``1 / lambda_arrival``.
    * Requested execution times are drawn from
      ``Normal(mu_execution, sigma_execution)``.
    * Actual run times equal the requested times, or the requested times
      plus ``Normal(mu_noise, sigma_noise)`` noise when
      ``runtime_equals_reqtime`` is False.
    * Node counts are drawn from ``Normal(ceil(max_node / 2), 1)``, rounded
      to the nearest integer and clipped to ``[1, max_node]``.

    Parameters
    ----------
    lambda_arrival : float
        Arrival rate; must be strictly positive when ``generate`` is called.
    mu_execution, sigma_execution : float
        Mean / standard deviation of the requested execution time.
    mu_noise, sigma_noise : float
        Mean / standard deviation of the noise added to the requested time
        (only used when ``runtime_equals_reqtime`` is False).
    num_jobs : int or None
        Number of jobs to generate. When None, a value is drawn once at
        construction as ``max(1, int(Normal(10, 2)))``.
    max_node : int
        Upper bound on the nodes requested by a single job; must be >= 1
        when ``generate`` is called.
    runtime_equals_reqtime : bool
        If True, ``runtime`` is identical to ``reqtime`` for every job.
    min_time : int
        Lower bound applied to ``reqtime`` and ``runtime`` after rounding;
        values below 1 are raised to 1.
    seed : int or None
        When given, all random draws come from a private
        ``numpy.random.RandomState(seed)``, so results are reproducible and
        numpy's global RNG is left untouched. When None (default) the global
        ``numpy.random`` functions are used, exactly as before this
        parameter existed.
    """

    def __init__(
        self,
        lambda_arrival=0.01,
        mu_execution=80,
        sigma_execution=30,
        mu_noise=0,
        sigma_noise=1,
        num_jobs=None,
        max_node=8,
        runtime_equals_reqtime: bool = True,
        min_time: int = 1,
        seed=None,
    ):
        # A private RandomState isolates seeded runs; falling back to the
        # np.random module keeps the unseeded behaviour (and any caller-side
        # np.random.seed(...)) unchanged.
        self._rng = np.random.RandomState(seed) if seed is not None else np.random

        self.lambda_arrival = lambda_arrival
        self.mu_execution = mu_execution
        self.sigma_execution = sigma_execution
        self.mu_noise = mu_noise
        self.sigma_noise = sigma_noise
        self.max_node = max_node
        self.num_jobs = num_jobs if num_jobs is not None else max(
            1, int(self._rng.normal(10, 2))
        )
        self.runtime_equals_reqtime = runtime_equals_reqtime
        self.min_time = int(max(1, min_time))            # enforce >=1

    def generate(self):
        """Draw one workload and return it as a list of job dicts.

        Returns
        -------
        list[dict]
            ``num_jobs`` entries, with ``job_id`` running from 1 upwards in
            submission order. All numeric fields are plain Python ints.

        Raises
        ------
        ValueError
            If ``lambda_arrival`` is not strictly positive or ``max_node``
            is smaller than 1.
        """
        # Validated here rather than in __init__ because the attributes are
        # public and may have been changed after construction.
        if self.lambda_arrival <= 0:
            raise ValueError(
                "lambda_arrival must be > 0, got %r" % (self.lambda_arrival,)
            )
        if self.max_node < 1:
            # np.clip(x, 1, max_node) with max_node < 1 would silently yield
            # jobs requesting max_node (<= 0) nodes.
            raise ValueError("max_node must be >= 1, got %r" % (self.max_node,))

        rng = self._rng

        interarrival_times = rng.exponential(
            1 / self.lambda_arrival, self.num_jobs
        )
        arrival_times = np.cumsum(interarrival_times)

        # requested execution times (float), clip to small positive so rounding can't go negative
        requested_execution_times = np.clip(
            rng.normal(self.mu_execution, self.sigma_execution, self.num_jobs),
            a_min=1e-6, a_max=None
        )

        if self.runtime_equals_reqtime:
            actual_execution_times = requested_execution_times.copy()
        else:
            noise = rng.normal(self.mu_noise, self.sigma_noise, self.num_jobs)
            actual_execution_times = np.clip(
                requested_execution_times + noise, a_min=1e-6, a_max=None
            )

        # nodes per job (rounded & clipped to [1, max_node])
        num_nodes_required = np.clip(
            np.rint(
                rng.normal(math.ceil(self.max_node / 2), 1, self.num_jobs)
            ).astype(int),
            1, self.max_node
        )

        workloads = []
        for i in range(self.num_jobs):
            # round to int, then clamp to at least min_time (prevents 0)
            req_i = max(self.min_time,
                        int(round(float(requested_execution_times[i]))))
            run_i = max(self.min_time,
                        int(round(float(actual_execution_times[i]))))

            workloads.append({
                "job_id": i + 1,
                "res": int(num_nodes_required[i]),
                "subtime": int(round(float(arrival_times[i]))),
                "reqtime": req_i,
                "runtime": run_i,
                "profile": "100",
                "user_id": 0,
            })

        return workloads

In [27]:
# Number of jobs to generate
num_jobs = 200
# Maximum number of nodes a single job may request
max_node = 16
# Path to output JSON. Built from the two settings above so the file name
# cannot drift out of sync when either value is changed.
output_path = f"workloads/generated_{num_jobs}_{max_node}.json"

In [28]:
# Draw a random workload using the settings from the configuration cell.
# NOTE: no seed is set here, so every run produces a different workload.
problem_generator = ProblemGenerator(num_jobs=num_jobs, max_node=max_node)
workloads = problem_generator.generate()

# Top-level document: resource count, the generated jobs, and the single
# profile ("100") that every generated job refers to.
output_data = {
    "nb_res": max_node,
    "jobs": workloads,
    "profiles": {
        "100": {
            "cpu": 10000000000000000000000,
            "com": 0,
            "type": "parallel_homogeneous"
        }
    }
}

workload_filepath = output_path
# open(..., "w") does not create missing folders, so make sure the target
# directory exists first (skipped when the path has no directory component).
output_dir = os.path.dirname(workload_filepath)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
with open(workload_filepath, "w") as json_file:
    json.dump(output_data, json_file, indent=4)