In [1]:
import docker
from time import sleep
from typing import List, Dict, Any
import time


In [2]:
docker_client = docker.from_env()

In [3]:
image_dict = {
    "blackscholes": "anakli/cca:parsec_blackscholes",
    "canneal": "anakli/cca:parsec_canneal",
    "dedup": "anakli/cca:parsec_dedup",
    "ferret": "anakli/cca:parsec_ferret",
    "freqmine": "anakli/cca:parsec_freqmine",
    "radix": "anakli/cca:splash2x_radix",
    "vips": "anakli/cca:parsec_vips",
}

In [4]:
scaling = [1.70, 1.70, 1.70, 1.95, 1.95, 1.95, 1.95]
duration = [100, 220, 16, 288, 394, 43, 82]
inteference = [9, 9, 10, 11, 11, 8, 11]

jobs = list(zip(image_dict.keys(), scaling, duration, inteference))

start_queue = sorted(jobs, key=lambda x: (x[3], -x[2], x[1]), reverse=False)
print("Starting queue:")
for job in start_queue:
    print(f"  {job[0]}: {job[1]}, {job[2]} {job[3]}")

Starting queue:
  radix: 1.95, 43 8
  canneal: 1.7, 220 9
  blackscholes: 1.7, 100 9
  dedup: 1.7, 16 10
  freqmine: 1.95, 394 11
  ferret: 1.95, 288 11
  vips: 1.95, 82 11


In [5]:
class Job:
    def __init__(self, name, scaling, duration, inteference):
        self.name = name
        self.scaling = scaling
        self.duration = duration
        self.inteference = inteference
        self.image_name = image_dict[name]
        self.container = None
        self.cpu_set = ""

    def __repr__(self):
        return f"Job({self.name}, {self.scaling}, {self.duration}, {self.inteference})"
    
    def is_scaling_job(self):
        return self.duration > 100 and self.scaling > 1.9
    
    def set_container(self, container):
        self.container = container
        self.start_time = time.time()
    
    def update_cpu_set(self, additional_cpus):
        self.cpu_set += f",{additional_cpus}" if self.cpu_set else additional_cpus

    def runtime(self):
        if self.start_time:
            return time.time() - self.start_time
        return 0
    
    def is_finished(self):
        if not self.container:
            return False        
        try:
            self.container.reload()
            return self.container.status == 'exited'
        except:
            # Container might be removed already
            return True

start_queue = [Job(*job) for job in start_queue]

In [6]:
curr_jobs: List[Job] = []
avail_cpus = ["0", "1"]
max_num_threads = 2


client = docker.from_env()

polling_interval = 0.1

while len(start_queue) > 0 or len(curr_jobs) > 0:

    for job in curr_jobs:
        if job.is_finished():
            print(f"Job {job.name} completed after {job.runtime():.2f} seconds")

            # Free up the CPUs that were allocated to this job
            for cpu in job.cpu_set.split(","):
                if cpu:  # Ensure it's not an empty string
                    avail_cpus.append(cpu)

            curr_jobs.remove(job)
            print(f"Released CPUs: {job.cpu_set}. Available CPUs: {avail_cpus}")

    if len(avail_cpus) == 0:
        # print("No available CPUs, waiting...")
        sleep(polling_interval)
        continue

    avail_cpu = avail_cpus.pop(0)

    # If we have a scaling job, we do not need to pop
    scaling_jobs = [job for job in curr_jobs if job.is_scaling_job()]
    if len(scaling_jobs) > 0:
        scaling_job = scaling_jobs[0]

        scaling_job.update_cpu_set(avail_cpu)

        cpu_set = scaling_job.cpu_set
        num_threads = len(cpu_set.split(",")) + 1

        scaling_job.container.update(cpuset_cpus=cpu_set)
        print(f"Assigned CPU {avail_cpu} to {curr_jobs[0].name}")
        continue
    
    if len(start_queue) == 0:
        # No more jobs to start
        sleep(polling_interval)
        continue

    # Start a new job
    job = start_queue.pop(0)

    run_command = (
        "./run -a run -S splash2x -p radix -i native -n 1"
        if job.name == "radix"
        else f"./run -a run -S parsec -p {job.name} -i native -n {4 if job.is_scaling_job() else 1}"
    )
    
    container = client.containers.run(
        image=job.image_name,
        command=run_command,
        detach=True,
        remove=True,
        name="parsec-" + job.name,
        cpuset_cpus=avail_cpu,
    )

    print(f"Started job {job.name} on CPU {avail_cpu}")
    job.set_container(container)
    job.update_cpu_set(avail_cpu)
    curr_jobs.append(job)

Started job radix on CPU 0
Started job canneal on CPU 1
Job radix completed after 35.22 seconds
Released CPUs: 0. Available CPUs: ['0']
Started job blackscholes on CPU 0
Job blackscholes completed after 113.34 seconds
Released CPUs: 0. Available CPUs: ['0']
Started job dedup on CPU 0
Job dedup completed after 26.76 seconds
Released CPUs: 0. Available CPUs: ['0']
Started job freqmine on CPU 0
Job canneal completed after 188.13 seconds
Released CPUs: 1. Available CPUs: ['1']
Assigned CPU 1 to freqmine
Job freqmine completed after 210.48 seconds
Released CPUs: 0,1. Available CPUs: ['0', '1']
Started job ferret on CPU 0
Assigned CPU 1 to ferret
Job ferret completed after 170.39 seconds
Released CPUs: 0,1. Available CPUs: ['0', '1']
Started job vips on CPU 0
Job vips completed after 85.08 seconds
Released CPUs: 0. Available CPUs: ['0']


In [7]:
# container = client.containers.run(
#     image=image_dict["radix"],
#     command=f"./run -a run -S parsec -p radix -i native -n {1}",
#     detach=True,
#     remove=True,
#     name='parsec-' + job.name,
#     cpuset_cpus=avail_cpu
# )

# # print logs from the container
# for line in container.logs(stream=True):
#     print(line.strip())
#     if b"parsec" in line:
#         print(line.strip())
#         break