In [19]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import docker
import time
import concurrent.futures
from datetime import datetime
import copy
import math
from collections import defaultdict
import pprint

In [10]:
# Discover the hardware architecture.
avail_cores = os.cpu_count()
print(f"Available cores: {avail_cores}")

Available cores: 48


In [None]:
# Run docker-activity to monitor power consumption and runtime of the containers.

# Initialize Docker client
client = docker.from_env()
activity_container = client.containers.run(
    image="jdrouet/docker-activity",
    command=["stdout"],
    volumes={
        "/sys/class/powercap": {"bind": "/sys/class/powercap", "mode": "ro"},
        "/var/run/docker.sock": {"bind": "/var/run/docker.sock", "mode": "rw"},
    },
    privileged=True,
    detach=True,
    auto_remove=True
)
for log in activity_container.logs(stream=True, follow=True):
    print(log.decode().strip())

In [3]:
# Run isolated benchmarks in a Docker container on the different cores.
def parse_start_time(start_time_str):
    # Trim to microseconds and remove trailing 'Z'
    if '.' in start_time_str:
        time_part, rest = start_time_str.split('.')
        microseconds = rest[:6]  
        return datetime.strptime(f"{time_part}.{microseconds}", "%Y-%m-%dT%H:%M:%S.%f")
    return datetime.strptime(start_time_str.replace('Z', ''), "%Y-%m-%dT%H:%M:%S")

def parse_die_time(die_time_str):
    if '.' in die_time_str:
        time_part, rest = die_time_str.split('.')
        microseconds = rest[:6]  
        return datetime.strptime(f"{time_part}.{microseconds}", "%Y-%m-%dT%H:%M:%S.%f")
    return datetime.strptime(die_time_str.replace('Z', ''), "%Y-%m-%dT%H:%M:%S")

def run_container(task):
    container = task['client'].containers.run(
        image=task['image'],
        command=task['command'],
        cpuset_cpus=task['cpuset_cpus'],
        cgroupns="private",
        detach=True,
        labels={"test": next((arg.split('=')[1] for arg in task['command'] if arg.startswith('--test=')), None)}
    )
    max_retries = 5
    retry_interval = 1
    start_time = None  
    
    for attempt in range(max_retries):
        container.reload()
        # Capture container metadata
        if container.status == 'running':
            started_at = container.attrs['State']['StartedAt']
            start_time = parse_start_time(started_at)
            print(f"Container for benchmark {task['command']} started successfully.")
            break
        else:
            print(f"Attempt {attempt + 1} failed, retrying in {retry_interval} seconds...")
            time.sleep(retry_interval)
    
    # Ensure start_time is set even if the container does not reach 'running'
    if start_time is None:
        started_at = container.attrs['State']['StartedAt']
        start_time = parse_start_time(started_at)
    
    container.stop()
    container.reload()
    died_at = container.attrs['State']['FinishedAt']
    die_time = parse_die_time(died_at)
    container_lifetime = (die_time - start_time).total_seconds()
    print(f"Container lifetime: {container_lifetime} seconds")
    isolated_benchmarking_results[container.name] = {
        # 'workload': container.attrs['Config']['Labels'],
        'coloc_pair': None,
        'workload': container.attrs['Config']['Labels'].get('test', None), 
        'id': container.id,
        'life_time': container_lifetime,
    }
    container.remove()
    print(f"Container on CPU {task['cpuset_cpus']} completed and removed.")

if __name__ == "__main__":
            
    # Initialize Docker client
    client = docker.from_env()
    
    # Prepare affinity score map for colocated pairs.
    isolated_benchmarking_results = {}

    # Init the disk benchmark.
    print("Container started for Disk benchmark (prepare).")
    disk_prepare_output = client.containers.run(
    image="niklas/sysbench",
    command=[
        "sysbench", "--test=fileio",
        "--file-total-size=50G",
        "--file-test-mode=rndrw",
        "--num-threads=1",
        "prepare"
    ],
    cpuset_cpus="1",
    cgroupns="private",
    detach=False, 
    auto_remove=True  
    )

    # List of dictionaries to hold task information.
    tasks = [
        {"client": client, "image": "niklas/sysbench", "command": ["sysbench", "--test=cpu", "--num-threads=8", "--cpu-max-prime=800000000000","run"], "cpuset_cpus": "1"},
        {"client": client, "image": "niklas/sysbench", "command": ["sysbench", "--test=memory", "--memory-block-size=1M", "--memory-total-size=10G", "--threads=1", "run"], "cpuset_cpus": "2"},
        {"client": client,"image": "niklas/sysbench","command": ["sysbench", "--test=fileio","--file-total-size=50G","--file-test-mode=rndrw","--init-rng=on","--max-time=300","--max-requests=0","run"],"cpuset_cpus": "3"}
    ]
    
    with concurrent.futures.ThreadPoolExecutor(max_workers=2, thread_name_prefix="Isolator") as executor:
        futures = [executor.submit(run_container, task) for task in tasks]
    for future in futures:
        future.result()
    
    # Disk cleanup
    disk_cleanup_output = client.containers.run(
        image="niklas/sysbench",
        command=[
            "sysbench", "--test=fileio",
            "cleanup"
        ],
        cpuset_cpus="3",
        detach=False,  
        auto_remove=True  
    )
    print("Disk benchmark completed.")
        
    print("All isolated benchmarks completed.")
    # Clean up Docker client
    client.close()  

Container started for Disk benchmark (prepare).
Container for benchmark ['sysbench', '--test=memory', '--memory-block-size=1M', '--memory-total-size=10G', '--threads=1', 'run'] started successfully.
Container for benchmark ['sysbench', '--test=cpu', '--num-threads=8', '--cpu-max-prime=800000000000', 'run'] started successfully.
Container lifetime: 0.290878 seconds
Container on CPU 1 completed and removed.
Container lifetime: 9.672737 seconds
Container on CPU 2 completed and removed.
Container for benchmark ['sysbench', '--test=fileio', '--file-total-size=50G', '--file-test-mode=rndrw', '--init-rng=on', '--max-time=300', '--max-requests=0', 'run'] started successfully.
Container lifetime: 0.266721 seconds
Container on CPU 3 completed and removed.
Disk benchmark completed.
All isolated benchmarks completed.


In [4]:
# Access the watched benchmark containers for runtime and power consumption.
print("Container names and IDs:")
for name, container_id in isolated_benchmarking_results.items():
    print(f"{name}: {container_id}")

Container names and IDs:
dazzling_hellman: {'coloc_pair': None, 'workload': 'cpu', 'id': '9e7d7ef6e034cb577dacd0983d52834492cd5c029e17326463421067df0c5a83', 'life_time': 0.290878}
stoic_proskuriakova: {'coloc_pair': None, 'workload': 'memory', 'id': '178f99a65ca12481e33a9b125b0655d34c056c0dfcc6a24aebec1bcbbc423684', 'life_time': 9.672737}
priceless_einstein: {'coloc_pair': None, 'workload': 'fileio', 'id': '8b9d8d85343968bd59c09ca9443a0d03389371743c37f42da11ea05c4a824aab', 'life_time': 0.266721}


In [None]:
# Run co-located benchmarks in a Docker container on the same core.
def parse_start_time(start_time_str):
    # Trim to microseconds and remove trailing 'Z'
    if '.' in start_time_str:
        time_part, rest = start_time_str.split('.')
        microseconds = rest[:6]  
        return datetime.strptime(f"{time_part}.{microseconds}", "%Y-%m-%dT%H:%M:%S.%f")
    return datetime.strptime(start_time_str.replace('Z', ''), "%Y-%m-%dT%H:%M:%S")

def parse_die_time(die_time_str):
    if '.' in die_time_str:
        time_part, rest = die_time_str.split('.')
        microseconds = rest[:6]  
        return datetime.strptime(f"{time_part}.{microseconds}", "%Y-%m-%dT%H:%M:%S.%f")
    return datetime.strptime(die_time_str.replace('Z', ''), "%Y-%m-%dT%H:%M:%S")
    
def run_container(task):
    container = task['client'].containers.run(
        image=task['image'],
        command=task['command'],
        cpuset_cpus=task['cpuset_cpus'],
        cgroupns="private",
        detach=True,
        labels={"test": next((arg.split('=')[1] for arg in task['command'] if arg.startswith('--test=')), None)}
    )
    
    max_retries = 5
    retry_interval = 1
    start_time = None  
    
    for attempt in range(max_retries):
        container.reload()
        # Capture container metadata
        if container.status == 'running':
            started_at = container.attrs['State']['StartedAt']
            start_time = parse_start_time(started_at)
            print(f"Container for benchmark {task['command']} started successfully.")
            break
        else:
            print(f"Attempt {attempt + 1} failed, retrying in {retry_interval} seconds...")
            time.sleep(retry_interval)
    
    if start_time is None:
        started_at = container.attrs['State']['StartedAt']
        start_time = parse_start_time(started_at)
    
    container.stop()
    container.reload()
    died_at = container.attrs['State']['FinishedAt']
    die_time = parse_die_time(died_at)
    container_lifetime = (die_time - start_time).total_seconds()
    print(f"Container lifetime: {container_lifetime} seconds")
    coloc_benchmarking_results[container.name] = {
        'coloc_pair': pair_name,
        'workload': container.attrs['Config']['Labels'].get('test', None),  
        'id': container.id,
        'colocated_runtime': container_lifetime,
    }
    container.remove()
    print(f"Container on CPU {task['cpuset_cpus']} completed and removed.")

if __name__ == "__main__":
    
    # Initialize Docker client
    client = docker.from_env()
    
    # Prepare affinity score map for colocated pairs.
    coloc_benchmarking_results = {}

    # Init the disk benchmark.
    print("Container started for Disk benchmark (prepare).")
    disk_prepare_output = client.containers.run(
    image="niklas/sysbench",
    command=[
        "sysbench", "--test=fileio",
        "--file-total-size=300G",
        "--file-test-mode=rndrw",
        "--num-threads=1",
        "prepare"
    ],
    cpuset_cpus="1",
    cgroupns="private",
    detach=False, 
    auto_remove=True  
    )

    # Run every co-located benchmark combination on the same core.
    colocation = [
        {"CpuMem": [
            {"client": client, "image": "niklas/sysbench", "command": ["sysbench", "--test=cpu", "--num-threads=8", "--cpu-max-prime=40000000000", "run"], "cpuset_cpus": "0"},
            {"client": client, "image": "niklas/sysbench", "command": ["sysbench", "--test=memory", "--memory-block-size=8M", "--memory-total-size=150G", "--threads=1", "run"], "cpuset_cpus": "24"}
        ]},
        {"MemFileIO": [
            {"client": client, "image": "niklas/sysbench", "command": ["sysbench", "--test=memory", "--memory-block-size=8M", "--memory-total-size=150G", "--threads=1", "run"], "cpuset_cpus": "1"},
            {"client": client,"image": "niklas/sysbench","command": ["sysbench", "--test=fileio","--file-total-size=150G","--file-test-mode=rndrw","--init-rng=on","--max-time=300","--max-requests=0","run"],"cpuset_cpus": "25"}
        ]},
        {"FileIOCpu": [
            {"client": client, "image": "niklas/sysbench", "command": ["sysbench", "--test=cpu", "--num-threads=8", "--cpu-max-prime=40000000000", "run"], "cpuset_cpus": "2"},
            {"client": client,"image": "niklas/sysbench","command": ["sysbench", "--test=fileio","--file-total-size=300G","--file-test-mode=rndrw","--init-rng=on","--max-time=1800","--max-requests=0","run"],"cpuset_cpus": "26"}
        ]},
        {"CpuCpu": [
            {"client": client, "image": "niklas/sysbench", "command": ["sysbench", "--test=cpu", "--num-threads=8", "--cpu-max-prime=40000000000", "run"], "cpuset_cpus": "3"},
            {"client": client, "image": "niklas/sysbench", "command": ["sysbench", "--test=cpu", "--num-threads=8", "--cpu-max-prime=40000000000", "run"], "cpuset_cpus": "27"},
        ]},
        {"MemMem": [
            {"client": client, "image": "niklas/sysbench", "command": ["sysbench", "--test=memory", "--memory-block-size=8M", "--memory-total-size=150G", "--threads=1", "run"], "cpuset_cpus": "4"},
            {"client": client, "image": "niklas/sysbench", "command": ["sysbench", "--test=memory", "--memory-block-size=8M", "--memory-total-size=150G", "--threads=1", "run"], "cpuset_cpus": "28"},
        ]},
        {"FileIOFileIO": [
            {"client": client,"image": "niklas/sysbench","command": ["sysbench", "--test=fileio","--file-total-size=300G","--file-test-mode=rndrw","--init-rng=on","--max-time=1800","--max-requests=0","run"],"cpuset_cpus": "5"},
            {"client": client,"image": "niklas/sysbench","command": ["sysbench", "--test=fileio","--file-total-size=300G","--file-test-mode=rndrw","--init-rng=on","--max-time=1800","--max-requests=0","run"],"cpuset_cpus": "29"}
        ]}
    ]
    
    for coloc in colocation:
            for pair_name, tasks in coloc.items():
                print(f"Running colocated tasks for: {pair_name}")
                with concurrent.futures.ThreadPoolExecutor(max_workers=2, thread_name_prefix="Colocator") as executor:
                    futures = [executor.submit(run_container, task) for task in tasks]
                for future in futures:
                    future.result()
                print(f"Completed colocated tasks for: {pair_name}")
    
    # Disk cleanup
    disk_cleanup_output = client.containers.run(
        image="niklas/sysbench",
        command=[
            "sysbench", "--test=fileio",
            "cleanup"
        ],
        cpuset_cpus="3",
        detach=False,  
        auto_remove=True  
    )
    print("Disk benchmark completed.")
        
    print("All colocated benchmarks completed.")
    # Clean up Docker client
    client.close() 

Container started for Disk benchmark (prepare).
Running colocated tasks for: CpuMem
Container for benchmark ['sysbench', '--test=cpu', '--num-threads=8', '--cpu-max-prime=40000000000', 'run'] started successfully.
Container for benchmark ['sysbench', '--test=memory', '--memory-block-size=8M', '--memory-total-size=150G', '--threads=1', 'run'] started successfully.
Container lifetime: 0.176359 seconds
Container on CPU 0 completed and removed.
Container lifetime: 6.177636 seconds
Container on CPU 24 completed and removed.
Completed colocated tasks for: CpuMem
Running colocated tasks for: MemFileIO
Container for benchmark ['sysbench', '--test=fileio', '--file-total-size=150G', '--file-test-mode=rndrw', '--init-rng=on', '--max-time=300', '--max-requests=0', 'run'] started successfully.
Container for benchmark ['sysbench', '--test=memory', '--memory-block-size=8M', '--memory-total-size=150G', '--threads=1', 'run'] started successfully.
Container lifetime: 0.47887 seconds
Container on CPU 25 

In [17]:
# Access the watched benchmark containers for runtime and power consumption.
coloc_benchmarking_results_copy = copy.deepcopy(coloc_benchmarking_results)

print("Container names and IDs:")
for name, container_id in coloc_benchmarking_results.items():
    print(f"{name}: {container_id}")
# print(benchmarking_results)

Container names and IDs:
relaxed_kapitsa: {'coloc_pair': 'CpuMem', 'workload': 'cpu', 'id': '8e02ec1719b3da4bce14ceb4daaad5a0114c4f22ac2dd0211bbea3d4beee1d03', 'life_time': 0.176359, 'isolated_runtime': 0.290878, 'isolated_power_consumption': 'not yet computed'}
optimistic_mendeleev: {'coloc_pair': 'CpuMem', 'workload': 'memory', 'id': '5479a1f4a07660372324a8d855f044e6ef070e2fa85bf832d73dc5ac63ed0a64', 'life_time': 6.177636, 'isolated_runtime': 9.672737, 'isolated_power_consumption': 'not yet computed'}
dazzling_spence: {'coloc_pair': 'MemFileIO', 'workload': 'fileio', 'id': '41356ea3e28774481509db61e6a9a83304c577d7efcad42860b2adeb68a98726', 'life_time': 0.47887, 'isolated_runtime': 0.266721, 'isolated_power_consumption': 'not yet computed'}
eager_lichterman: {'coloc_pair': 'MemFileIO', 'workload': 'memory', 'id': '2d00eb49f89a3d5fe987b3f1ecee939c7aaeb7dbe10b798c0537cbb31160c75f', 'life_time': 6.304723, 'isolated_runtime': 9.672737, 'isolated_power_consumption': 'not yet computed'}
zen

In [23]:
def calc_average_slowdown(slowdown_1, slowdown_2):
    average_slowdown = (slowdown_1 + slowdown_2) / 2
    return average_slowdown

def calc_slowdown_factor(isolated_runtime_1, isolated_runtime_2, coloc_runtime_1, coloc_runtime_2):
    slowdown_1 = isolated_runtime_1 / coloc_runtime_1
    slowdown_2 = isolated_runtime_2 / coloc_runtime_2
    average_slowdown = calc_average_slowdown(slowdown_1, slowdown_2)
    return average_slowdown

def calc_affinity_score(isolated_runtime_1, coloc_runtime_1, isolated_runtime_2, coloc_runtime_2):
    affinity_score = (isolated_runtime_1 + isolated_runtime_2) / (coloc_runtime_1 + coloc_runtime_2)
    return min(1, affinity_score)
    
for name, result in coloc_benchmarking_results.items():
    coloc_workload = result.get('workload')
    # Find the isolated runtime for the same workload
    isolated_runtime = ''
    for iso_name, iso_result in isolated_benchmarking_results.items():
        if iso_result.get('workload') == coloc_workload and iso_result.get('coloc_pair') is None:
            isolated_runtime = iso_result.get('life_time')
            break
    result['isolated_runtime'] = isolated_runtime
    result['isolated_power_consumption'] = 'not yet computed'
    print(f"Colocated benchmark: {coloc_workload}, {result.get('coloc_pair')}, Isolated runtime: {isolated_runtime}")

Colocated benchmark: cpu, CpuMem, Isolated runtime: 0.290878
Colocated benchmark: memory, CpuMem, Isolated runtime: 9.672737
Colocated benchmark: fileio, MemFileIO, Isolated runtime: 0.266721
Colocated benchmark: memory, MemFileIO, Isolated runtime: 9.672737
Colocated benchmark: fileio, FileIOCpu, Isolated runtime: 0.266721
Colocated benchmark: cpu, FileIOCpu, Isolated runtime: 0.290878
Colocated benchmark: cpu, CpuCpu, Isolated runtime: 0.290878
Colocated benchmark: cpu, CpuCpu, Isolated runtime: 0.290878
Colocated benchmark: memory, MemMem, Isolated runtime: 9.672737
Colocated benchmark: memory, MemMem, Isolated runtime: 9.672737
Colocated benchmark: fileio, FileIOFileIO, Isolated runtime: 0.266721
Colocated benchmark: fileio, FileIOFileIO, Isolated runtime: 0.266721


In [24]:
# Transform benchmarking dict into a summary
coloc_summary = defaultdict(dict)

for name, result in coloc_benchmarking_results.items():
    pair = result['coloc_pair']
    workload = result['workload']
    if 'workload_1' not in coloc_summary[pair]:
        coloc_summary[pair]['workload_1'] = workload
        coloc_summary[pair]['colocated_runtime_1'] = result['life_time']
        coloc_summary[pair]['isolated_runtime_1'] = result['isolated_runtime']
        coloc_summary[pair]['power_consumption_1'] = result['isolated_power_consumption']
    else:
        coloc_summary[pair]['workload_2'] = workload
        coloc_summary[pair]['colocated_runtime_2'] = result['life_time']
        coloc_summary[pair]['isolated_runtime_2'] = result['isolated_runtime']
        coloc_summary[pair]['power_consumption_2'] = result['isolated_power_consumption']

# Now calculate and add average_slowdown and affinity_score for each coloc pair
for pair, summary in coloc_summary.items():
    try:
        iso1 = float(summary['isolated_runtime_1'])
        iso2 = float(summary['isolated_runtime_2'])
        coloc1 = float(summary['colocated_runtime_1'])
        coloc2 = float(summary['colocated_runtime_2'])
        summary['average_slowdown'] = calc_slowdown_factor(iso1, iso2, coloc1, coloc2)
        summary['affinity_score'] = calc_affinity_score(iso1, coloc1, iso2, coloc2)
    except Exception as e:
        summary['average_slowdown'] = None
        summary['affinity_score'] = None
        print(f"Could not calculate for pair {pair}: {e}")

coloc_summary = dict(coloc_summary)
pprint.pprint(coloc_summary)

{'CpuCpu': {'affinity_score': 0.44712387221035627,
            'average_slowdown': 0.6143764454069651,
            'colocated_runtime_1': 0.311122,
            'colocated_runtime_2': 0.989985,
            'isolated_runtime_1': 0.290878,
            'isolated_runtime_2': 0.290878,
            'power_consumption_1': 'not yet computed',
            'power_consumption_2': 'not yet computed',
            'workload_1': 'cpu',
            'workload_2': 'cpu'},
 'CpuMem': {'affinity_score': 1,
            'average_slowdown': 1.607559174900998,
            'colocated_runtime_1': 0.176359,
            'colocated_runtime_2': 6.177636,
            'isolated_runtime_1': 0.290878,
            'isolated_runtime_2': 9.672737,
            'power_consumption_1': 'not yet computed',
            'power_consumption_2': 'not yet computed',
            'workload_1': 'cpu',
            'workload_2': 'memory'},
 'FileIOCpu': {'affinity_score': 0.3524561356933272,
               'average_slowdown': 0.4760178460