In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
def plot(json_data, lower_bound=0, limit=None):
    """Draw the values stored under ``json_data['data']`` against their indices.

    Args:
        json_data (dict): mapping whose ``'data'`` entry is an indexable
            sequence of values to plot.
        lower_bound (int): index of the first sample to draw (default 0).
        limit (int | None): exclusive upper index. ``None`` plots through the
            end of the series; a value larger than the series length is
            clamped to that length.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    series = json_data['data']

    # `is None` is the correct identity test; slicing clamps an oversized
    # limit to the series length so the index range never runs past the data.
    stop = len(series) if limit is None else len(series[:limit])

    indices = list(range(lower_bound, stop))
    values = [series[i] for i in indices]

    plt.figure(figsize=(18, 4))
    plt.plot(indices, values)
    plt.show()

In [3]:
def _preemption_window_starts(samples, window_size):
    """Return the start indices of every window that contains a node-count drop.

    The window starting at ``i`` covers ``samples[i : i + window_size]``. It is
    reported when some sample in it is strictly lower than the sample right
    before it inside the same window. Requires
    ``1 <= window_size <= len(samples)``.
    """
    # drops_up_to[k] = number of positions j in 1..k with samples[j] < samples[j - 1].
    drops_up_to = [0] * len(samples)
    for j in range(1, len(samples)):
        dropped = 1 if samples[j] < samples[j - 1] else 0
        drops_up_to[j] = drops_up_to[j - 1] + dropped

    # Drops that belong to window i sit at positions i + 1 .. i + window_size - 1,
    # so a prefix-count difference answers each window without rescanning it.
    last_offset = window_size - 1
    return {
        start
        for start in range(len(samples) - window_size + 1)
        if drops_up_to[start + last_offset] > drops_up_to[start]
    }


def analyze_preemption_probability(trace_data, window_size=4):
    """Estimate how often preemptions hit two or more regions in the same window.

    Every zone trace is cut into sliding windows of ``window_size`` consecutive
    samples (step 1). A zone shows a preemption in a window when a sample in
    that window is strictly lower than the previous sample of the same window.
    A region is preempted in a window when any of its zones is. The result is

        (# window starts with >= 2 preempted regions)
        / (# window starts with >= 1 preempted region)

    Windows are matched across zones and regions by their start index.

    Args:
        trace_data (dict): ``{region: {zone: [node_count, ...]}}``, for example::

            {
                'region1': {
                    'zoneA': [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
                    'zoneB': [10, 10, 10, 9, 8, 7, 6, 5, 4, 3],
                },
                'region2': {
                    'zoneC': [5, 4, 3, 2, 1, 1, 1, 1, 1, 1],
                    'zoneD': [8, 8, 8, 8, 7, 6, 5, 4, 3, 2],
                },
            }

        window_size (int): number of consecutive samples per window; must be
            at least 1. Default is 4. A window of one sample cannot contain
            a drop, so the result is then always 0.0.

    Returns:
        float: the ratio described above, in ``[0.0, 1.0]``. ``0.0`` is also
        returned (after printing a message) when ``trace_data`` is empty or not
        a dict, when ``window_size`` is below 1, when no zone is long enough to
        form a window, or when no window contains a preemption.
    """
    if not trace_data or not isinstance(trace_data, dict):
        print("Error: trace_data must not be an empty dictionary")
        return 0.0

    # A non-positive size would turn the window slice into an empty or
    # from-the-end slice and silently yield meaningless results.
    if window_size < 1:
        print("Error: window_size must be a positive integer")
        return 0.0

    has_valid_window = False
    # window start -> number of regions with a preemption in that window
    regions_hit_per_window = {}

    for region_name, zones in trace_data.items():
        region_starts = set()
        for zone_name, nodes_data in zones.items():
            if not nodes_data or len(nodes_data) < window_size:
                print(f"Warning: {region_name}/{zone_name} is not sufficient for constructing a complete window")
                continue

            has_valid_window = True
            # A region counts once per window, however many of its zones drop.
            region_starts |= _preemption_window_starts(nodes_data, window_size)

        for start in region_starts:
            regions_hit_per_window[start] = regions_hit_per_window.get(start, 0) + 1

    if not has_valid_window:
        print("not enough valid time windows for analyzing")
        return 0.0

    total_preemption_windows = len(regions_hit_per_window)
    if total_preemption_windows == 0:
        print("In all valid windows, not find any preemptions")
        return 0.0

    simultaneous_preemption_count = sum(
        1 for hits in regions_hit_per_window.values() if hits >= 2
    )
    return simultaneous_preemption_count / total_preemption_windows

In [4]:
def prepare_trace_data(trace_list):
    """Load the ``'data'`` series of every listed trace file.

    Args:
        trace_list (dict): ``{region: {zone: path}}`` where each path points to
            a JSON file containing a top-level ``'data'`` entry.

    Returns:
        dict: ``{region: {zone: data}}`` with the same keys as ``trace_list``,
        each path replaced by the ``'data'`` value parsed from its file.
    """
    def _read_series(path):
        # Parse one trace file and keep only its 'data' entry.
        with open(path) as handle:
            return json.load(handle)['data']

    return {
        region: {zone: _read_series(path) for zone, path in zone_paths.items()}
        for region, zone_paths in trace_list.items()
    }

In [5]:
# Trace set 1: region -> zone -> path of the JSON trace file for that zone.
# NOTE(review): the paths are absolute and machine-specific.
trace_list_1 = {
    "region1": {
        "zone1": "/root/ElasticCDC/conf/traces/preemption/4-node/aws-08-03-2023/us-east-1f_v100_1.json",
    },
    "region2": {
        "zone1": "/root/ElasticCDC/conf/traces/preemption/4-node/aws-08-03-2023/us-east-2a_v100_1.json",
    },
    "region3": {
        "zone1": "/root/ElasticCDC/conf/traces/preemption/4-node/aws-08-03-2023/us-west-2c_v100_1.json",
    },
}

# Replace every path with the 'data' series parsed from that file.
trace_data_1 = prepare_trace_data(trace_list_1)

In [6]:
# Among 36-sample windows with a preemption in at least one region, the
# fraction in which two or more regions are preempted (trace set 1).
analyze_preemption_probability(trace_data_1, window_size=36)

0.375170905113481

In [7]:
# Trace set 2: region -> zone -> path of the JSON trace file for that zone.
# NOTE(review): the paths are absolute and machine-specific.
trace_list_2 = {
    "region1": {
        "zone1": "/root/ElasticCDC/conf/traces/availability/1-node/aws-02-15-2023/us-east-1a_v100_1.json",
        "zone2": "/root/ElasticCDC/conf/traces/availability/1-node/aws-02-15-2023/us-east-1c_v100_1.json",
        "zone3": "/root/ElasticCDC/conf/traces/availability/1-node/aws-02-15-2023/us-east-1d_v100_1.json",
        "zone4": "/root/ElasticCDC/conf/traces/availability/1-node/aws-02-15-2023/us-east-1f_v100_1.json",
    },
    "region2": {
        "zone1": "/root/ElasticCDC/conf/traces/availability/1-node/aws-02-15-2023/us-east-2a_v100_1.json",
        "zone2": "/root/ElasticCDC/conf/traces/availability/1-node/aws-02-15-2023/us-east-2b_v100_1.json",
    },
    "region3": {
        "zone1": "/root/ElasticCDC/conf/traces/availability/1-node/aws-02-15-2023/us-west-2a_v100_1.json",
        "zone2": "/root/ElasticCDC/conf/traces/availability/1-node/aws-02-15-2023/us-west-2b_v100_1.json",
        "zone3": "/root/ElasticCDC/conf/traces/availability/1-node/aws-02-15-2023/us-west-2c_v100_1.json",
    },
}

# Replace every path with the 'data' series parsed from that file.
trace_data_2 = prepare_trace_data(trace_list_2)

In [9]:
# Among 54-sample windows with a preemption in at least one region, the
# fraction in which two or more regions are preempted (trace set 2).
analyze_preemption_probability(trace_data_2, window_size=54)

0.6002299331103679

In [5]:
# Trace set 3: region -> zone -> path of the JSON trace file for that zone.
# NOTE(review): the paths are absolute and machine-specific.
trace_list_3 = {
    "region1": {
        "zone1": "/root/ElasticCDC/conf/traces/availability/16-node/aws-08-27-2023/us-east-2b_v100_1.json",
    },
    "region2": {
        "zone1": "/root/ElasticCDC/conf/traces/availability/16-node/aws-08-27-2023/us-west-2a_v100_1.json",
    },
    "region3": {
        "zone1": "/root/ElasticCDC/conf/traces/availability/16-node/aws-08-27-2023/us-west-2c_v100_1.json",
    },
}

# Replace every path with the 'data' series parsed from that file.
trace_data_3 = prepare_trace_data(trace_list_3)

In [6]:
# Among 36-sample windows with a preemption in at least one region, the
# fraction in which two or more regions are preempted (trace set 3).
analyze_preemption_probability(trace_data_3, window_size=36)

0.570487877880874

In [10]:
# Trace set 4: region -> zone -> path of the JSON trace file for that zone.
# NOTE(review): the paths are absolute and machine-specific.
trace_list_4 = {
    "region1": {
        "zone1": "/root/ElasticCDC/conf/traces/preemption/1-node/aws-04-22-2023/us-east-1c_v100_1.json",
        "zone2": "/root/ElasticCDC/conf/traces/preemption/1-node/aws-04-22-2023/us-east-1f_v100_1.json",
    },
    "region2": {
        "zone1": "/root/ElasticCDC/conf/traces/preemption/1-node/aws-04-22-2023/us-west-2b_v100_1.json",
        "zone2": "/root/ElasticCDC/conf/traces/preemption/1-node/aws-04-22-2023/us-west-2c_v100_1.json",
    },
}

# Replace every path with the 'data' series parsed from that file.
trace_data_4 = prepare_trace_data(trace_list_4)

In [11]:
# Among 338-sample windows with a preemption in at least one region, the
# fraction in which two or more regions are preempted (trace set 4).
analyze_preemption_probability(trace_data_4, window_size=338)

0.4636135157777157