In [1]:
import numpy as np
import pandas as pd


def generate_tasks(n_tasks: int, seed: "int | None" = None) -> pd.DataFrame:
    """Generate a random set of tasks for the scheduling experiments.

    Args:
        n_tasks: Number of tasks (rows) to create.
        seed: Optional seed for a private random generator, making the task
            set reproducible. When omitted, the global NumPy random state is
            used, so existing callers (including ones that call
            ``np.random.seed`` themselves) see unchanged behaviour.

    Returns:
        A DataFrame with one row per task and the columns ``due_date``
        (5..29), ``processing_time`` (due date minus a random slack of 0..4,
        never below 1), ``importance`` (1..5), plus the bookkeeping columns
        ``done``, ``lateness``, ``completion_order`` and ``completion_time``
        initialised to ``False`` / ``0``.
    """
    # ``np.random`` (module) and ``RandomState`` expose the same ``randint``
    # API, so both paths draw the values in the same order.
    rng = np.random if seed is None else np.random.RandomState(seed)

    due_date = rng.randint(low=5, high=30, size=n_tasks)
    slack = rng.randint(low=0, high=5, size=n_tasks)
    processing_time = np.maximum(1, due_date - slack)
    importance = rng.randint(low=1, high=6, size=n_tasks)

    return pd.DataFrame({
        "due_date": due_date,
        "processing_time": processing_time,
        "importance": importance,
        "done": False,
        "lateness": 0,
        "completion_order": 0,
        "completion_time": 0,
    })


def execute_task(tasks: pd.DataFrame, task_id: int, current_time: int, order: int) -> tuple[pd.DataFrame, int]:
    """Run a single task starting at ``current_time`` and record the outcome.

    The row labelled ``task_id`` is updated in place on ``tasks``: it is
    flagged as done and its lateness, completion order and completion time
    are stored.

    Args:
        tasks: Task table with ``processing_time`` and ``due_date`` columns
            and the bookkeeping columns written below.
        task_id: Index label of the task to execute.
        current_time: Time at which the task starts.
        order: Position of this task in the execution sequence.

    Returns:
        The same (mutated) ``tasks`` frame and the time at which the task
        finished.
    """
    row = tasks.loc[task_id]
    finished_at = current_time + row["processing_time"]
    # Finishing early does not count as negative lateness.
    overdue_by = max(0, finished_at - row["due_date"])

    updates = {
        "done": True,
        "lateness": overdue_by,
        "completion_order": order,
        "completion_time": finished_at,
    }
    for column, value in updates.items():
        tasks.loc[task_id, column] = value

    return tasks, finished_at


def _run_priority_schedule(tasks: pd.DataFrame, priority: list[str]) -> pd.DataFrame:
    """Execute all pending tasks back to back, ordered ascending by ``priority``.

    Shared implementation of the scheduling heuristics below; they differ only
    in the columns used to rank the tasks.
    """
    # sort_values returns a new frame, so the caller's DataFrame is untouched.
    # The sort keys never change while scheduling, so one sort is enough.
    schedule = tasks.sort_values(by=priority)
    pending = ~schedule["done"].to_numpy(dtype=bool)
    if not pending.any():
        return schedule.sort_index()

    # Tasks run without idle time starting at t=0, so each completion time is
    # the running total of the processing times in execution order.
    completion_time = schedule.loc[pending, "processing_time"].cumsum()
    # Finishing early does not count as negative lateness.
    lateness = (completion_time - schedule.loc[pending, "due_date"]).clip(lower=0)

    schedule.loc[pending, "completion_time"] = completion_time.to_numpy()
    schedule.loc[pending, "lateness"] = lateness.to_numpy()
    schedule.loc[pending, "completion_order"] = np.arange(1, len(completion_time) + 1)
    schedule.loc[pending, "done"] = True
    return schedule.sort_index()


def run_earliest_due_date_algo(tasks: pd.DataFrame) -> pd.DataFrame:
    """Schedule tasks by earliest due date (ties: shortest processing time).

    Args:
        tasks: Task table with ``due_date``, ``processing_time`` and ``done``
            columns plus the ``lateness``, ``completion_order`` and
            ``completion_time`` bookkeeping columns. It is not modified.

    Returns:
        A new frame, sorted by index, in which every task that was not yet
        done is marked done and carries its completion order (starting at 1),
        completion time and lateness. Rows that were already done are left
        as they were.
    """
    return _run_priority_schedule(tasks, ["due_date", "processing_time"])


def run_shortest_processing_time_algo(tasks: pd.DataFrame) -> pd.DataFrame:
    """Schedule tasks by shortest processing time (ties: earliest due date).

    Args:
        tasks: Task table with ``due_date``, ``processing_time`` and ``done``
            columns plus the ``lateness``, ``completion_order`` and
            ``completion_time`` bookkeeping columns. It is not modified.

    Returns:
        A new frame, sorted by index, in which every task that was not yet
        done is marked done and carries its completion order (starting at 1),
        completion time and lateness. Rows that were already done are left
        as they were.
    """
    return _run_priority_schedule(tasks, ["processing_time", "due_date"])


def metrics(executed_tasks: pd.DataFrame):
    """Summarise a finished schedule.

    Args:
        executed_tasks: Task table whose ``lateness`` and ``completion_time``
            columns have been filled in.

    Returns:
        A tuple ``(max_lateness, total_lateness, total_completion_time)``.
    """
    lateness = executed_tasks["lateness"]
    finish_times = executed_tasks["completion_time"]
    return lateness.max(), lateness.sum(), finish_times.sum()


# Monte-Carlo comparison of the two scheduling heuristics: simulate ``n``
# random task sets and record, per set, the three metrics for each heuristic.
n = 100
max_lateness_edd = np.zeros(n, dtype=int)
total_lateness_edd = np.zeros(n, dtype=int)
total_completion_time_edd = np.zeros(n, dtype=int)

max_lateness_spt = np.zeros(n, dtype=int)
total_lateness_spt = np.zeros(n, dtype=int)
total_completion_time_spt = np.zeros(n, dtype=int)

# ``trial`` rather than ``iter``: the loop variable stays defined at module
# level after the loop and would otherwise shadow the builtin ``iter``.
for trial in range(n):
    # Both heuristics are evaluated on (copies of) the same task set so the
    # per-trial results are directly comparable.
    tasks = generate_tasks(n_tasks=10)
    max_lateness_edd[trial], total_lateness_edd[trial], total_completion_time_edd[trial] = metrics(run_earliest_due_date_algo(tasks.copy()))
    max_lateness_spt[trial], total_lateness_spt[trial], total_completion_time_spt[trial] = metrics(run_shortest_processing_time_algo(tasks.copy()))

# Each printed number is the fraction of trials in which that heuristic was
# strictly better (lower) on the metric; ties count for neither side.
print("How often do the algorihtms outperform each other?")
print(f"Total Completion Time:\nSPT {(total_completion_time_spt < total_completion_time_edd).mean()}, EDD {(total_completion_time_spt > total_completion_time_edd).mean()}")
print(f"Maximum Lateness:\nSPT {(max_lateness_spt < max_lateness_edd).mean()}, EDD {(max_lateness_spt > max_lateness_edd).mean()}")
print(f"Total Lateness:\nSPT {(total_lateness_spt < total_lateness_edd).mean()}, EDD {(total_lateness_spt > total_lateness_edd).mean()}")

How often do the algorihtms outperform each other?
Total Completion Time:
SPT 0.75, EDD 0.0
Maximum Lateness:
SPT 0.0, EDD 0.12
Total Lateness:
SPT 0.71, EDD 0.02
