In [1]:
import pandas as pd

# Display labels for the policy combinations shown in the plots.
# Keys have the form f"{scheduling_policy}_{carbon_policy}" (this is the key that
# load_task_details builds for its lookup); values are the labels, with "\n" used
# as a line break inside longer labels.
carbon_policy_labels = {
    "carbon_waiting": "Lowest-\nWindow",
    "carbon_oracle":  "Lowest-\nWindow*",
    "carbon_lowest": "Lowest-\nSlot",
    "suspend-resume_oracle": "Wait AWhile",
    "suspend-resume-threshold_oracle": "Ecovisor",
    "carbon_cst_oracle": "Carbon-\nTime*",
    "carbon_cst_average": "Carbon-\nTime",
    "cost_oracle": "AllWait-\nThreshold",
    "carbon-cost_cst_average": "RES-First-\nCarbon-Time",
    "carbon-cost_waiting": "RES-First-\nLowest-\nWindow",
    "carbon-spot_cst_average": "Spot-First-\nCarbon-Time",
    "suspend-resume-spot_oracle": "Spot-First-\nWaitAwhile",
    "suspend-resume-spot-threshold_oracle": "Spot-First-\nEcovisor",
    "carbon-cost-spot_cst_average": "SPOT-RES-\nCarbon-Time"
}

# steal / copy this one from the existing GAIA notebooks
def load_task_details(cluster_type, task_trace, scheduling_policy, carbon_start_index, carbon_policy, carbon_trace, reserved, waiting_times_str):
    """Read the per-task details CSV of one run and tag it with run metadata.

    The file is looked up under ``../results/{cluster_type}/{task_trace}/``. Runs with
    cluster type "slurm" use the file prefix ``slurm-details-``; every other cluster
    type uses ``details-``. The remaining arguments form the rest of the file name.

    Returns:
        The loaded frame with the added columns ``carbon_policy`` (display label taken
        from ``carbon_policy_labels``), ``scheduling_policy``, ``start_index`` and
        ``task_trace``, without the rows whose ``ID`` equals -1.

    Raises:
        KeyError: if ``carbon_policy_labels`` has no entry for
            ``f"{scheduling_policy}_{carbon_policy}"``.
    """
    prefix = "slurm-details" if cluster_type == "slurm" else "details"
    run_id = f"{scheduling_policy}-{carbon_start_index}-{carbon_policy}-{carbon_trace}-{reserved}-{waiting_times_str}"
    csv_path = f"../results/{cluster_type}/{task_trace}/{prefix}-{run_id}.csv"

    details = pd.read_csv(csv_path)
    details["carbon_policy"] = carbon_policy_labels[scheduling_policy + "_" + carbon_policy]
    details["scheduling_policy"] = scheduling_policy
    details["start_index"] = carbon_start_index
    details["task_trace"] = task_trace

    # Rows with ID -1 are excluded from the result.
    return details[details["ID"] != -1]

In [2]:
import plotly.express as px
from datetime import datetime
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pytz

import importlib.util
import sys


# Load ../src/carbon.py by file path and register it in sys.modules under the
# name "carbon", so that the `import carbon` statement below resolves to that file.
spec = importlib.util.spec_from_file_location("carbon", "../src/carbon.py")
foo = importlib.util.module_from_spec(spec)
# The module object is registered first and its body is executed afterwards.
sys.modules["carbon"] = foo
spec.loader.exec_module(foo)

import carbon


# Task traces to visualise (another candidate that is currently disabled: "pai_200").
traces = ["phased"]

# Waiting-time settings; each entry is passed to load_task_details as waiting_times_str.
waiting_times = ["48", "6x24", "4"]

# (scheduling_policy, carbon_policy) pairs to plot; disabled candidates stay commented out.
scheduling_policies = [
    # ("carbon", "lowest"),
    ("carbon", "oracle"),
    # ("carbon", "cst_average"),
    # ("suspend-resume-threshold", "oracle"),
    # ("suspend-resume", "oracle"),
]

# NOTE(review): 7000 is presumably the carbon start index (the same value is passed
# to load_task_details as carbon_start_index) -- confirm against carbon.get_carbon_model.
carbon_trace = carbon.get_carbon_model("AU-SA", 7000, extra_columns=True)

# "timestamp" value of the first row of the carbon trace; time_to_dates adds
# simulation offsets to it.
start_date_in_carbon_trace_as_timestamp = carbon_trace.df.iloc[0]["timestamp"]

# Scatter plot of the carbon trace, coloured by the plotted value itself.
fig_carbon = px.scatter(
    carbon_trace.df,
    x="datetime",
    y="carbon_intensity_avg",
    color="carbon_intensity_avg",
    color_continuous_scale=px.colors.sequential.speed,
)

def time_to_dates(seconds_since_simulation_start) -> datetime:
    """Convert an offset from simulation start into a timezone-aware UTC datetime.

    The offset is added to ``start_date_in_carbon_trace_as_timestamp`` (the
    "timestamp" value of the first carbon-trace row) and the sum is interpreted
    as a POSIX timestamp.

    Returns:
        A ``datetime`` in UTC (not a string, despite what the previous annotation said).
    """
    adjusted_timestamp = seconds_since_simulation_start + start_date_in_carbon_trace_as_timestamp
    return datetime.fromtimestamp(adjusted_timestamp, pytz.timezone('UTC'))

# One figure per (trace, policy, waiting time): a Gantt chart of the jobs on top and
# the carbon-intensity trace below, both on a shared date x-axis.
for trace in traces:
    for policy in scheduling_policies:
        scheduling_policy, carbon_policy = policy

        for waiting_time in waiting_times:
            df = load_task_details(
                "simulation", trace, scheduling_policy, 7000, carbon_policy, "AU-SA", 0, waiting_time
            ).sort_values(by=["start_time", "length"])

            # Translate simulation offsets into dates so both subplots share one axis.
            df["start_time_date"] = df["start_time"].apply(time_to_dates)
            df["submission_date"] = df["arrival_time"].apply(time_to_dates)
            # df["deadline"] = (df["arrival_time"] + (int(waiting_time) * 3600)).apply(time_to_dates)
            df["exit_time_date"] = df["exit_time"].apply(time_to_dates)

            # Start the visible range at the earliest submission rather than the earliest
            # start, so the dotted waiting-time markers (submission -> start) are not cut off.
            first_visible_time = min(df["arrival_time"].min(), df["start_time"].min())
            min_date_in_trace = time_to_dates(first_visible_time)
            max_date_in_trace = time_to_dates(df["exit_time"].max())

            fig_gantt = px.timeline(
                df,
                x_start="start_time_date",
                x_end="exit_time_date",
                y="ID",
                hover_data=["start_time", "arrival_time"],
            )

            # One dotted line per job, from its submission to its start, drawn on the Gantt subplot.
            submission_markers = [
                {
                    'type': 'line',
                    'x0': row.submission_date,
                    'x1': row.start_time_date,
                    'y0': row.ID,
                    'y1': row.ID,
                    'xref': 'x1',
                    'yref': 'y1',
                    'line': dict(color="MediumPurple", width=2, dash="dot"),
                }
                for row in df.itertuples(index=False)
            ]

            fig = make_subplots(rows=2, cols=1, shared_xaxes=True)
            fig.add_trace(fig_gantt.data[0], row=1, col=1)
            fig.add_trace(fig_carbon.data[0], row=2, col=1)

            title_key = f"{scheduling_policy}_{carbon_policy}"
            title = f"{scheduling_policy}_{carbon_policy} ({carbon_policy_labels.get(title_key, '')})"

            fig.update_layout(
                title_text=f"{trace}'s scheduling via {title}, {waiting_time}",
                xaxis=dict(type='date'),
                xaxis2=dict(type='date'),
                shapes=submission_markers,
            )
            fig.update_xaxes(title_text="Date", range=[min_date_in_trace, max_date_in_trace])

            fig.update_yaxes(title_text="Job ID", fixedrange=True, row=1, col=1)
            # NOTE(review): this label says gCO₂eq/kWh, while the prototype plot further down
            # labels the same column gCO₂eq/Wh -- confirm which unit is correct.
            fig.update_yaxes(title_text="Carbon intensity in gCO₂eq/kWh", fixedrange=True, row=2, col=1)

            fig.update_layout(
                yaxis={'range': [-0.5, df['ID'].max() + 1], 'tickmode': 'linear'},
                yaxis2={'range': [0, 0.5]},
            )

            fig.show()

In [103]:
import pulp

import sys
sys.path.append('../src/')

import power_consumption_profiles as pcp
from task import Task, set_waiting_times
# Plan: have an LP Problem where we determine startup and work phases

import plotly.graph_objects as go

mockPowerFunction = pcp.FooPowerFunction(pcp.foo_phases_spec)
set_waiting_times("24")

mockTask = Task(
    0,
    0,
    task_length=int(mockPowerFunction.get_length()),
    CPUs=1,
    total_execution_time=0,
    power_consumption_function=mockPowerFunction,
)

# Number of time steps the schedule may use; every decision variable below is
# indexed by range(DEADLINE).
DEADLINE: int = 3600 * 1

# Minimisation problem: the objective added below is the total carbon cost.
prob = pulp.LpProblem("StopResumeEnergyAwareScheduling", pulp.LpMinimize)

# Example data. Both lengths are hard-coded for now; the trailing comments show the
# expressions that would derive them from the mock task / power function instead.
WORK_LENGTH = 100  # mockTask.task_length  # Processing time for the job
print(f"Processing time is {WORK_LENGTH}")

STARTUP_LENGTH = 200  # int(mockPowerFunction.duration_startup)  # Startup time for the job
print(f"startup_time time is {STARTUP_LENGTH}")

# `carbon_trace` is created by an earlier cell.
# NOTE(review): what extend(5) does to the trace is not visible here -- confirm in carbon.py.
seconds_carbon_trace = carbon_trace.extend(5)

# Carbon intensity per index of the extended trace, as a plain dict.
carbon_cost_at_time = seconds_carbon_trace.df['carbon_intensity_avg'].to_dict()

# Variables: one binary per time step for each of the three phases.
starting = pulp.LpVariable.dicts("starting", range(DEADLINE), cat="Binary")
startup_finished = pulp.LpVariable.dicts("start", range(DEADLINE), cat="Binary")
work = pulp.LpVariable.dicts("work", range(DEADLINE), cat="Binary")

# Objective: minimise the carbon intensity summed over every time step spent
# starting or working.
carbon_cost = pulp.lpSum(
    starting[t] * carbon_cost_at_time[t] + work[t] * carbon_cost_at_time[t]
    for t in range(DEADLINE)
)
prob += carbon_cost


# The job must be in the work phase for exactly WORK_LENGTH time steps.
total_work = pulp.lpSum(work[t] for t in range(DEADLINE))
prob += total_work == WORK_LENGTH

# NOTE(review): none of the constraints below ties `starting` to `startup_finished`,
# so nothing forces `starting` to be 1 for STARTUP_LENGTH steps before a work phase.
for t in range(DEADLINE - 1):
    # Constraints are only added from time step STARTUP_LENGTH onwards.
    if t < STARTUP_LENGTH:
        continue

    work_now = work[t] if t > 0 else 0

    # Whenever work switches on between t and t + 1 (i.e. the job resumes),
    # startup_finished[t] has to be 1.
    prob += startup_finished[t] >= work[t + 1] - work_now

    # Startup (either flag) and work are mutually exclusive within one time step.
    prob += startup_finished[t] + work[t] <= 1
    prob += starting[t] + work[t] <= 1

# startup_done = pulp.LpVariable.dicts("remaining_start", (t for t in range (deadline)), lowBound=0, cat="Integer")


# for i in range(STARTUP_LENGTH - 1, DEADLINE):
#     for j in range(STARTUP_LENGTH):
#         prob += startup_finished[i] <= starting[i - j], f"Link_{i}_{j}"
    # If end_vars[i] is 1, it means dict1[i-N+1:i+1] should all be 1
    #prob += pulp.lpSum([starting[i - j] for j in range(STARTUP_LENGTH)]) >= STARTUP_LENGTH * startup_finished[i], f"Contiguity_{i}"

# When we are working, we must not have any remaining startup phase left to do, so if work == 1 -> remaining_startup == 0
# for t in range(deadline - 1):
#     if t >= startup_time:

        # if not working, its limited by 1000,
        # if working, it must be 0
        # prob += remaining_startup[t] <= 1000 * (1 - work[t])

        # if going from starting to not starting (== when we begin working), set remaining startup to the startup duration
        #                                                  1     -    0 == 1
        # prob += startup_done[t] >= startup_time * (start[t] - start[t - 1]) 

        # prob += pulp.lpSum()

        # the remaining_startup time needs to always count down with increasing time
        # prob += remaining_startup[t] >= remaining_startup[t + 1] + 1

        # finally, whenever there is remaining startup, we should obviously also be in the startup phase
        # prob += remaining_startup[t] <= 1000 * start[t]
        # prob += remaining_startup[t] >= start[t]

# 
# # while in startup, the remaining_startup needs to count down from the startup_duration
# for t in range(deadline - 1):
#     if t >= startup_time:
#         prob += remaining_startup[t] >= remaining_startup[t + 1] + 1

# 

# Solve the problem


prob.solve()

print(f"Status: {pulp.LpStatus[prob.status]}")

# Print every time step at which one of the phase variables is set in the solution.
print("Job schedule:")
reported_phases = (
    (starting, "Starting"),
    (startup_finished, "Startup finished"),
    (work, "Processing"),
)
for t in range(DEADLINE):
    for phase_variables, phase_label in reported_phases:
        phase_value = pulp.value(phase_variables[t])
        # pulp.value() yields None for a variable without an assigned value. All three
        # phases are guarded the same way, so the report never fails on `None > 0`.
        if phase_value is not None and phase_value > 0:
            print(f"  Time {t}: {phase_label}")

print(f"Carbon cost: {pulp.value(carbon_cost)}")

Processing time is 100
startup_time time is 200
Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/vincent/Documents/Masterarbeit/master-thesis/GAIA/.venv/lib/python3.12/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/cd5b4ceef2e4404da3d9778ed33152bf-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/cd5b4ceef2e4404da3d9778ed33152bf-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 10203 COLUMNS
At line 65995 RHS
At line 76194 BOUNDS
At line 86794 ENDATA
Problem MODEL has 10198 rows, 10599 columns and 27393 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 0.72502 - 0.03 seconds
Cgl0004I processed model has 6799 rows, 6881 columns (6881 integer (6839 of which binary)) and 20477 elements
Cbc0038I Initial state - 0 integers unsatisfied sum - 0
Cbc0038I Solution found of 0.72502
Cbc0038I Cleaned solution of 0.72502
Cbc

In [104]:

# Visualise the prototype schedule: LP variable values on top, carbon intensity below.
prototype_fig = make_subplots(rows=2, cols=1, shared_xaxes=True)

times = list(carbon_cost_at_time.keys())
carbon_costs = list(carbon_cost_at_time.values())

# Plot the per-time-step intensities (`carbon_costs`). `carbon_cost` without the "s"
# is the LP objective expression and is not a series that can be plotted.
fig_carbon = px.scatter(x=times, y=carbon_costs)
prototype_fig.add_trace(fig_carbon.data[0], row=2, col=1)

variables = [(starting, 'starting'), (startup_finished, 'startup_finished'), (work, 'work')]

# Collect one frame per variable family and concatenate once. Empty frames are left
# out, which avoids pandas' deprecated concat behaviour for empty entries.
variable_frames = []
for variable, name in variables:
    records = []
    for t in range(DEADLINE):
        value = pulp.value(variable[t])
        # Variables without an assigned value count as 0.
        records.append((t, value if value is not None else 0))

    variable_frame = pd.DataFrame(records, columns=['time', 'value'])
    variable_frame['name'] = name

    # Only time steps at which the variable is set are plotted.
    variable_frame = variable_frame[variable_frame['value'] != 0]
    if not variable_frame.empty:
        variable_frames.append(variable_frame)

if variable_frames:
    df = pd.concat(variable_frames, ignore_index=True)
else:
    df = pd.DataFrame(columns=['time', 'value', 'name'])

print(df)

scheduling_plot = px.scatter(df, x='time', y='value', color='name')

prototype_fig.add_traces(list(scheduling_plot.data), rows=1, cols=1)

# Show the time span in which any variable is set, with a margin of 10 steps on each side.
prototype_fig.update_xaxes(title_text="Timestamp", range=[df['time'].min()-10, df['time'].max()+10])

prototype_fig.update_yaxes(title_text="Variable Value", fixedrange=True, row=1, col=1)
prototype_fig.update_yaxes(title_text="Carbon intensity in gCO₂eq/Wh", fixedrange=True, row=2, col=1)

prototype_fig.update_layout({'yaxis': {'range': [-0.5, 1.5], 'tickmode': 'linear'}})
prototype_fig.update_layout({'yaxis2': {'range': [0,0.1]}})

prototype_fig.show()


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



     time  value              name
0     834    1.0  startup_finished
1     854    1.0  startup_finished
2     959    1.0  startup_finished
3    1514    1.0  startup_finished
4    1524    1.0  startup_finished
..    ...    ...               ...
101  2415    1.0              work
102  2416    1.0              work
103  2417    1.0              work
104  2418    1.0              work
105  2419    1.0              work

[106 rows x 3 columns]


In [50]:
import pulp

# Toy model: require at least one run of N consecutive 1s in a binary sequence.
problem = pulp.LpProblem("Contiguous_True_Values_Problem", pulp.LpMinimize)

length = 10  # Length of the boolean sequence
N = 3  # Required number of contiguous true values

# The boolean sequence itself, one binary variable per position.
dict1 = {}
for position in range(length):
    dict1[position] = pulp.LpVariable(f'dict1_{position}', cat='Binary')

# end_vars[i] == 1 marks position i as the last element of a run of N ones,
# so the first possible end position is N - 1.
end_vars = {}
for position in range(N - 1, length):
    end_vars[position] = pulp.LpVariable(f'end_{position}', cat='Binary')

# At least one run has to end somewhere.
problem += pulp.lpSum(list(end_vars.values())) >= 1, "AtLeastOneEnd"

# Tie each end marker to the N sequence entries it closes.
for end_position, end_var in end_vars.items():
    # Entries end_position, end_position - 1, ..., end_position - N + 1.
    run_members = [dict1[end_position - offset] for offset in range(N)]

    # The marker can only be 1 if every member of its run is 1 ...
    for offset, member in enumerate(run_members):
        problem += end_var <= member, f"Link_{end_position}_{offset}"

    # ... which is stated once more in aggregated form.
    problem += pulp.lpSum(run_members) >= N * end_var, f"Contiguity_{end_position}"

# Objective: minimise the number of sequence entries that are set to 1.
problem += pulp.lpSum(list(dict1.values())), "DummyObjective"

problem.solve()

# Print the solved value of every sequence entry.
for position, entry in dict1.items():
    print(f'dict1[{position}] = {pulp.value(entry)}')


Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/vincent/Documents/Masterarbeit/master-thesis/GAIA/.venv/lib/python3.12/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/9f28273a79b0457bb8c85d920a979533-pulp.mps -timeMode elapsed -branch -printingOptions all -solution /tmp/9f28273a79b0457bb8c85d920a979533-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 38 COLUMNS
At line 173 RHS
At line 207 BOUNDS
At line 226 ENDATA
Problem MODEL has 33 rows, 18 columns and 88 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 1.25 - 0.00 seconds
Cgl0004I processed model has 33 rows, 18 columns (18 integer (18 of which binary)) and 88 elements
Cutoff increment increased from 1e-05 to 0.9999
Cbc0038I Initial state - 18 integers unsatisfied sum - 2.25
Cbc0038I Pass   1: suminf.    2.25000 (18) obj. 1.25 iterations 0
Cbc0038I Solution found of 10
Cbc0038