In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt


In [None]:
# Input files: the job trace and the GB grid carbon-intensity series.
JOBS_CSV = "carbon_scheduler_jobs_1000.csv"
GRID_CSV = "gb_carbon_intensity.csv"

jobs = pd.read_csv(JOBS_CSV)
grid = pd.read_csv(GRID_CSV)

In [None]:
# Peek at the first rows of both inputs to check the columns that were loaded.
jobs_preview = jobs.head()
grid_preview = grid.head()
jobs_preview, grid_preview


In [None]:
# Turn the job timestamps into real datetimes so they can be compared and floored.
for time_col in ('arrival_time', 'deadline_time'):
    jobs[time_col] = pd.to_datetime(jobs[time_col])

# Same for the grid readings, which are then put in chronological order.
grid = grid.assign(datetime=pd.to_datetime(grid['datetime'])).sort_values('datetime')


In [None]:
# Rebuild the carbon-intensity series from the raw file in one pipeline:
# parse timestamps, order chronologically, keep only the 'actual' reading
# (renamed to 'carbon'), then put it on a regular 30-minute index and
# interpolate any gaps the resampling leaves behind.
grid = (
    pd.read_csv("gb_carbon_intensity.csv")
    .assign(datetime=lambda raw: pd.to_datetime(raw['datetime']))
    .sort_values('datetime')
    .loc[:, ['datetime', 'actual']]
    .rename(columns={'actual': 'carbon'})
    .set_index('datetime')
    .resample('30min')
    .mean()
    .interpolate()
)

In [None]:
# Order jobs by arrival and seed each job's outstanding work with its full compute time.
jobs = jobs.sort_values('arrival_time').assign(
    remaining=lambda frame: frame['compute_time_hours']
)


In [None]:
def _idle_slots_available(queue, remaining, deadlines, t, step, slot_hours):
    """Return how many whole slots can be idled from ``t`` without making a queued job late.

    Assumes the queued jobs are then served back-to-back in deadline order, each
    occupying whole slots. Jobs without a deadline are ignored (EDF serves them
    last, so they never delay a job that has one). Returns ``None`` when no
    queued job has a deadline; the value is negative when some job is already late.
    """
    with_deadline = sorted(
        (i for i in queue if not pd.isna(deadlines[i])),
        key=lambda i: deadlines[i],
    )
    idle = None
    slots_ahead = 0
    for i in with_deadline:
        work = remaining[i]
        if work > 0:
            # Small tolerance so float residue does not round up to an extra slot.
            needed = max(1, int(np.ceil(work / slot_hours - 1e-9)))
        else:
            # A queued job always occupies at least one slot before it is retired.
            needed = 1
        slots_ahead += needed
        # Finish instant if service started right now, compared with the deadline.
        spare = (deadlines[i] - (t + slots_ahead * step)) // step
        idle = spare if idle is None else min(idle, spare)
    return idle


def _cheaper_slot_ahead(intensity, t, step, idle_slots, current):
    """Return True if one of the next ``idle_slots`` slots has a known intensity below ``current``."""
    for k in range(1, idle_slots + 1):
        later = intensity.get(t + k * step)
        # Missing or NaN readings never count as cheaper (NaN < x is False).
        if later is not None and later < current:
            return True
    return False


def simulate(jobs_df, grid_df, policy="FIFO", step_minutes=30, power_kw=1.0):
    """Simulate a single machine running jobs in fixed time slots and total its carbon.

    Time advances in slots of ``step_minutes`` from the (floored) earliest
    arrival to the (ceiled) latest deadline. In every slot at most one queued
    job runs; a busy slot is charged ``power_kw * slot_hours`` kWh multiplied by
    the ``carbon`` value of ``grid_df`` at the slot start. Slots with no
    (or NaN) intensity reading contribute nothing.

    Parameters
    ----------
    jobs_df : DataFrame with datetime columns ``arrival_time`` and
        ``deadline_time`` and a numeric ``compute_time_hours`` column.
        It is not modified.
    grid_df : DataFrame indexed by slot start time with a ``carbon`` column.
    policy : ``"FIFO"`` (arrival order), ``"EDF"`` (earliest deadline first) or
        ``"CARBON_EDF"`` (EDF, but the machine idles for a slot when every
        queued job can still meet its deadline afterwards and a strictly
        lower-intensity slot exists within that slack). The slack only
        accounts for jobs already queued, not for future arrivals.
    step_minutes : slot length in minutes (default 30).
    power_kw : power drawn while a job runs (default 1.0, i.e. 0.5 kWh per
        30-minute slot).

    Returns
    -------
    (total_carbon, pct_missed) : total of energy * intensity over busy slots,
        and the percentage of jobs that finished after their deadline or were
        still unfinished when the simulation horizon ended.

    Raises
    ------
    ValueError : if ``policy`` is unknown or ``step_minutes`` is not positive.
    """
    if policy not in ("FIFO", "EDF", "CARBON_EDF"):
        raise ValueError("unknown scheduler")
    if step_minutes <= 0:
        raise ValueError("step_minutes must be positive")

    # Stable sort so jobs arriving at the same instant keep their input order;
    # positional ids (0..n-1) let the loop use plain lists instead of DataFrame writes.
    jobs = jobs_df.sort_values('arrival_time', kind='mergesort').reset_index(drop=True)
    n_jobs = len(jobs)
    if n_jobs == 0:
        return 0, 0.0

    freq = f"{step_minutes}min"
    step = pd.Timedelta(minutes=step_minutes)
    slot_hours = step_minutes / 60.0
    energy_per_slot = power_kw * slot_hours  # kWh used by one busy slot

    arrivals = list(jobs['arrival_time'])
    deadlines = list(jobs['deadline_time'])
    remaining = [float(hours) for hours in jobs['compute_time_hours']]
    finish_times = [None] * n_jobs  # None = never completed

    # Slot start -> intensity lookup; a dict makes the look-ahead cheap.
    intensity = grid_df['carbon'].to_dict()

    def deadline_key(i):
        # Jobs without a deadline sort after every job that has one.
        return (pd.isna(deadlines[i]), deadlines[i])

    t = jobs['arrival_time'].min().floor(freq)
    end = jobs['deadline_time'].max().ceil(freq)

    total_carbon = 0
    queue = []
    next_job = 0  # first job (in arrival order) that has not been queued yet

    while t <= end:
        # Admit every job that has arrived by the start of this slot.
        while next_job < n_jobs and arrivals[next_job] <= t:
            queue.append(next_job)
            next_job += 1

        if queue:
            if policy == "FIFO":
                job = queue[0]
            else:
                job = min(queue, key=deadline_key)

            now = intensity.get(t)
            if now is not None and pd.isna(now):
                now = None

            if policy == "CARBON_EDF" and now is not None:
                idle = _idle_slots_available(queue, remaining, deadlines, t, step, slot_hours)
                # Without any deadline to protect there is no bound on the
                # deferral, so the job simply runs.
                if idle is not None and idle >= 1 and _cheaper_slot_ahead(
                        intensity, t, step, idle, now):
                    t += step
                    continue

            remaining[job] -= slot_hours
            if remaining[job] <= 1e-9:
                # The job occupies the whole slot, so it completes at the slot END.
                finish_times[job] = t + step
                queue.remove(job)

            # Carbon is only charged for slots in which a job actually ran.
            if now is not None:
                total_carbon += energy_per_slot * now

        t += step

    # Unfinished jobs are misses too; a finished job with no deadline is not
    # (any comparison against NaT is False).
    missed = [
        done is None or bool(done > due)
        for done, due in zip(finish_times, deadlines)
    ]
    return total_carbon, 100.0 * sum(missed) / n_jobs

In [None]:
# Run the same workload under each scheduling policy, in a fixed order,
# and unpack the (total carbon, % missed) pair of each run.
(fifo_carbon, fifo_miss), (edf_carbon, edf_miss), (cedf_carbon, c_miss) = (
    simulate(jobs, grid, policy_name)
    for policy_name in ("FIFO", "EDF", "CARBON_EDF")
)


In [None]:
# One row per scheduler: label, total emissions, and share of late jobs.
results = pd.DataFrame(
    [
        ("FIFO", fifo_carbon, fifo_miss),
        ("EDF", edf_carbon, edf_miss),
        ("Carbon-Aware EDF", cedf_carbon, c_miss),
    ],
    columns=["Scheduler", "Total Carbon (gCO2)", "% Missed Deadlines"],
)

results


In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Each panel: (results column to plot, panel title, extra plot keywords).
panels = [
    ("Total Carbon (gCO2)", "Scheduler Emissions Comparison", {}),
    ("% Missed Deadlines", "Scheduler Missed Deadlines Comparison", {"color": "red"}),
]

for ax, (column, title, extra) in zip(axes, panels):
    results.plot(x="Scheduler", y=column, kind="line", ax=ax, marker='o', **extra)
    ax.set_title(title)
    ax.set_ylabel(column)  # the y-label is the plotted column's own name
    ax.grid(True)

plt.tight_layout()
plt.show()