In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt


In [2]:
# Raw inputs: the job trace and the grid carbon-intensity readings.
# Both paths are relative to the kernel's working directory; the recorded run
# of this cell failed with FileNotFoundError because the jobs CSV was not there.
jobs = pd.read_csv(filepath_or_buffer="carbon_scheduler_jobs_1000.csv")
grid = pd.read_csv(filepath_or_buffer="gb_carbon_intensity.csv")

FileNotFoundError: [Errno 2] No such file or directory: 'carbon_scheduler_jobs_1000.csv'

In [None]:
# Peek at the first five rows of each frame (shown together as a tuple).
jobs.head(n=5), grid.head(n=5)


In [None]:
# Convert the job window columns from text to datetime values.
jobs['arrival_time'] = jobs['arrival_time'].pipe(pd.to_datetime)
jobs['deadline_time'] = jobs['deadline_time'].pipe(pd.to_datetime)

# Same for the grid readings, then put them in chronological order.
grid['datetime'] = grid['datetime'].pipe(pd.to_datetime)
grid = grid.sort_values(by='datetime', ascending=True)


In [None]:
# Rebuild the grid series from the CSV in one pass:
# parse timestamps -> sort -> keep the 'actual' reading as 'carbon'
# -> index by time -> 30-minute means, with gaps filled by interpolation.
grid = (
    pd.read_csv("gb_carbon_intensity.csv")
    .assign(datetime=lambda raw: pd.to_datetime(raw['datetime']))
    .sort_values('datetime')
    .loc[:, ['datetime', 'actual']]
    .rename(columns={'actual': 'carbon'})
    .set_index('datetime')
    .resample('30min')
    .mean()
    .interpolate()
)

In [None]:
# Order jobs by arrival and seed each job's outstanding work ('remaining')
# with its full compute time.
jobs = (
    jobs.sort_values(by='arrival_time')
    .assign(remaining=lambda ordered: ordered['compute_time_hours'])
)


In [None]:
import pandas as pd
from datetime import timedelta
import matplotlib.pyplot as plt

# 1. Load and Preprocess
# Re-read both CSVs so this cell always starts from the raw files.
jobs = pd.read_csv("carbon_scheduler_jobs_1000.csv")
grid = pd.read_csv("gb_carbon_intensity.csv")

# Parse the timestamp columns and drop any timezone so that job times and
# grid times can be compared directly.
jobs = jobs.assign(
    arrival_time=pd.to_datetime(jobs['arrival_time']).dt.tz_localize(None),
    deadline_time=pd.to_datetime(jobs['deadline_time']).dt.tz_localize(None),
)

# 2. Grid: chronological 30-minute means of the 'actual' reading, gaps
#    interpolated, exposed under the column name 'carbon'.
grid = (
    grid.assign(datetime=pd.to_datetime(grid['datetime']).dt.tz_localize(None))
    .sort_values('datetime')
    .set_index('datetime')
    .loc[:, ['actual']]
    .resample('30min')
    .mean()
    .interpolate()
    .rename(columns={'actual': 'carbon'})
)

# 3. Align the job trace with the grid: shift every job so that the earliest
#    arrival coincides with the first grid timestamp.
time_shift = jobs['arrival_time'].min() - grid.index.min()
jobs['arrival_time'] = jobs['arrival_time'] - time_shift

# 4. Deadlines get the same shift plus 24 extra hours, modelling a flexible
#    "batch" SLA that leaves the scheduler room to defer work.
jobs['deadline_time'] = jobs['deadline_time'] - time_shift + timedelta(hours=24)

# 5. Define Simulator (Your Logic)
def simulate(jobs_df, grid_df, policy="FIFO"):
    """Simulate a single-server scheduler in 30-minute slots.

    At most one job runs per slot. Time advances from the earliest arrival
    (floored to 30 min) to the latest deadline (ceiled to 30 min), inclusive.

    Parameters
    ----------
    jobs_df : pandas.DataFrame
        Needs ``arrival_time`` and ``deadline_time`` (datetime) columns and a
        numeric ``compute_time_hours`` column. The frame is copied, never
        modified. Index labels are used as job ids (assumed unique).
    grid_df : pandas.DataFrame
        Indexed by timestamp, with a ``carbon`` column holding the intensity
        for the slot starting at that timestamp.
    policy : str
        ``"FIFO"`` runs the earliest-arrived queued job. ``"CARBON_EDF"``
        picks the earliest deadline but pauses while the current intensity is
        above the grid average and the job has more than one hour of slack.
        Any other value is treated as plain earliest-deadline-first.

    Returns
    -------
    tuple
        ``(total_carbon, missed_pct)``. ``total_carbon`` sums
        ``0.5 * carbon`` over every slot in which a job ran and the grid had
        a reading; slots without a reading add nothing. ``missed_pct`` is the
        percentage of jobs that finished after their deadline or did not
        finish at all before the simulation ended.
    """
    step_hours = 0.5
    step = timedelta(hours=step_hours)

    # Nothing to schedule: avoid NaT bounds and a NaN miss rate.
    if len(jobs_df) == 0:
        return 0.0, 0.0

    jobs = jobs_df.copy().sort_values('arrival_time')
    # Float dtype so the half-hour decrement is valid even when the source
    # column holds integers.
    jobs['remaining'] = jobs['compute_time_hours'].astype(float)
    jobs['completed'] = False
    jobs['finish_time'] = pd.NaT

    t = jobs['arrival_time'].min().floor('30min')
    end = jobs['deadline_time'].max().ceil('30min')
    avg_carbon = grid_df['carbon'].mean()

    # Jobs are sorted by arrival, so a moving pointer admits each job exactly
    # once instead of rescanning the whole table every slot.
    arrival_order = list(jobs.index)
    arrival_times = list(jobs['arrival_time'])
    next_arrival = 0

    queue = []
    total_carbon = 0.0

    while t <= end:
        # Admit every job that has arrived by the start of this slot.
        while next_arrival < len(arrival_order) and arrival_times[next_arrival] <= t:
            queue.append(arrival_order[next_arrival])
            next_arrival += 1

        if queue:
            if policy == "FIFO":
                candidate = queue[0]
            else:
                # EDF and CARBON_EDF: earliest deadline first. The stable
                # sort breaks ties in queue (arrival) order.
                candidate = (
                    jobs.loc[queue, 'deadline_time']
                    .sort_values(kind='stable')
                    .index[0]
                )

            # One grid lookup per slot; a missing reading counts as 0 for
            # the pause decision and adds no emissions.
            has_reading = t in grid_df.index
            current_carbon = grid_df.loc[t, 'carbon'] if has_reading else 0

            job_to_run = candidate
            if policy == "CARBON_EDF":
                work_left = timedelta(hours=jobs.loc[candidate, 'remaining'])
                slack = jobs.loc[candidate, 'deadline_time'] - t - work_left
                # Defer only while the grid is dirtier than average AND the
                # job can still afford to wait.
                if current_carbon > avg_carbon and slack > timedelta(hours=1):
                    job_to_run = None

            if job_to_run is not None:
                jobs.loc[job_to_run, 'remaining'] -= step_hours
                if jobs.loc[job_to_run, 'remaining'] <= 0:
                    jobs.loc[job_to_run, 'completed'] = True
                    # The job occupies the whole slot, so it is done at the
                    # END of the slot; this matches the slack formula above.
                    jobs.loc[job_to_run, 'finish_time'] = t + step
                    queue.remove(job_to_run)

                if has_reading:
                    total_carbon += step_hours * current_carbon

        t += step

    # A job misses its deadline if it finished late OR never finished.
    # (NaT > deadline is False, so unfinished jobs must be counted explicitly.)
    finished_late = jobs['finish_time'] > jobs['deadline_time']
    never_finished = ~jobs['completed']
    missed = (finished_late | never_finished).mean() * 100
    return total_carbon, missed

# 6. Run & Plot
# Run the three policies in a fixed order on the same jobs and grid, then
# tabulate and chart the emissions.
print("Running Simulation...")
(fifo_c, fifo_m), (edf_c, edf_m), (cedf_c, cedf_m) = (
    simulate(jobs, grid, policy_name)
    for policy_name in ("FIFO", "EDF", "CARBON_EDF")
)

results = pd.DataFrame(
    data={
        "Scheduler": ["FIFO", "EDF", "Carbon-Aware"],
        "Carbon": [fifo_c, edf_c, cedf_c],
        "Missed %": [fifo_m, edf_m, cedf_m],
    }
)

print(results)
results.plot.bar(
    x="Scheduler",
    y="Carbon",
    title="Carbon Footprint (Lower is Better)",
)
plt.show()

In [None]:
# One simulation per policy; each call returns (total carbon, % missed deadlines).
(fifo_carbon, fifo_miss), (edf_carbon, edf_miss), (cedf_carbon, c_miss) = [
    simulate(jobs, grid, policy_name)
    for policy_name in ("FIFO", "EDF", "CARBON_EDF")
]


In [None]:
# Summary table: one row per scheduler, with its emissions and miss rate.
results = pd.DataFrame.from_dict(
    {
        "Scheduler": ["FIFO", "EDF", "Carbon-Aware EDF"],
        "Total Carbon (gCO2)": [fifo_carbon, edf_carbon, cedf_carbon],
        "% Missed Deadlines": [fifo_miss, edf_miss, c_miss],
    }
)

results


In [None]:
# Side-by-side comparison: emissions on the left, deadline misses on the right.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))

# Left panel: total carbon per scheduler
results.plot.line(x="Scheduler", y="Total Carbon (gCO2)", ax=axes[0], marker='o')
axes[0].set(title="Scheduler Emissions Comparison", ylabel="Total Carbon (gCO2)")
axes[0].grid(True)

# Right panel: share of jobs that missed their deadline
results.plot.line(x="Scheduler", y="% Missed Deadlines", ax=axes[1], marker='o', color='red')
axes[1].set(title="Scheduler Missed Deadlines Comparison", ylabel="% Missed Deadlines")
axes[1].grid(True)

plt.tight_layout()
plt.show()