In [1]:
from scheduling_simulation import run_simulation, get_log, SimulationConfig, FiFoScheduler, SimpleGreenScheduler
import plotly.express as px
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from datetime import datetime

config = SimulationConfig()

config.scheduler = FiFoScheduler
config.submit_interval_generator_scale = 60 * 60


run_simulation(config)
simulation_log = get_log()

keys = simulation_log.keys()
for key in keys:
    print(key, simulation_log.get(key))


job_queue [{'type': 'append', 'time': 1672614000.0, 'length': 1}, {'type': 'pop', 'time': 1672614000.0, 'length': 0}, {'type': 'append', 'time': 1672614071.3039854, 'length': 1}, {'type': 'pop', 'time': 1672614071.3039854, 'length': 0}, {'type': 'append', 'time': 1672616052.5383267, 'length': 1}, {'type': 'pop', 'time': 1672616052.5383267, 'length': 0}, {'type': 'append', 'time': 1672618771.623215, 'length': 1}, {'type': 'pop', 'time': 1672618771.623215, 'length': 0}, {'type': 'append', 'time': 1672640579.5343046, 'length': 1}, {'type': 'pop', 'time': 1672640579.5343046, 'length': 0}, {'type': 'append', 'time': 1672648748.275096, 'length': 1}, {'type': 'pop', 'time': 1672648748.275096, 'length': 0}, {'type': 'append', 'time': 1672652598.1161838, 'length': 1}, {'type': 'pop', 'time': 1672652598.1161838, 'length': 0}, {'type': 'append', 'time': 1672653872.5523357, 'length': 1}, {'type': 'pop', 'time': 1672653872.5523357, 'length': 0}, {'type': 'append', 'time': 1672659244.542223, 'length

In [2]:
compute_node_events = simulation_log.get('compute_node_events')
mapped_events = [{**event, **{'start': datetime.fromtimestamp(event.get('start')), 'end': datetime.fromtimestamp(event.get('end'))} } for event in compute_node_events]
df = simulation_log.get('compute_node_events')
fig = px.timeline(mapped_events, x_start='start', x_end='end', y='id', title='Scheduled Jobs by Nodes')
fig.show()

# We are also interested in the Queue Length, increasing length suggesting job starvation

queue_events = simulation_log.get('job_queue')
mapped_queue_events = [{**event, **{'time': datetime.fromtimestamp(event.get('time'))}} for event in queue_events]
queue_fig = px.line(mapped_queue_events, y='length', x='time', line_shape='hv')
queue_fig.show()

In [3]:
power_draw_data = simulation_log.get('compute_node_dataframe')

nodes_power = pd.DataFrame(power_draw_data)
nodes_power['datetime'] =nodes_power['timestamp'].apply(datetime.utcfromtimestamp)

# Group by 'ID'
grouped = nodes_power.sort_values(by='timestamp').groupby('id')

# Create a dictionary to store DataFrames for each ID
dfs_by_id = {}

# Iterate over groups and create a DataFrame for each ID
for id_, group_df in grouped:
    dfs_by_id[id_] = group_df

def get_power_of_node(timestamp: int, nodeId: int) -> int:
    # check wether that node was used and is thus in the log
    if (not nodeId in dfs_by_id):
        return 0

    power_log_of_node = dfs_by_id[nodeId]
    index = power_log_of_node['timestamp'].searchsorted(timestamp, side='right')
    # the index from searchsorted is where something of that value would be inserted, so we substract 1 to find the last power draw
    found_row = power_log_of_node.iloc[index - 1]
    return found_row['power']

accumulated_power = pd.DataFrame(pd.unique(nodes_power['timestamp']), columns=['timestamp'])

def get_accumulated_power_for_timestamp(timestamp: int) -> int:
    nodeIds = dfs_by_id.keys()
    foo = [get_power_of_node(timestamp, int(nodeId)) for nodeId in nodeIds]
    return sum(foo)

accumulated_power['datetime'] = accumulated_power['timestamp'].apply(datetime.utcfromtimestamp)

accumulated_power['power'] = accumulated_power['timestamp'].apply(get_accumulated_power_for_timestamp) 

accumulated_power_fig = px.line(accumulated_power, x=accumulated_power['datetime'], y=accumulated_power['power'], line_shape='hv')
accumulated_power_fig.show()


In [4]:
carbon_data = pd.read_csv('data/DE_2023_hourly.csv', delimiter=',')
carbon_data['datetime'] = pd.to_datetime(carbon_data['Datetime (UTC)'])
carbon_data['hour_of_day'] = carbon_data['datetime'].dt.hour.astype(int)

# Create the line plot with continuous color scale
fig1 = px.scatter(carbon_data, x=carbon_data['datetime'], y=carbon_data['Carbon Intensity gCO₂eq/kWh (direct)'], color=carbon_data['hour_of_day'], color_continuous_scale=['black', 'yellow', 'black'])
fig1.show()

fig2= px.scatter(carbon_data, x=carbon_data['datetime'], y=carbon_data['Carbon Intensity gCO₂eq/kWh (LCA)'], color=carbon_data['hour_of_day'], color_continuous_scale=['black', 'yellow', 'black'])
fig2.show()

In [5]:
carbon_data['timestamp'] = carbon_data['Datetime (UTC)'].apply(lambda datetime_string: datetime.strptime(datetime_string, "%Y-%m-%d %H:%M:%S").timestamp())

# We run a linear interpolation between those points
def get_carbon_intensity_at_timestamp(timestamp: int) -> int:
    return np.interp(x = timestamp, xp=carbon_data['timestamp'], fp=carbon_data['Carbon Intensity gCO₂eq/kWh (direct)'])

# Berechne die den benutzen CO2 Austoß
# Wir müssen dafür den Integral von Power/Timestamp * Co2/Power berechnen, das können wir mit der https://www.biancahoegel.de/mathe/verfahr/mittelpunktsregel.html

integral = 0
total_power = 0

for i in range(0, len(accumulated_power.index) - 1):
    current = accumulated_power.iloc[i]
    next_entry = accumulated_power.iloc[i + 1] 

    # timestamps are given in seconds, we thus divide by 3600 to get hours
    timedif_in_hours = (next_entry['timestamp'] - current['timestamp']) / 60 / 60

    # Mittelpunktsregel, wir nehmen den mittleren Wert weil so die beiden "Dreiecke" links und rechts sich gegenseitig aufwiegen
    middle_timestamp = (next_entry['timestamp'] + current['timestamp']) / 2

    # kWh * gCo2 / kwh
    integral += timedif_in_hours  * current['power'] * (get_accumulated_power_for_timestamp(middle_timestamp))
    total_power += timedif_in_hours * current['power']


print('{0:.2f}'.format(integral/1000), ' kg CO2')
print(total_power, ' ')

68.53  kg CO2
1275.8274765623937  
