In [3]:
from google.colab import files
import os

# Location where the rest of the notebook reads the dataset from.
CSV_PATH = "/content/load_balancing_realistic_sla.csv"

# Only prompt for an upload when the dataset is not already in place.
if not os.path.exists(CSV_PATH):
    print("Upload load_balancing_realistic_sla.csv")
    uploaded = files.upload()
    # The keys of the returned mapping are used as the uploaded file names.
    # An empty result (presumably a cancelled dialog) would otherwise fail
    # below with an unhelpful IndexError, so report it explicitly.
    if not uploaded:
        raise RuntimeError(
            "No file was uploaded; load_balancing_realistic_sla.csv is required."
        )
    # Move the first uploaded file to the canonical path.
    os.rename(next(iter(uploaded)), CSV_PATH)

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the dataset and replace its index with a fresh 0..n-1 range.
df = pd.read_csv(CSV_PATH)
df = df.reset_index(drop=True)

# Number of cpu_{i}_* column groups read from each row.
NUM_CPUS = 4
# Training hyperparameters: passes over the data, rows per optimizer step,
# and the learning rate handed to the optimizer.
EPOCHS = 10
BATCH = 128
LR = 1e-3


def build_graph(row):
    """Turn one dataset row into per-CPU features and completion-time targets.

    For each of the NUM_CPUS CPUs the feature vector holds that CPU's queue
    length, load, wait time and speed, followed by the row-level task size,
    priority and average system load. The target for a CPU is its wait time
    plus ``task_size / speed``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) exposing the
            ``cpu_{i}_*`` and task-level columns by name.

    Returns:
        A ``(features, targets)`` pair of float32 tensors on ``device``,
        with shapes ``(NUM_CPUS, 7)`` and ``(NUM_CPUS,)``.
    """
    # Values shared by every CPU node are looked up once.
    task_size = row["task_size"]
    shared_tail = [task_size, row["priority"], row["system_load_avg"]]

    node_features = []
    completion_times = []
    for i in range(NUM_CPUS):
        wait_time = row[f"cpu_{i}_wait_time"]
        speed = row[f"cpu_{i}_speed"]

        node_features.append(
            [row[f"cpu_{i}_queue_len"], row[f"cpu_{i}_load"], wait_time, speed]
            + shared_tail
        )
        # Time until this task would finish if placed on CPU i.
        completion_times.append(wait_time + task_size / speed)

    features = torch.tensor(node_features, dtype=torch.float32, device=device)
    targets = torch.tensor(completion_times, dtype=torch.float32, device=device)
    return features, targets


class GNNBalancer(nn.Module):
    """Per-node regressor that sees each node alongside a global mean context.

    Every node's feature vector is concatenated with the mean feature vector
    of all nodes in the same graph, then passed through a three-layer MLP
    that emits one scalar per node.

    Args:
        in_dim: Number of features per node. The first layer takes
            ``2 * in_dim`` inputs (node features + global context).
    """

    def __init__(self, in_dim):
        super().__init__()
        self.fc1 = nn.Linear(in_dim * 2, 64)
        self.fc2 = nn.Linear(64, 32)
        self.out = nn.Linear(32, 1)

    def forward(self, X):
        """Predict one value per node.

        Args:
            X: Node features of shape ``(num_nodes, in_dim)``; any leading
                batch dimensions, e.g. ``(batch, num_nodes, in_dim)``, are
                also accepted.

        Returns:
            Tensor with the trailing feature axis removed:
            ``(num_nodes,)`` or ``(batch, num_nodes)``.
        """
        # Mean over the node axis, broadcast back so every node sees the
        # same graph-level summary.
        global_ctx = X.mean(dim=-2, keepdim=True).expand_as(X)
        H = torch.cat([X, global_ctx], dim=-1)
        H = torch.relu(self.fc1(H))
        H = torch.relu(self.fc2(H))
        # Drop only the trailing size-1 output axis. A bare squeeze() would
        # also collapse a single-node graph to a 0-d tensor, which then no
        # longer matches a (1,) target.
        return self.out(H).squeeze(-1)

# in_dim must equal the number of per-CPU features emitted by build_graph.
model = GNNBalancer(in_dim=7).to(device)
optimizer = optim.Adam(model.parameters(), lr=LR)
loss_fn = nn.MSELoss()

if len(df) == 0:
    raise ValueError("Training data is empty; nothing to fit.")

# The per-row graphs do not change between epochs, so build them once
# instead of re-reading the DataFrame and re-creating tensors every epoch.
graphs = [build_graph(df.iloc[j]) for j in range(len(df))]

model.train()
indices = np.arange(len(df))

for epoch in range(EPOCHS):
    # Visit the rows in a fresh random order each epoch.
    np.random.shuffle(indices)
    total_loss = 0.0

    for i in range(0, len(indices), BATCH):
        batch = indices[i:i+BATCH]
        optimizer.zero_grad()
        loss = 0.0

        # Each row is its own small graph; the per-row losses are summed so
        # that a single optimizer step covers the whole mini-batch.
        for j in batch:
            X, y = graphs[j]
            pred = model(X)
            loss += loss_fn(pred, y)

        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # total_loss is a sum of per-row losses, so this reports the mean per row.
    print(f"Epoch {epoch+1}/{EPOCHS} | Loss: {total_loss/len(indices):.4f}")


def rr(t):
    """Round-robin policy: cycle through the CPUs by step index ``t``."""
    _, slot = divmod(t, NUM_CPUS)
    return slot

def lwr(row):
    """Pick the CPU with the smallest load-to-speed ratio.

    Args:
        row: Record exposing ``cpu_{i}_load`` and ``cpu_{i}_speed`` for each
            of the NUM_CPUS CPUs.

    Returns:
        Index (as a Python int) of the first CPU with the minimum ratio.
    """
    ratios = []
    for i in range(NUM_CPUS):
        ratios.append(row[f"cpu_{i}_load"] / row[f"cpu_{i}_speed"])
    return int(np.asarray(ratios).argmin())


def evaluate(policy):
    """Replay every row of ``df`` under a scheduling policy and score it.

    Args:
        policy: ``"RR"`` for round-robin, ``"LWR"`` for the load/speed
            heuristic; any other value uses the trained model and picks the
            CPU with the smallest predicted completion time.

    Returns:
        A 3-tuple ``(average reward, oracle agreement %, SLA violation %)``
        computed over all rows.
    """
    model.eval()

    def choose_cpu(step, record):
        # Dispatch on the policy name; anything unrecognised uses the model.
        if policy == "RR":
            return rr(step)
        if policy == "LWR":
            return lwr(record)
        features, _ = build_graph(record)
        with torch.no_grad():
            return torch.argmin(model(features)).item()

    total_reward = 0
    violations = 0
    oracle_hits = 0

    for t, row in df.iterrows():
        cpu = choose_cpu(t, row)

        # Completion time on the chosen CPU: queue wait plus service time.
        service_time = row["task_size"] / row[f"cpu_{cpu}_speed"]
        completion = row[f"cpu_{cpu}_wait_time"] + service_time

        missed = int(completion > row["sla_deadline"])

        # Reward is negative completion time with a fixed penalty per miss.
        total_reward += -completion - 8.0 * missed
        violations += missed
        oracle_hits += int(cpu == int(row["chosen_cpu_oracle"]))

    n = len(df)
    return total_reward/n, oracle_hits/n*100, violations/n*100


# Score all three policies first, then print one identically formatted
# report section per policy.
rr_res = evaluate("RR")
lwr_res = evaluate("LWR")
gnn_res = evaluate("GNN")

for header, (avg_reward, oracle_acc, sla_pct) in (
    ("\n=== ROUND ROBIN ===", rr_res),
    ("\n=== LWR ===", lwr_res),
    ("\n=== GNN-BASED GLOBAL (REGRESSION) ===", gnn_res),
):
    print(header)
    print(f"Avg Reward      : {avg_reward:.2f}")
    print(f"Oracle Accuracy : {oracle_acc:.2f}%")
    print(f"SLA Violation % : {sla_pct:.2f}%")


Epoch 1/10 | Loss: 6.6600
Epoch 2/10 | Loss: 1.2143
Epoch 3/10 | Loss: 0.8222
Epoch 4/10 | Loss: 0.5389
Epoch 5/10 | Loss: 0.3542
Epoch 6/10 | Loss: 0.2388
Epoch 7/10 | Loss: 0.1704
Epoch 8/10 | Loss: 0.1403
Epoch 9/10 | Loss: 0.1162
Epoch 10/10 | Loss: 0.0993

=== ROUND ROBIN ===
Avg Reward      : -10.96
Oracle Accuracy : 25.78%
SLA Violation % : 63.66%

=== LWR ===
Avg Reward      : -7.58
Oracle Accuracy : 71.16%
SLA Violation % : 42.62%

=== GNN-BASED GLOBAL (REGRESSION) ===
Avg Reward      : -6.98
Oracle Accuracy : 96.39%
SLA Violation % : 39.05%
