# In [None]:
import json
import pandas as pd
import time
import os

# -------- CONFIG --------
# Input is read line by line by simulate_from_file(), one JSON document per line.
INPUT_FILE = "data/iot_data_100mb.json"
OUTPUT_CSV = "results/rc_results.csv"      # per-event result rows
OUTPUT_JSON = "results/rc_summary.json"    # aggregate summary

BATCH_SIZE = 10_000    # rows buffered in memory before each CSV flush
PRINT_EVERY = 50_000   # progress message interval, in events

# Link bandwidth per tier, in Mbps
BANDWIDTH_EDGE = 500
BANDWIDTH_RC = 200
BANDWIDTH_CLOUD = 50

# Distance to each tier, in km
DIST_EDGE = 5
DIST_RC = 100
DIST_CLOUD = 2_000

# Processing rate per tier, in MIPS
PROC_EDGE = 50_000
PROC_RC = 100_000
PROC_CLOUD = 300_000

# Cost factors: per-tier processing cost and transmission cost per MB
COST_PROC_EDGE = 1e-6
COST_PROC_RC = 2e-6
COST_PROC_CLOUD = 8e-6
COST_TX_PER_MB = 5e-5

# Energy factors, in Joules: per MB transferred and per unit of required MIPS
ENERGY_PER_MB = 2e-2
ENERGY_PROC_PER_MIPS = 1e-7


# -------- MODELS --------
def transmission_delay(size_kb, bw_mbps, dist_km):
    """Return the network delay for one payload, in milliseconds.

    The delay is the sum of two parts, both computed in seconds:
    the time to push the payload onto the link (size in KB converted to
    megabits, divided by the bandwidth in Mbps) and the propagation time
    (distance in km divided by a fixed 200,000 km/s signal speed).
    """
    megabytes = size_kb / 1024
    serialization_s = (megabytes * 8) / bw_mbps
    propagation_s = dist_km / 200_000
    seconds = serialization_s + propagation_s
    return seconds * 1000  # seconds -> ms

def processing_delay(size_kb, rate_mips):
    """Return ``(delay_ms, required_mips)`` for processing one payload.

    The workload is modelled as 50 units of MIPS per KB of payload; the
    delay is that workload divided by the node's rate, converted to ms.
    """
    workload = size_kb * 50
    return (workload / rate_mips) * 1000, workload

def transmission_cost(size_kb, dist_km):
    """Return the cost of sending ``size_kb`` kilobytes over ``dist_km`` km.

    The per-MB rate ``COST_TX_PER_MB`` is scaled by a distance surcharge of
    ``1 + dist_km / 10000``.
    """
    megabytes = size_kb / 1024
    distance_factor = 1 + dist_km / 10000
    return megabytes * COST_TX_PER_MB * distance_factor

def processing_cost(required_mips, rate_mips, base_cost):
    """Return the processing cost for a workload of ``required_mips``.

    Computed as (workload / rate) * base_cost * rate. Algebraically the
    rate cancels, so the result is approximately
    ``required_mips * base_cost``; the division still means a zero
    ``rate_mips`` raises ZeroDivisionError.
    """
    busy_time = required_mips / rate_mips
    return busy_time * base_cost * rate_mips

def total_energy(size_kb, required_mips):
    """Return the energy in Joules spent on one event.

    Adds a transfer term (payload in MB times ``ENERGY_PER_MB``) and a
    compute term (``required_mips`` times ``ENERGY_PROC_PER_MIPS``).
    """
    transfer_j = (size_kb / 1024) * ENERGY_PER_MB
    compute_j = required_mips * ENERGY_PROC_PER_MIPS
    return transfer_j + compute_j


# -------- CORE SIM --------
def process_event(event, i):
    """Route one event to a tier and compute its delay, cost and energy.

    Args:
        event: Mapping with optional keys ``size_kb`` (default 5),
            ``latency_need`` (default "medium") and ``peak_hour``
            (default False).
        i: Identifier stored in the result as ``event_id``.

    Returns:
        A dict with the chosen path, the payload size, the transmission,
        processing and total delays in ms, the cost, the energy, and the
        two routing inputs echoed back.
    """
    payload_kb = float(event.get("size_kb", 5))
    urgency = event.get("latency_need", "medium")
    is_peak = event.get("peak_hour", False)

    # Routing rule: "low" latency_need always goes to the edge; otherwise
    # peak-hour traffic goes to the regional tier and the rest to the cloud.
    if urgency == "low":
        tier = "edge"
    else:
        tier = "rc" if is_peak else "cloud"

    # (distance km, bandwidth Mbps, processing rate MIPS, processing cost)
    tier_settings = {
        "edge": (DIST_EDGE, BANDWIDTH_EDGE, PROC_EDGE, COST_PROC_EDGE),
        "rc": (DIST_RC, BANDWIDTH_RC, PROC_RC, COST_PROC_RC),
        "cloud": (DIST_CLOUD, BANDWIDTH_CLOUD, PROC_CLOUD, COST_PROC_CLOUD),
    }
    distance, bandwidth, rate, unit_cost = tier_settings[tier]

    network_ms = transmission_delay(payload_kb, bandwidth, distance)
    compute_ms, workload = processing_delay(payload_kb, rate)
    network_cost = transmission_cost(payload_kb, distance)
    compute_cost = processing_cost(workload, rate, unit_cost)

    # Key order is kept stable because it determines the CSV column order.
    record = {
        "event_id": i,
        "path": tier,
        "size_kb": payload_kb,
        "tx_delay_ms": network_ms,
        "proc_delay_ms": compute_ms,
        "total_delay_ms": network_ms + compute_ms,
        "cost_usd": network_cost + compute_cost,
        "energy_j": total_energy(payload_kb, workload),
        "latency_need": urgency,
        "peak_hour": is_peak,
    }
    return record


def _write_result_batch(rows, first_batch):
    """Write one batch of result rows to OUTPUT_CSV.

    The first batch truncates the file and writes the column header;
    every later batch is appended without a header.
    """
    pd.DataFrame(rows).to_csv(
        OUTPUT_CSV,
        index=False,
        mode="w" if first_batch else "a",
        header=first_batch,
    )


def simulate_from_file():
    """Run the simulation over INPUT_FILE and write the CSV and JSON outputs.

    INPUT_FILE is read line by line; each non-blank line is expected to be
    one JSON object describing an event. Lines that are not valid JSON, or
    whose JSON value is not an object, are skipped and counted. Per-event
    results are flushed to OUTPUT_CSV every BATCH_SIZE events, and a summary
    (event count, average delay/cost/energy, share of events per path) is
    written to OUTPUT_JSON and printed.

    The summary is built from running totals instead of re-reading the CSV,
    so memory use stays bounded by BATCH_SIZE. When no event is parsed the
    averages are ``None`` and the distribution is empty.

    Raises:
        OSError: If INPUT_FILE cannot be read or an output cannot be written.
    """
    # Create the directories the configured output paths actually point to.
    for target in (OUTPUT_CSV, OUTPUT_JSON):
        out_dir = os.path.dirname(target)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    # Remove results left by an earlier run so they are never mixed with
    # (or mistaken for) the rows produced by this run.
    try:
        os.remove(OUTPUT_CSV)
    except FileNotFoundError:
        pass

    pending = []
    count = 0
    skipped = 0
    first_batch = True
    delay_sum = 0.0
    cost_sum = 0.0
    energy_sum = 0.0
    path_counts = {}
    start = time.perf_counter()

    with open(INPUT_FILE, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                event = json.loads(line)
            except json.JSONDecodeError:
                skipped += 1
                continue
            if not isinstance(event, dict):
                # Valid JSON but not an object (e.g. a bare number or list):
                # process_event needs a mapping, so treat it as malformed.
                skipped += 1
                continue

            row = process_event(event, count)
            pending.append(row)
            count += 1

            delay_sum += row["total_delay_ms"]
            cost_sum += row["cost_usd"]
            energy_sum += row["energy_j"]
            path_counts[row["path"]] = path_counts.get(row["path"], 0) + 1

            if count % BATCH_SIZE == 0:
                _write_result_batch(pending, first_batch)
                first_batch = False
                pending.clear()

            if count % PRINT_EVERY == 0:
                print(f"Processed {count:,} events...")

    # write leftover rows
    if pending:
        _write_result_batch(pending, first_batch)

    elapsed = time.perf_counter() - start
    print(f"\n✅ Simulation complete in {elapsed:.2f}s, processed {count:,} events.")
    if skipped:
        print(f"Skipped {skipped:,} malformed line(s).")

    # summarize
    if count:
        by_frequency = sorted(
            path_counts.items(), key=lambda item: item[1], reverse=True
        )
        summary = {
            "total_events": count,
            "avg_delay_ms": round(delay_sum / count, 3),
            "avg_cost_usd": round(cost_sum / count, 8),
            "avg_energy_j": round(energy_sum / count, 6),
            "distribution": {path: n / count for path, n in by_frequency},
        }
    else:
        summary = {
            "total_events": 0,
            "avg_delay_ms": None,
            "avg_cost_usd": None,
            "avg_energy_j": None,
            "distribution": {},
        }

    with open(OUTPUT_JSON, "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)

    print(f"Summary saved to {OUTPUT_JSON}")
    print(json.dumps(summary, indent=2))

# Run the simulation only when this file is executed directly, not on import.
if __name__ == "__main__":
    simulate_from_file()
