In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

# Location of the training CSV; the notebook stops here if the file is missing.
TRAIN_DF = "CityGuardian_training_data__preview__regenerated.csv"  # your 100-row file
train_csv = Path(TRAIN_DF)
assert train_csv.exists()

# Parse the "timestamp" column as datetimes while reading.
df = pd.read_csv(train_csv, parse_dates=["timestamp"])

# --- build the t+H target plus two per-cell rolling features ---
# The target looks HORIZON_STEPS rows ahead within each cell; the rolling
# windows only cover the current row and the rows before it.
HORIZON_STEPS = 2  # +2 minutes in your 1-min dataset

# Order rows by cell, then time, so shifts and windows follow time within a cell.
df = df.sort_values(["cell_id", "timestamp"]).copy()

# Target: the same cell's utilization HORIZON_STEPS rows later (NaN at each cell's tail).
df["utilization_t_plus"] = df.groupby("cell_id")["utilization"].shift(-HORIZON_STEPS)

grp = df.groupby("cell_id")

# groupby(...).rolling(...) returns a (cell_id, row) MultiIndex; dropping the
# cell_id level restores the original row index so assignment aligns by row.
roll3_mean = grp["utilization"].rolling(3, min_periods=1).mean().reset_index(level=0, drop=True)
roll2_mean = grp["utilization"].rolling(2, min_periods=1).mean().reset_index(level=0, drop=True)

df["roll3_util_mean"] = roll3_mean
# "Slope" here is the 3-row mean minus the 2-row mean.
df["roll3_util_slope"] = df["roll3_util_mean"] - roll2_mean

# Keep only rows that actually have a future target value.
has_target = df["utilization_t_plus"].notna()
use = df.loc[has_target].copy()

# Model inputs, in the column order that is written to the CSV files.
feature_cols = [
    "utilization",
    "neighbor_util_mean",
    "p95_latency_ms",
    "call_success_rate",
    "handover_fail_rate",
    "is_event",
    "event_profile",
    "row",
    "col",
    "roll3_util_mean",
    "roll3_util_slope",
]

# Missing feature values are replaced with 0.0.
# NOTE(review): assumes every feature column (including event_profile) is numeric — confirm.
feature_frame = use[feature_cols]
X = feature_frame.fillna(0.0)

# Label vector as a plain NumPy array, row-aligned with X.
y = use["utilization_t_plus"].values

# --- contiguous 'middle' test split so we catch the event block ---
# Timestamps are cut into three consecutive runs: train (before the middle),
# test (the middle ~25%, at least 2 timestamps) and validation (after it).
uniq_ts = np.array(sorted(use["timestamp"].unique()))
n = len(uniq_ts)
tlen = max(2, int(np.ceil(n * 0.25)))
start = (n - tlen) // 2
end = start + tlen

ts_train = set(uniq_ts[:start])
ts_test = set(uniq_ts[start:end])
ts_val = set(uniq_ts[end:])

# Boolean row masks, one per split, in the row order of `use` / X / y.
row_ts = use["timestamp"]
i_train, i_val, i_test = (
    row_ts.isin(ts_block).values for ts_block in (ts_train, ts_val, ts_test)
)

X_train, X_val, X_test = X[i_train], X[i_val], X[i_test]
y_train, y_val, y_test = y[i_train], y[i_val], y[i_test]

# --- write SageMaker-ready CSVs (label first, no header) ---
import os


def _label_first(labels, features):
    """Return `features` with `labels` prepended as the first column.

    The label Series is built on the features' own index. `pd.concat(axis=1)`
    aligns on index labels, so a label Series with a fresh 0..n-1 index would be
    matched against the (sorted, filtered) feature index and pair labels with
    the wrong rows, adding NaN-filled rows for every unmatched label.

    Args:
        labels: 1-D array of targets, in the same row order as `features`.
        features: DataFrame of model inputs.

    Returns:
        DataFrame with the label in column 0 followed by the feature columns.
    """
    return pd.concat([pd.Series(labels, index=features.index), features], axis=1)


for split_dir in ("sm_data/train", "sm_data/validation", "sm_data/test"):
    os.makedirs(split_dir, exist_ok=True)

_label_first(y_train, X_train).to_csv("sm_data/train/train.csv", index=False, header=False)
_label_first(y_val, X_val).to_csv("sm_data/validation/val.csv", index=False, header=False)
# features-only for prediction
X_test.to_csv("sm_data/test/test_features.csv", index=False, header=False)

len(X_train), len(X_val), len(X_test), feature_cols


In [None]:
from sagemaker.s3 import S3Uploader  # upload helper from the SageMaker SDK
# If this import fails, skip this cell and use the boto3 upload below instead.

S3_PREFIX = "cityguardian/xgb-boto3"

# Upload each local split folder to the matching S3 prefix, in the order
# train, validation, test; each call yields the S3 URI for that split.
s3_train_uri, s3_val_uri, s3_test_uri = (
    S3Uploader.upload(f"sm_data/{channel}", f"s3://{S3_BUCKET}/{S3_PREFIX}/{channel}")
    for channel in ("train", "validation", "test")
)
print("S3:", s3_train_uri, s3_val_uri, s3_test_uri)


In [None]:
# boto3-based upload path: create the S3 client that upload_dir() uses.
import boto3, os
s3 = boto3.client("s3")  # module-level client; upload_dir reads this global
def upload_dir(local_dir, bucket, prefix):
    """Upload every file under `local_dir` to `s3://bucket/prefix/...`.

    The directory tree is walked recursively and each file keeps its path
    relative to `local_dir` as the tail of its S3 key. Uses the module-level
    `s3` client.

    Args:
        local_dir: Local directory to upload.
        bucket: Destination S3 bucket name.
        prefix: Key prefix placed in front of each relative file path.

    Returns:
        The destination URI, ``s3://{bucket}/{prefix}``.
    """
    for root, _, files in os.walk(local_dir):
        for fname in files:
            local_path = os.path.join(root, fname)
            # S3 keys use "/" regardless of platform; relpath() returns the OS
            # separator, which would put backslashes into keys on Windows.
            rel_key = os.path.relpath(local_path, local_dir).replace(os.sep, "/")
            s3.upload_file(local_path, bucket, f"{prefix}/{rel_key}")
    return f"s3://{bucket}/{prefix}"

S3_PREFIX = "cityguardian/xgb-boto3"

# Push each local split folder to its own prefix (train, validation, test, in
# that order) and keep the resulting S3 URIs for the jobs below.
s3_train_uri, s3_val_uri, s3_test_uri = [
    upload_dir(f"sm_data/{channel}", S3_BUCKET, f"{S3_PREFIX}/{channel}")
    for channel in ("train", "validation", "test")
]
print("S3:", s3_train_uri, s3_val_uri, s3_test_uri)


In [None]:
sm = boto3.client("sagemaker", region_name=AWS_REGION)

# Timestamped (UTC) training job name. Uses an aware "now" instead of the
# deprecated datetime.utcnow(); the formatted string is the same.
job_name = "cityguardian-xgb-" + datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d-%H%M%S")

# Built-in algorithms are selected by container image URI ("TrainingImage").
# "AlgorithmName" refers to an algorithm *resource* created in the account or
# subscribed to on AWS Marketplace, so "xgboost" is not a valid value there.
# XGB_IMAGE is the same image URI this notebook passes to create_model.
# NOTE(review): assumes XGB_IMAGE is defined in an earlier configuration cell — confirm.
algo_spec = {
    "TrainingInputMode": "File",
    "TrainingImage": XGB_IMAGE,
}

# XGBoost hyperparameters for the training job; every value is passed as a string.
hyperparams = dict(
    objective="reg:squarederror",
    eval_metric="rmse",
    num_round="400",
    max_depth="4",
    eta="0.07",
    subsample="0.9",
    colsample_bytree="0.9",
    early_stopping_rounds="20",
)

# One CSV input channel per split ("train" first, then "validation"); both
# read every object under their S3 prefix, fully replicated.
input_data = [
    {
        "ChannelName": channel_name,
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": channel_uri,
                "S3DataDistributionType": "FullyReplicated",
            }
        },
        "ContentType": "text/csv",
    }
    for channel_name, channel_uri in (
        ("train", s3_train_uri),
        ("validation", s3_val_uri),
    )
]

# Where the job writes its output, what it runs on, and its runtime cap.
output_conf = dict(S3OutputPath=f"s3://{S3_BUCKET}/{S3_PREFIX}/output")
resource_conf = dict(InstanceType="ml.m5.large", InstanceCount=1, VolumeSizeInGB=10)
stop_conf = dict(MaxRuntimeInSeconds=3600)

# Assemble the full request first, then submit the training job.
training_request = {
    "TrainingJobName": job_name,
    "AlgorithmSpecification": algo_spec,
    "RoleArn": ROLE_ARN,
    "InputDataConfig": input_data,
    "OutputDataConfig": output_conf,
    "ResourceConfig": resource_conf,
    "HyperParameters": hyperparams,
    "StoppingCondition": stop_conf,
}
sm.create_training_job(**training_request)

def wait_training(name, poll_seconds=30):
    """Poll a SageMaker training job until it reaches a terminal status.

    Prints the primary and secondary status on every poll. Uses the
    module-level `sm` SageMaker client.

    Args:
        name: Training job name to poll.
        poll_seconds: Seconds to sleep between polls (default 30, the
            previously hard-coded interval).

    Returns:
        The final `describe_training_job` response, whose
        "TrainingJobStatus" is "Completed", "Failed" or "Stopped".
    """
    terminal_states = {"Completed", "Failed", "Stopped"}
    while True:
        desc = sm.describe_training_job(TrainingJobName=name)
        status = desc["TrainingJobStatus"]
        sec = desc.get("SecondaryStatus", "")
        print("Status:", status, sec)
        if status in terminal_states:
            return desc
        time.sleep(poll_seconds)

desc = wait_training(job_name)

# Stop here unless training actually succeeded; otherwise the following cells
# would build a model and a transform job from an artifact that was never produced.
if desc["TrainingJobStatus"] != "Completed":
    raise RuntimeError(
        f"Training job {job_name} ended with status {desc['TrainingJobStatus']}: "
        f"{desc.get('FailureReason', 'no failure reason reported')}"
    )

# S3 location of the trained model archive, used when creating the model.
model_artifact = desc["ModelArtifacts"]["S3ModelArtifacts"]
print("Model artifact:", model_artifact)


In [None]:
# Register a SageMaker model that pairs the container image with the trained artifact.
model_name = f"{job_name}-model"
primary_container = {"Image": XGB_IMAGE, "ModelDataUrl": model_artifact}
sm.create_model(
    ModelName=model_name,
    PrimaryContainer=primary_container,
    ExecutionRoleArn=ROLE_ARN,
)

# Batch-transform the features-only test CSV with the model created above.
transform_job = f"{job_name}-xform"

# Input: the single test file, sent as CSV and split by line.
transform_input = {
    "DataSource": {
        "S3DataSource": {
            "S3DataType": "S3Prefix",
            "S3Uri": f"{s3_test_uri}/test_features.csv",
        }
    },
    "ContentType": "text/csv",
    "SplitType": "Line",
}
transform_output = {"S3OutputPath": f"s3://{S3_BUCKET}/{S3_PREFIX}/batch-preds"}
transform_resources = {"InstanceType": "ml.m5.large", "InstanceCount": 1}

sm.create_transform_job(
    TransformJobName=transform_job,
    ModelName=model_name,
    TransformInput=transform_input,
    TransformOutput=transform_output,
    TransformResources=transform_resources,
)

def wait_transform(name, poll_seconds=20):
    """Poll a SageMaker batch transform job until it reaches a terminal status.

    Prints the status on every poll. Uses the module-level `sm` SageMaker client.

    Args:
        name: Transform job name to poll.
        poll_seconds: Seconds to sleep between polls (default 20, the
            previously hard-coded interval).

    Returns:
        The final `describe_transform_job` response, whose
        "TransformJobStatus" is "Completed", "Failed" or "Stopped".
    """
    terminal_states = {"Completed", "Failed", "Stopped"}
    while True:
        d = sm.describe_transform_job(TransformJobName=name)
        status = d["TransformJobStatus"]
        print("Transform:", status)
        if status in terminal_states:
            return d
        time.sleep(poll_seconds)

tdesc = wait_transform(transform_job)

# Stop here unless the transform succeeded; a failed or stopped job leaves no
# prediction file for the next cell to download.
if tdesc["TransformJobStatus"] != "Completed":
    raise RuntimeError(
        f"Transform job {transform_job} ended with status {tdesc['TransformJobStatus']}: "
        f"{tdesc.get('FailureReason', 'no failure reason reported')}"
    )

# S3 prefix the job was configured to write its predictions under.
preds_s3 = tdesc["TransformOutput"]["S3OutputPath"]
print("Batch preds at:", preds_s3)


In [None]:
# Batch transform names each output object after its input object plus ".out"
# (here "test_features.csv.out") and writes it under the configured
# S3OutputPath. The whole "batch-preds/" prefix is listed instead of assuming
# a "<job-name>/" sub-folder, so the file is found under either layout.
out_key_prefix = f"{S3_PREFIX}/batch-preds/"
s3 = boto3.client("s3")

# list objects under that prefix and keep only this input's prediction file(s)
resp = s3.list_objects_v2(Bucket=S3_BUCKET, Prefix=out_key_prefix)
candidates = [
    obj for obj in resp.get("Contents", [])
    if obj["Key"].endswith("/test_features.csv.out")
]
if not candidates:
    raise FileNotFoundError(
        f"No test_features.csv.out found under s3://{S3_BUCKET}/{out_key_prefix}"
    )
# If several runs left outputs behind, take the most recently written one.
pred_key = max(candidates, key=lambda obj: obj["LastModified"])["Key"]
print("S3 key:", pred_key)

# download and load (one prediction per line)
s3.download_file(S3_BUCKET, pred_key, "preds.out")
y_pred = np.atleast_1d(np.loadtxt("preds.out", dtype=float))

# Guard against scoring a stale or partial output file against y_test.
if len(y_pred) != len(y_test):
    raise ValueError(
        f"Prediction count {len(y_pred)} does not match test rows {len(y_test)}"
    )

# Regression metrics on the held-out middle block (same as before).
from sklearn.metrics import mean_absolute_error, precision_recall_fscore_support, average_precision_score

residual = y_test - y_pred
mae = mean_absolute_error(y_test, y_pred)
rmse = float(np.sqrt(np.mean(residual**2)))
print(f"SageMaker XGB: MAE={mae:.4f} RMSE={rmse:.4f}")

# Alarm metrics: an "alarm" is any value at or above the 80th percentile of
# the training targets; the same threshold is applied to truth and prediction.
thr = float(np.quantile(y_train, 0.80))
yt = (y_test >= thr).astype(int)
yp = (y_pred >= thr).astype(int)
p, r, f1, _ = precision_recall_fscore_support(yt, yp, average="binary", zero_division=0)
# Average precision ranks by the raw predicted value, not the thresholded flag.
ap = average_precision_score(yt, y_pred)
print(f"→ Alarm@{thr:.2f}: Precision={p:.3f} Recall={r:.3f} F1={f1:.3f} AP={ap:.3f}")
