In [2]:
# Setup + Load splits
import os, sys, json
import pandas as pd
import sagemaker
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
from time import gmtime, strftime
from dotenv import load_dotenv

sys.path.append("..")
from config.config import BUCKET_NAME, S3_PREFIX

load_dotenv()
ROLE_ARN = os.getenv("SAGEMAKER_ROLE_ARN")
if not ROLE_ARN:
    raise RuntimeError("SAGEMAKER_ROLE_ARN not found in .env")

sess = sagemaker.Session()
region = sess.boto_region_name
print("Region:", region)
print("Bucket:", BUCKET_NAME)

train_s3 = f"s3://{BUCKET_NAME}/{S3_PREFIX['train']}/features.parquet"
val_s3   = f"s3://{BUCKET_NAME}/{S3_PREFIX['validation']}/features.parquet"
test_s3  = f"s3://{BUCKET_NAME}/{S3_PREFIX['test']}/features.parquet"

df_train = pd.read_parquet(train_s3)
df_val   = pd.read_parquet(val_s3)
df_test  = pd.read_parquet(test_s3)

for df in (df_train, df_val, df_test):
    df["date"] = pd.to_datetime(df["date"])

print("Shapes:", df_train.shape, df_val.shape, df_test.shape)

Region: us-east-1
Bucket: nfci-forecasting-222634372778
Shapes: (6800, 80) (1100, 80) (1100, 80)


In [3]:
# Convert to DeepAR JSONL (1 line per state series)
ITEM_COL = "state_fips"
TIME_COL = "date"
TARGET_COL = "NFCI"

def to_deepar_jsonl(df: pd.DataFrame, out_path: str):
    df = df[[ITEM_COL, TIME_COL, TARGET_COL]].copy()
    df = df.sort_values([ITEM_COL, TIME_COL]).reset_index(drop=True)

    n_series = 0
    with open(out_path, "w") as f:
        for item_id, g in df.groupby(ITEM_COL):
            g = g.sort_values(TIME_COL)

            start = g[TIME_COL].iloc[0].strftime("%Y-%m-%d %H:%M:%S")
            target = g[TARGET_COL].astype(float).tolist()

            # keep only states with enough history
            if len(target) < 24:
                continue

            f.write(json.dumps({"start": start, "target": target}) + "\n")
            n_series += 1

    print(f"Wrote {n_series} series -> {out_path}")
    return out_path

train_jsonl = to_deepar_jsonl(df_train, "train.jsonl")
val_jsonl   = to_deepar_jsonl(df_val, "validation.jsonl")
test_jsonl  = to_deepar_jsonl(df_test, "test.jsonl")

Wrote 50 series -> train.jsonl
Wrote 0 series -> validation.jsonl
Wrote 0 series -> test.jsonl


In [12]:
def count_lines(path):
    with open(path, "r") as f:
        return sum(1 for _ in f)

print("train.jsonl lines:", count_lines("train.jsonl"))
print("validation.jsonl lines:", count_lines("validation.jsonl"))
print("test.jsonl lines:", count_lines("test.jsonl"))

train.jsonl lines: 50
validation.jsonl lines: 0
test.jsonl lines: 0


In [4]:
# Upload JSONL to S3
DEEPar_PREFIX = "nfci-deepar-1step"

train_s3_uri = sess.upload_data("train.jsonl", key_prefix=f"{DEEPar_PREFIX}/train")
val_s3_uri   = sess.upload_data("validation.jsonl", key_prefix=f"{DEEPar_PREFIX}/validation")
test_s3_uri  = sess.upload_data("test.jsonl", key_prefix=f"{DEEPar_PREFIX}/test")

print("train:", train_s3_uri)
print("val:  ", val_s3_uri)
print("test: ", test_s3_uri)

train: s3://sagemaker-us-east-1-222634372778/nfci-deepar-1step/train/train.jsonl
val:   s3://sagemaker-us-east-1-222634372778/nfci-deepar-1step/validation/validation.jsonl
test:  s3://sagemaker-us-east-1-222634372778/nfci-deepar-1step/test/test.jsonl


In [None]:
# SageMaker Experiments: create/load an Experiment + create a Trial for this run
from sagemaker.experiments.experiment import Experiment
from sagemaker.experiments.run import Run

EXPERIMENT_NAME = "nfci-deepar-1step-exp"

# Create or load Experiment (idempotent)
try:
    exp = Experiment.load(
        experiment_name=EXPERIMENT_NAME,
        sagemaker_session=sess
    )
except Exception:
    exp = Experiment.create(
        experiment_name=EXPERIMENT_NAME,
        description="DeepAR 1-step NFCI experiments",
        sagemaker_session=sess,
    )

run_name = f"run-{strftime('%Y%m%d-%H%M%S', gmtime())}"

print("Experiment:", EXPERIMENT_NAME)
print("Run:", run_name)


Experiment: nfci-deepar-1step-exp
Run: run-20260205-100933


In [None]:
# training + Experiments logging (NO test channel)
from sagemaker.experiments.run import Run
from time import gmtime, strftime

deepar_image = sagemaker.image_uris.retrieve(
    framework="forecasting-deepar",
    region=region,
    version="1"
)

job_name = "deepar-1step-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
output_path = f"s3://{BUCKET_NAME}/{DEEPar_PREFIX}/output/{job_name}"

deepar = Estimator(
    image_uri=deepar_image,
    role=ROLE_ARN,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    output_path=output_path,
    sagemaker_session=sess,
)

# baseline (your earlier better-performing params)
deepar.set_hyperparameters(
    time_freq="M",
    prediction_length=1,
    context_length=32,
    epochs=15,
    mini_batch_size=64,
    learning_rate=1e-3,
    num_cells=40,
    num_layers=2,
    dropout_rate=0.1,
    likelihood="gaussian",
)

train_input = TrainingInput(train_s3_uri, content_type="json")

run_name = f"run-{strftime('%Y%m%d-%H%M%S', gmtime())}"

with Run(
    experiment_name=EXPERIMENT_NAME,
    run_name=run_name,
    sagemaker_session=sess,
) as run:

    # Train with ONLY train channel (no test/validation)
    deepar.fit(
        {"train": train_input},
        job_name=job_name,
        logs=True
    )

    # log params + pointers
    run.log_parameters(deepar.hyperparameters())
    run.log_parameter("training_job_name", job_name)
    run.log_parameter("train_s3_uri", train_s3_uri)

print("Training completed:", job_name)
print("Logged to Experiment:", EXPERIMENT_NAME, "| Run:", run_name)

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: deepar-1step-2026-02-05-10-29-47


2026-02-05 10:27:09 Starting - Starting the training job...
2026-02-05 10:27:40 Starting - Preparing the instances for training...
2026-02-05 10:28:01 Downloading - Downloading input data...
2026-02-05 10:28:22 Downloading - Downloading the training image.........
2026-02-05 10:30:03 Training - Training image download completed. Training in progress.Docker entrypoint called with argument(s): train
Running default environment configuration script
Running custom environment configuration script
  if num_device is 1 and 'dist' not in kvstore:
[02/05/2026 10:30:12 INFO 139752390506304] Reading default configuration from /opt/amazon/lib/python3.9/site-packages/algorithm/resources/default-input.json: {'_kvstore': 'auto', '_num_gpus': 'auto', '_num_kv_servers': 'auto', '_tuning_objective_metric': '', 'cardinality': 'auto', 'dropout_rate': '0.10', 'early_stopping_patience': '', 'embedding_dimension': '10', 'learning_rate': '0.001', 'likelihood': 'student-t', 'mini_batch_size': '128', 'num_cell

In [15]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Deploy
predictor = deepar.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge"
)
predictor.serializer = JSONSerializer()
predictor.deserializer = JSONDeserializer()

ITEM_COL = "state_fips"
TIME_COL = "date"
TARGET_COL = "NFCI"

def deepar_predict_1step(predictor, start, history):
    hist = []
    for x in history:
        try:
            fx = float(x)
            if np.isfinite(fx):
                hist.append(fx)
        except Exception:
            continue

    if len(hist) < 2:
        raise ValueError("History too short after cleaning (need at least 2 points).")

    payload = {
        "instances": [{"start": start, "target": hist}],
        "configuration": {"num_samples": 200, "output_types": ["mean"]},
    }
    resp = predictor.predict(payload)
    return float(resp["predictions"][0]["mean"][0])

INFO:sagemaker:Creating model with name: forecasting-deepar-2026-02-05-10-41-08-188
INFO:sagemaker:Creating endpoint-config with name forecasting-deepar-2026-02-05-10-41-08-188
INFO:sagemaker:Creating endpoint with name forecasting-deepar-2026-02-05-10-41-08-188


--------!

In [16]:
# ---- Evaluation (uses df_train + df_test like your earlier working notebook) ----
df_train_eval = df_train.copy()
df_test_eval  = df_test.copy()
df_train_eval[TIME_COL] = pd.to_datetime(df_train_eval[TIME_COL])
df_test_eval[TIME_COL]  = pd.to_datetime(df_test_eval[TIME_COL])

y_true, y_pred = [], []
states = sorted(set(df_train_eval[ITEM_COL].unique()).intersection(set(df_test_eval[ITEM_COL].unique())))
MIN_HISTORY = 6

for sid in states:
    g_tr = df_train_eval[df_train_eval[ITEM_COL] == sid].sort_values(TIME_COL).reset_index(drop=True)
    g_te = df_test_eval[df_test_eval[ITEM_COL] == sid].sort_values(TIME_COL).reset_index(drop=True)

    if len(g_te) == 0:
        continue

    history_clean = []
    for x in g_tr[TARGET_COL].tolist():
        try:
            fx = float(x)
            if np.isfinite(fx):
                history_clean.append(fx)
        except Exception:
            continue

    if len(history_clean) < MIN_HISTORY:
        continue

    start = pd.to_datetime(g_tr[TIME_COL].iloc[0]).strftime("%Y-%m-%d %H:%M:%S")

    for actual in g_te[TARGET_COL].tolist():
        try:
            a = float(actual)
        except Exception:
            continue
        if not np.isfinite(a):
            continue

        pred = deepar_predict_1step(predictor, start, history_clean)
        y_true.append(a)
        y_pred.append(pred)
        history_clean.append(a)

y_true = np.array(y_true, dtype=float)
y_pred = np.array(y_pred, dtype=float)

if len(y_true) == 0:
    raise ValueError("No evaluation points collected. Check your df_train/df_test split and NFCI values.")

mse  = mean_squared_error(y_true, y_pred)
rmse = float(np.sqrt(mse))
mae  = float(mean_absolute_error(y_true, y_pred))
r2   = float(r2_score(y_true, y_pred))

print("Test RMSE:", rmse)
print("Test MAE: ", mae)
print("Test R2:  ", r2)
print("N eval points:", len(y_true))

Test RMSE: 0.10072702624678334
Test MAE:  0.07883151024336364
Test R2:   -3.8290357501258887
N eval points: 1100


In [18]:
# Log evaluation metrics to SageMaker Experiments (as a separate TrialComponent)
#from sagemaker.experiments.trial_component import TrialComponent
from sagemaker.experiments.run import Run

# Log metrics (rmse/mae/r2 must already exist from Cell A)
with Run(
    experiment_name=EXPERIMENT_NAME,
    run_name=run_name,          # must match the run_name you used for training
    sagemaker_session=sess,
) as run:
    run.log_metric("test_rmse", rmse)
    run.log_metric("test_mae", mae)
    run.log_metric("test_r2", r2)
    run.log_metric("n_eval_points", int(len(y_true)))



INFO:sagemaker.experiments.run:The run (run-20260205-102947) under experiment (nfci-deepar-1step-exp) already exists. Loading it.


In [19]:
# cleanup to avoid charges
predictor.delete_endpoint()
print("Logged metrics to:", EXPERIMENT_NAME, "| Run:", run_name)

INFO:sagemaker:Deleting endpoint configuration with name: forecasting-deepar-2026-02-05-10-41-08-188
INFO:sagemaker:Deleting endpoint with name: forecasting-deepar-2026-02-05-10-41-08-188


Logged metrics to: nfci-deepar-1step-exp | Run: run-20260205-102947
