In [None]:
import sagemaker
import boto3
import pandas as pd
import numpy as np
import os
from time import gmtime, strftime
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# --- IMPORTS FOR TENSORFLOW ---
from sagemaker.tensorflow import TensorFlow
from sagemaker.tensorflow.model import TensorFlowModel

In [None]:
# Low-level SageMaker client plus the high-level SDK session used by later cells.
sm_boto3 = boto3.client("sagemaker")
sess = sagemaker.Session()
region = sess.boto_session.region_name

# Account-specific settings. The literals below remain the defaults, but they can
# be overridden without editing the notebook by exporting SAGEMAKER_BUCKET and/or
# SAGEMAKER_ROLE_ARN before starting the kernel.
bucket = os.environ.get("SAGEMAKER_BUCKET", "sagemaker-us-east-1-314473031062")  # Your Bucket
role = os.environ.get(
    "SAGEMAKER_ROLE_ARN",
    "arn:aws:iam::314473031062:role/service-role/AmazonSageMaker-ExecutionRole-20260111T111413",
)  # Your Role

print(f"Bucket: {bucket}")
print(f"Region: {region}")

In [None]:
# Load the held-out features and their targets from the local CSV files; these
# frames are used later to query the deployed endpoint.
df_test_X, df_test_Y = (
    pd.read_csv(csv_path)
    for csv_path in ("data/post/nn/test.csv", "data/post/nn/test_target.csv")
)

In [None]:
prefix = "retail-forecasting/lstm"

# Local folder holding all four CSVs (train/test features and targets).
local_data_dir = "data/post/nn"

# Files that belong to each training channel. Uploading the whole folder to both
# prefixes would place test files in the train channel (and vice versa), so each
# channel only receives its own files.
channel_files = {
    "train": ("train.csv", "train_target.csv"),
    "test": ("test.csv", "test_target.csv"),
}


def _upload_channel(channel):
    """Upload the files of one channel and return the channel's S3 prefix URI.

    Args:
        channel: Key into ``channel_files`` ("train" or "test").

    Returns:
        ``s3://<bucket>/<prefix>/<channel>``, the prefix that contains the
        uploaded files.
    """
    key_prefix = f"{prefix}/{channel}"
    for file_name in channel_files[channel]:
        sess.upload_data(
            path=os.path.join(local_data_dir, file_name),
            bucket=bucket,
            key_prefix=key_prefix,
        )
    # upload_data returns a per-file URI for single files; the training job
    # needs the prefix that holds the whole channel instead.
    return f"s3://{bucket}/{key_prefix}"


# NOTE: objects left under these prefixes by earlier whole-folder uploads are
# not removed here; clear them manually if they exist.

# Upload Train Data
train_input_path = _upload_channel("train")

# Upload Test Data
test_input_path = _upload_channel("test")

print(f"Train Data uploaded to: {train_input_path}")
print(f"Test Data uploaded to: {test_input_path}")

In [None]:
script_name = "lstm_train_eval_script.py"

# Hyperparameters handed to the estimator for the training script.
training_hyperparameters = {
    "epochs": 20,
    "batch_size": 32,
    "learning_rate": 0.001,
}

# All estimator settings gathered in one place, then unpacked into the constructor.
estimator_settings = dict(
    entry_point=script_name,
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    framework_version="2.11",  # TensorFlow version
    py_version="py39",  # Python 3.9
    base_job_name="retail-forecast-lstm",
    script_mode=True,  # custom training script
    hyperparameters=training_hyperparameters,
)

tf_estimator = TensorFlow(**estimator_settings)

In [None]:
# Map each input channel name to its S3 location, then run the training job and
# block until it finishes (wait=True).
input_channels = {"train": train_input_path, "test": test_input_path}
tf_estimator.fit(input_channels, wait=True)

In [None]:
# Build an endpoint name suffixed with the current UTC timestamp.
deploy_timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
endpoint_name = f"retail-lstm-ep-{deploy_timestamp}"
print(f"Deploying to Endpoint: {endpoint_name}")

# Deploy the trained estimator behind a single-instance endpoint.
predictor = tf_estimator.deploy(
    endpoint_name=endpoint_name,
    instance_type="ml.m5.xlarge",
    initial_instance_count=1,
)

In [None]:
limit = 100
print(f"\n[INFO] Selecting first {limit} rows for testing...")

# Prepare Data
X_subset = df_test_X.iloc[:limit].values
y_subset = df_test_Y.iloc[:limit].values.ravel()

# The test set may hold fewer than `limit` rows; track how many were actually
# selected and fail fast (before invoking the endpoint) on unusable input.
n_rows = len(X_subset)
if n_rows == 0:
    raise ValueError("No test rows available to send to the endpoint.")
if len(y_subset) != n_rows:
    raise ValueError(
        f"Feature/target row mismatch: {n_rows} feature rows vs {len(y_subset)} target values."
    )

# --- CRITICAL STEP FOR LSTM: RESHAPE ---
# The model expects [Samples, TimeSteps, Features]
# We have [Samples, Features], so we add the TimeStep dimension
X_reshaped = X_subset.reshape((n_rows, 1, X_subset.shape[1]))

print(f"Sending Payload Shape: {X_reshaped.shape}")

# Predict
# TensorFlow predictor handles numpy arrays automatically and returns a dict
response = predictor.predict(X_reshaped)

# Parse JSON Response {'predictions': [[val], [val]...]}
predictions = np.array(response['predictions'])
y_preds = predictions.flatten() # Flatten [[50], [60]] -> [50, 60]

# One prediction per target is required for the metrics below to be meaningful.
if y_preds.shape != y_subset.shape:
    raise ValueError(
        f"Endpoint returned {y_preds.size} values for {n_rows} input rows."
    )

# Metrics
rmse = np.sqrt(mean_squared_error(y_subset, y_preds))
mae = mean_absolute_error(y_subset, y_preds)
r2 = r2_score(y_subset, y_preds)

print("\n" + "="*40)
# Report the number of rows actually evaluated, which can be below `limit`.
print(f"LSTM EVALUATION REPORT (First {n_rows} rows)")
print("="*40)
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE):    {mae:.4f}")
print(f"R2 Score:                     {r2:.4f}")
print("="*40)

# Optional: Show side-by-side comparison
print("\nSample Comparisons (True vs Pred):")
# Cap at the available row count so short test sets do not raise IndexError.
for i in range(min(5, n_rows)):
    print(f"Row {i}: Actual={y_subset[i]:.2f}, Predicted={y_preds[i]:.2f}")

In [None]:
# Tear down everything the deployment created. Deleting only the endpoint leaves
# the endpoint configuration and the model registered in the account.
# The model is deleted first because the predictor looks up its model names via
# the endpoint configuration, which delete_endpoint removes.
predictor.delete_model()
predictor.delete_endpoint(delete_endpoint_config=True)