In [None]:
import sagemaker
from sklearn.model_selection import train_test_split
import boto3
import pandas as pd

In [None]:
# Low-level SageMaker API client; used further down to inspect the training
# job and to clean up the endpoint.
sm_boto3 = boto3.client("sagemaker")

# High-level SDK session that the upload call below runs through.
sess = sagemaker.Session()
region = sess.boto_session.region_name

# Resolve the bucket from the session instead of hardcoding an account- and
# region-specific name, so the notebook keeps working when it is run from
# another account or region. The SDK's default bucket follows the
# "sagemaker-<region>-<account-id>" naming scheme.
bucket = sess.default_bucket()

print(bucket)
print(region)
print(sess)

In [None]:
# Load the local copies of the data: features and targets live in separate
# CSV files. The test pair is read first, then the training pair.
df_test_X, df_test_Y = (
    pd.read_csv(csv_name) for csv_name in ("test.csv", "test_target.csv")
)

df_train_X, df_train_Y = (
    pd.read_csv(csv_name) for csv_name in ("train.csv", "train_target.csv")
)

In [None]:
# 2. Upload Data to S3
prefix = "retail-forecasting/xgboost"
local_data_dir = "data/post/xgboost"

# Upload the local data directory once per channel, under
# "<prefix>/train" and "<prefix>/test" respectively.
# NOTE(review): both channels are fed from the same local directory, so each
# S3 prefix receives the full directory contents. Presumably the training
# script picks the right file per channel -- confirm this is intended.
uploaded_uris = {
    channel: sess.upload_data(
        path=local_data_dir,
        bucket=bucket,
        key_prefix=f"{prefix}/{channel}",
    )
    for channel in ("train", "test")
}
train_input_path = uploaded_uris["train"]
test_input_path = uploaded_uris["test"]

print(f"Train Data uploaded to: {train_input_path}")
print(f"Test Data uploaded to: {test_input_path}")

In [None]:
from sagemaker.xgboost.estimator import XGBoost

# Hyperparameters handed to the estimator for the training entry point.
xgb_hyperparameters = dict(
    n_estimators=100,
    random_state=42,
    max_depth=5,
    learning_rate=0.1,
    subsample=0.8,
)

# Estimator for the custom training script: a single ml.m5.xlarge instance,
# requested as spot capacity (max_wait is set to twice max_run).
xgboost_estimator = XGBoost(
    # What to run.
    entry_point="xgboost_train_eval_script.py",
    framework_version="1.5-1",
    py_version="py3",
    hyperparameters=xgb_hyperparameters,
    # Who runs it and under which job-name prefix.
    role="arn:aws:iam::314473031062:role/service-role/AmazonSageMaker-ExecutionRole-20260111T111413",
    base_job_name="retail-forecast-xgb",
    # Where it runs.
    instance_type="ml.m5.xlarge",
    instance_count=1,
    # Spot settings and time limits.
    use_spot_instances=True,
    max_run=3600,
    max_wait=7200,
)

In [None]:
# Map each input channel name to the S3 location uploaded earlier, then
# start training and block until the job has finished.
input_channels = {
    "train": train_input_path,
    "test": test_input_path,
}
xgboost_estimator.fit(input_channels, wait=True)

In [None]:
# Wait on the most recent training job (without streaming its logs), then
# read the S3 location of the model artifact from the job description.
training_job = xgboost_estimator.latest_training_job
training_job.wait(logs="None")

job_description = sm_boto3.describe_training_job(
    TrainingJobName=training_job.name
)
artifact = job_description["ModelArtifacts"]["S3ModelArtifacts"]

In [None]:
# Display the S3 model-artifact location read from the training job description.
artifact

In [None]:
from sagemaker.xgboost.model import XGBoostModel
from time import gmtime,strftime

# Give the model a unique, UTC-timestamped name so repeated runs do not collide.
creation_stamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
model_name = "Custom-xgboost-model-" + creation_stamp

# Wrap the trained artifact in a deployable model; the same script that was
# used for training is passed as the entry point.
model = XGBoostModel(
    model_data=artifact,
    name=model_name,
    entry_point="xgboost_train_eval_script.py",
    framework_version="1.5-1",
    role="arn:aws:iam::314473031062:role/service-role/AmazonSageMaker-ExecutionRole-20260111T111413",
)

In [None]:
# Build a unique, UTC-timestamped endpoint name and report it before deploying.
deploy_stamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
endpoint_name = "Custom-xgboost-model-" + deploy_stamp
print("EndpointName={}".format(endpoint_name))

# Host the model on a single ml.m5.xlarge instance under that endpoint name.
hosting_options = {
    "initial_instance_count": 1,
    "instance_type": "ml.m5.xlarge",
    "endpoint_name": endpoint_name,
}
predictor = model.deploy(**hosting_options)

In [None]:
# Display the predictor object returned by model.deploy().
predictor

In [None]:
from sagemaker.serializers import CSVSerializer

# Smoke test: send the first five test rows to the endpoint as CSV and
# print whatever comes back.
sample_rows = df_test_X.head(5).values

predictor.serializer = CSVSerializer()
prediction = predictor.predict(sample_rows)

print(prediction)

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sagemaker.serializers import CSVSerializer

def _predictions_to_float_array(response):
    """Convert an endpoint response into a flat 1-D float array.

    Accepted response shapes:
      * bytes / bytearray -- decoded as UTF-8, then parsed as text;
      * str               -- comma- and/or newline-separated numbers;
      * list              -- flat, or rows of cells, where each cell is a
                             number or a numeric string; blank cells and
                             empty rows are skipped;
      * anything else     -- handed to NumPy and cast to float.

    Raises:
        ValueError: if a non-blank cell cannot be parsed as a float.
    """
    if isinstance(response, (bytes, bytearray)):
        response = response.decode("utf-8")

    if isinstance(response, str):
        tokens = response.replace("\n", ",").split(",")
        return np.array([float(tok) for tok in tokens if tok.strip()], dtype=float)

    if isinstance(response, list):
        values = []
        for row in response:
            cells = row if isinstance(row, (list, tuple)) else (row,)
            values.extend(float(cell) for cell in cells if str(cell).strip())
        return np.array(values, dtype=float)

    return np.asarray(response, dtype=float).ravel()


# 1. CONFIGURE PREDICTOR
predictor.serializer = CSVSerializer()

# 2. SELECT SUBSET (100 Rows Only)
limit = 100
print(f"[INFO] Selecting first {limit} rows for rapid testing...")

X_subset = df_test_X.iloc[:limit]
y_subset = df_test_Y.iloc[:limit]

# 3. PREDICT
print("[INFO] Sending prediction request...")
response = predictor.predict(X_subset.values)

# 4. PARSE RESPONSE
# A CSV-deserialized response arrives as rows of *string* cells. Building an
# array from it without an explicit float conversion yields a string array,
# which the metric functions below cannot work with, so always convert.
y_preds = _predictions_to_float_array(response)

# 5. CALCULATE METRICS
y_true = y_subset.values.ravel()

# Fail with an actionable message when predictions and targets do not line
# up (for example, the target file carries an extra column, or the endpoint
# returned fewer values than rows sent).
if y_true.shape[0] != y_preds.shape[0]:
    raise ValueError(
        f"Prediction/target length mismatch: got {y_preds.shape[0]} predictions "
        f"for {y_true.shape[0]} target values."
    )

rmse = np.sqrt(mean_squared_error(y_true, y_preds))
mae = mean_absolute_error(y_true, y_preds)
r2 = r2_score(y_true, y_preds)

print("\n" + "="*40)
print(f"EVALUATION REPORT (First {limit} rows)")
print("="*40)
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE):    {mae:.4f}")
print(f"R2 Score:                     {r2:.4f}")
print("="*40)

In [None]:
# Express the mean absolute error as a percentage of the mean actual value.
mean_sales = y_true.mean()

print(f"Average Daily Sales: {mean_sales:.2f}")

if mean_sales == 0:
    # A zero mean makes the ratio undefined; say so explicitly rather than
    # printing inf/nan alongside a NumPy division warning.
    error_percentage = float("nan")
    print("Average Error %:     undefined (mean of actual values is 0)")
else:
    error_percentage = (mae / mean_sales) * 100
    print(f"Average Error %:     {error_percentage:.2f}%")

In [None]:
# Tear down the hosting resources created for this endpoint.
# Look up the endpoint configuration name *before* deleting the endpoint,
# because it can no longer be described afterwards.
endpoint_config_name = sm_boto3.describe_endpoint(
    EndpointName=endpoint_name
)["EndpointConfigName"]

sm_boto3.delete_endpoint(EndpointName=endpoint_name)

# Deleting only the endpoint would leave its configuration behind.
sm_boto3.delete_endpoint_config(EndpointConfigName=endpoint_config_name)

# The registered model itself is not removed here.