In [37]:
!pip install sagemaker -q

In [64]:
import os

import boto3
import numpy as np
import pandas as pd
import sagemaker
from sagemaker import image_uris
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
from sagemaker.s3 import S3Uploader
from sklearn.model_selection import train_test_split

In [65]:
from rich.console import Console
from rich.status import Status

console = Console()

# Location of the Seoul bike-sharing CSV on the UCI archive.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00560/SeoulBikeData.csv"

# Keep a rich status indicator on screen while pandas fetches and parses the file.
with Status("[bold cyan]Starting download...[/bold cyan]") as status:
    df = pd.read_csv(url, encoding="unicode_escape")

console.print("[bold green]All data downloaded successfully![/bold green]")
df.head()

Output()

Unnamed: 0,Date,Rented Bike Count,Hour,Temperature(°C),Humidity(%),Wind speed (m/s),Visibility (10m),Dew point temperature(°C),Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm),Seasons,Holiday,Functioning Day
0,01/12/2017,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
1,01/12/2017,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
2,01/12/2017,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,Winter,No Holiday,Yes
3,01/12/2017,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
4,01/12/2017,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,Winter,No Holiday,Yes


In [66]:
# Normalise column names: spaces become underscores, parentheses are removed.
# regex=False treats every pattern as a literal string, so '(' and ')' are never
# interpreted as regex metacharacters and the result is the same on every pandas
# version (the implicit default emits a FutureWarning on older releases).
df.columns = (
    df.columns
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

# Drop the date and the categorical columns; only numeric features are kept.
df = df.drop(columns=['Date', 'Functioning_Day', 'Holiday', 'Seasons'])
df['Hour'] = df['Hour'].astype(int)

# Target and feature matrix.
y = df['Rented_Bike_Count']
X = df.drop(columns=['Rented_Bike_Count'])

# Header-less copy of the full dataset with the target as the first column.
full_df = pd.concat([y, X], axis=1)
full_df.to_csv("bike_data.csv", header=False, index=False)
full_df.head()

  df.columns = df.columns.str.replace(' ', '_').str.replace('(', '').str.replace(')', '')
  df.columns = df.columns.str.replace(' ', '_').str.replace('(', '').str.replace(')', '')


Unnamed: 0,Rented_Bike_Count,Hour,Temperature°C,Humidity%,Wind_speed_m/s,Visibility_10m,Dew_point_temperature°C,Solar_Radiation_MJ/m2,Rainfallmm,Snowfall_cm
0,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0
1,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0
2,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0
3,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0
4,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0


In [67]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# SageMaker's built-in XGBoost reads header-less CSV and takes the FIRST column
# as the target. Appending the label as the last column would make the model
# learn to predict `Hour` instead of the bike count, so the label goes first.
# New frames are built instead of mutating the split results in place.
train_df = pd.concat([y_train, X_train], axis=1)
test_df = pd.concat([y_test, X_test], axis=1)

train_df.to_csv("train.csv", index=False, header=False)
test_df.to_csv("test.csv", index=False, header=False)

In [68]:
# AWS region used for the container image lookup and the runtime client.
region = "eu-north-1"

# S3 bucket and key prefix under which the data and model output are stored.
bucket = "ccbda-research-sagemaker"
prefix = "energy-bike-demand"

In [69]:
import boto3


s3 = boto3.client("s3")

# Each split is uploaded to its own folder under the prefix, always under the
# object name bike_data.csv; a status indicator is shown while the upload runs.
for split in ("train", "test"):
    with Status(f"[cyan]Uploading {prefix}/{split} to s3...", spinner="dots"):
        s3.upload_file(f"{split}.csv", bucket, f"{prefix}/{split}/bike_data.csv")

Output()

In [70]:
# IAM role assumed by the training job and the endpoint. Set SAGEMAKER_ROLE_ARN
# to run the notebook in another account; the original role stays the default.
role = os.environ.get(
    "SAGEMAKER_ROLE_ARN",
    "arn:aws:iam::940819259195:role/AmazonSageMaker-TrainingExecutionRole",
)

In [71]:
# Bind the SageMaker session to the notebook's configured region so it matches
# the region used for the container image lookup and the runtime client,
# rather than whatever default region the local AWS profile happens to have.
session = sagemaker.Session(boto_session=boto3.Session(region_name=region))

In [72]:
# Resolve the built-in XGBoost container image for the configured region.
xgboost_image_uri = image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.5-1",
)

# Training job settings: one ml.m5.large instance, 5 GB volume, 1 hour limit.
estimator_settings = dict(
    image_uri=xgboost_image_uri,
    role=role,
    sagemaker_session=session,
    instance_type="ml.m5.large",
    instance_count=1,
    volume_size=5,
    max_run=3600,
    output_path=f"s3://{bucket}/{prefix}/output",
)
estimator = Estimator(**estimator_settings)

# XGBoost hyperparameters: squared-error regression with 100 boosting rounds.
xgb_hyperparameters = {
    "objective": "reg:squarederror",
    "num_round": 100,
    "max_depth": 5,
    "eta": 0.2,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
}
estimator.set_hyperparameters(**xgb_hyperparameters)

In [73]:
# Training channel: everything under the train/ folder, declared as CSV.
train_s3_uri = f"s3://{bucket}/{prefix}/train/"
train_input = TrainingInput(s3_data=train_s3_uri, content_type="text/csv")

In [74]:
# fit() is called with its default blocking behaviour: it streams the job logs
# into the cell and only returns once the training job has finished.
estimator.fit({"train": train_input})

# Reaching this line means the job already ran to completion, so report that
# rather than claiming it was merely submitted.
console.print("[bold green]Training job completed successfully![/bold green]")

2025-05-07 17:26:27 Starting - Starting the training job...
2025-05-07 17:26:59 Downloading - Downloading input data......
2025-05-07 17:27:40 Downloading - Downloading the training image......
2025-05-07 17:28:51 Training - Training image download completed. Training in progress.
  from pandas import MultiIndex, Int64Index[0m
[34m[2025-05-07 17:28:45.700 ip-10-0-231-91.eu-north-1.compute.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2025-05-07 17:28:45.730 ip-10-0-231-91.eu-north-1.compute.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2025-05-07:17:28:46:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2025-05-07:17:28:46:INFO] Failed to parse hyperparameter objective value reg:squarederror to Json.[0m
[34mReturning the value itself[0m
[34m[2025-05-07:17:28:46:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2025-05-07:17:28:46:INFO] Running XGBoost Sagemaker in algorithm mod

In [75]:
# Host the trained model behind a real-time endpoint on a single ml.m5.large instance.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.large",
)

# Report the generated endpoint name; the prediction cell invokes it by name.
console.print(
    f"[bold green]Model deployed successfully![/bold green] Endpoint name: [bold]{predictor.endpoint_name}[/bold]"
)

--------!

In [84]:
import json
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from rich.progress import track

In [85]:
# The file has no header row, so pandas labels the columns 0, 1, 2, ...
df = pd.read_csv("test.csv", header=None)

# Column 0 is taken as the true label; every remaining column is a feature.
y_true = df[0]
X = df.drop(columns=[0])

In [86]:
# Low-level client used to invoke the deployed endpoint in the configured region.
runtime = boto3.client(
    service_name="sagemaker-runtime",
    region_name=region,
)

In [90]:
# One endpoint call per test row. y_pred stays aligned with X: a row whose call
# or parsing fails is logged and recorded as None.
y_pred = []
for row_idx in track(range(len(X)), description="Predicting..."):
    # Serialise the feature row as a single comma-separated CSV line.
    feature_values = X.iloc[row_idx].values
    payload = ",".join(str(value) for value in feature_values)

    try:
        response = runtime.invoke_endpoint(
            EndpointName=predictor.endpoint_name,
            ContentType="text/csv",
            Body=payload,
        )
        raw_body = response["Body"].read()
        result = raw_body.decode("utf-8").strip()
        # Predictions are rounded to the nearest integer.
        prediction = round(float(result))
    except Exception as err:
        console.log(f"[red]✗ Error at row {row_idx}:[/red] {err}")
        y_pred.append(None)  # placeholder keeps positions aligned with X
    else:
        y_pred.append(prediction)

Output()

In [91]:
# The prediction loop records a failed endpoint call as None. The sklearn
# metrics cannot handle None/NaN, so score only the rows that have a prediction
# and report how many were skipped.
y_true_arr = np.asarray(y_true, dtype=float)
y_pred_arr = np.array([np.nan if p is None else p for p in y_pred], dtype=float)
scored = ~np.isnan(y_pred_arr)

n_failed = int((~scored).sum())
if n_failed:
    console.print(
        f"[yellow]Skipping {n_failed} of {len(y_pred_arr)} rows without a prediction.[/yellow]"
    )
if not scored.any():
    raise ValueError("No successful predictions to evaluate.")

mse = mean_squared_error(y_true_arr[scored], y_pred_arr[scored])
mae = mean_absolute_error(y_true_arr[scored], y_pred_arr[scored])
r2 = r2_score(y_true_arr[scored], y_pred_arr[scored])

console.print(f"[green]MSE: {mse:.2f}[/green]")
console.print(f"[green]MAE: {mae:.2f}[/green]")
console.print(f"[green] R²: {r2:.2f}[/green]")

# NOTE(review): nothing in this notebook removes the deployed endpoint; when the
# evaluation is finished, consider calling predictor.delete_endpoint().