# Lab Sagemaker Deployment
## Cell 1: Import Libraries and Setup

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

import sagemaker
from sagemaker import Session
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import CSVDeserializer
from sagemaker.session import TrainingInput

from sagemaker.amazon.amazon_estimator import get_image_uri

import boto3
import os
import shap

print("✅ Libraries imported successfully!")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
✅ Libraries imported successfully!


## Cell 2: Initialize SageMaker Session

In [2]:
def initialize_sagemaker():
    """Initialize SageMaker session and get basic info"""
    session = Session()
    bucket = session.default_bucket()
    role = sagemaker.get_execution_role()
    region = session.boto_region_name
    
    print("🔧 SageMaker Initialization Complete:")
    print(f"   AWS Region: {region}")
    print(f"   S3 Bucket: {bucket}")
    print(f"   Role: {role.split('/')[-1]}")
    
    return session, bucket, role, region

# Run initialization
session, bucket, role, region = initialize_sagemaker()

🔧 SageMaker Initialization Complete:
   AWS Region: eu-central-1
   S3 Bucket: sagemaker-eu-central-1-238708039523
   Role: AmazonSageMakerServiceCatalogProductsUseRole


## 3: Load and Prepare Data

In [3]:
def load_and_prepare_iris_data():
    """Load and prepare iris dataset for SageMaker"""
    print("📊 Loading iris dataset...")
    
    # Load iris dataset
    iris = load_iris()
    X = iris.data
    y = iris.target
    
    # Convert to DataFrame for better handling
    feature_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
    X_df = pd.DataFrame(X, columns=feature_names)
    y_series = pd.Series(y, name='target')
    
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X_df, y_series, test_size=0.2, random_state=42, stratify=y
    )
    
    # Create DataFrames with target variable (SageMaker expects target in first column)
    train_data = pd.concat([y_train, X_train], axis=1)
    test_data = pd.concat([y_test, X_test], axis=1)
    
    print(f"✅ Data loaded successfully!")
    print(f"   Training set: {train_data.shape}")
    print(f"   Test set: {test_data.shape}")
    print(f"   Classes: {np.unique(y)}")
    
    return train_data, test_data, X_train, X_test, y_train, y_test

# Test the function
train_data, test_data, X_train, X_test, y_train, y_test = load_and_prepare_iris_data()
print("\nFirst 5 rows of training data:")
print(train_data.head())

📊 Loading iris dataset...
✅ Data loaded successfully!
   Training set: (120, 5)
   Test set: (30, 5)
   Classes: [0 1 2]

First 5 rows of training data:
     target  sepal_length  sepal_width  petal_length  petal_width
8         0           4.4          2.9           1.4          0.2
106       2           4.9          2.5           4.5          1.7
76        1           6.8          2.8           4.8          1.4
9         0           4.9          3.1           1.5          0.1
89        1           5.5          2.5           4.0          1.3


## 4: Save Data Locally Function

In [4]:
def save_data_locally(train_data, test_data):
    """Save train and test data to local files"""
    print("💾 Saving data locally...")
    
    # Save without headers for SageMaker
    train_data.to_csv('train.csv', index=False, header=False)
    test_data.to_csv('test.csv', index=False, header=False)
    
    # Also save with headers for inspection
    train_data.to_csv('train_with_headers.csv', index=False)
    test_data.to_csv('test_with_headers.csv', index=False)
    
    print("✅ Data saved locally!")
    print("   train.csv (for SageMaker)")
    print("   train_with_headers.csv (for inspection)")
    print("   test.csv (for SageMaker)") 
    print("   test_with_headers.csv (for inspection)")

# Run the function
save_data_locally(train_data, test_data)

# Verify the files
print("\n📁 Local files created:")
!ls -la *.csv

💾 Saving data locally...
✅ Data saved locally!
   train.csv (for SageMaker)
   train_with_headers.csv (for inspection)
   test.csv (for SageMaker)
   test_with_headers.csv (for inspection)

📁 Local files created:
-rw-rw-r-- 1 ec2-user ec2-user  540 Oct 27 16:15 test.csv
-rw-rw-r-- 1 ec2-user ec2-user  597 Oct 27 16:15 test_with_headers.csv
-rw-rw-r-- 1 ec2-user ec2-user 2160 Oct 27 16:15 train.csv
-rw-rw-r-- 1 ec2-user ec2-user 2217 Oct 27 16:15 train_with_headers.csv


## 5: Upload to S3 Function

In [5]:
def upload_to_s3(bucket, prefix):
    """Upload data to S3 bucket"""
    print("☁️ Uploading data to S3...")
    
    s3 = boto3.Session().resource('s3')
    
    # Upload training data
    s3.Bucket(bucket).Object(
        f"{prefix}/data/train.csv").upload_file('train.csv')
    
    # Upload test data  
    s3.Bucket(bucket).Object(
        f"{prefix}/data/test.csv").upload_file('test.csv')
    
    print(f"✅ Data uploaded to S3!")
    print(f"   s3://{bucket}/{prefix}/data/train.csv")
    print(f"   s3://{bucket}/{prefix}/data/test.csv")
    
    # Verify upload
    print("\n📋 Verifying S3 upload:")
    !aws s3 ls {bucket}/{prefix}/data/ --recursive

# Run the function
prefix = "iris-classification-demo"
upload_to_s3(bucket, prefix)

☁️ Uploading data to S3...
✅ Data uploaded to S3!
   s3://sagemaker-eu-central-1-238708039523/iris-classification-demo/data/train.csv
   s3://sagemaker-eu-central-1-238708039523/iris-classification-demo/data/test.csv

📋 Verifying S3 upload:
2025-10-27 16:16:46        540 iris-classification-demo/data/test.csv
2025-10-27 16:16:46       2160 iris-classification-demo/data/train.csv


## 6: Create and Train Model Function

In [6]:
def create_and_train_model(bucket, prefix, role):
    """Create and train Random Forest model on SageMaker"""
    print("🤖 Creating and training Random Forest model...")
    
    # Get SageMaker session and region
    sagemaker_session = Session()
    region = sagemaker_session.boto_region_name
    
    # Get Random Forest container (instead of XGBoost)
    container = get_image_uri(region, 'randomforest')
    print(f"   Using container: {container}")
    
    # Configure S3 output location
    s3_output_location = f's3://{bucket}/{prefix}/randomforest_model'
    
    # Create Random Forest estimator
    rf_model = sagemaker.estimator.Estimator(
        image_uri=container,
        role=role,
        instance_count=1,
        instance_type='ml.m5.large',
        volume_size=5,
        output_path=s3_output_location,
        sagemaker_session=sagemaker_session
    )
    
    # Set Random Forest hyperparameters
    rf_model.set_hyperparameters(
        num_classes=3,           # 3 classes in iris dataset
        num_trees=50,            # Number of trees in the forest
        max_depth=10,            # Maximum depth of each tree
        min_node_size=1,         # Minimum number of samples in leaf nodes
        feature_bagging_factor=0.7,  # Fraction of features to use for each tree
        seed=42                  # Random seed for reproducibility
    )
    
    # Define training inputs (same as before)
    from sagemaker.inputs import TrainingInput
    
    train_input = TrainingInput(
        f"s3://{bucket}/{prefix}/data/train.csv", 
        content_type="text/csv"
    )
    
    test_input = TrainingInput(
        f"s3://{bucket}/{prefix}/data/test.csv", 
        content_type="text/csv"
    )
    
    print("   Starting Random Forest training...")
    print("   This may take a few minutes...")
    
    # Train the model
    rf_model.fit({"train": train_input, "validation": test_input}, wait=True)
    
    print("✅ Random Forest training completed!")
    print(f"   Model artifacts: {rf_model.model_data}")
    
    return rf_model

In [7]:
# Run the function (add this at the end of Cell 5)
print("🎯 Starting Random Forest training...")
trained_model = create_and_train_model(bucket, prefix, role)
print("✅ Random Forest training completed!")

🎯 Starting Random Forest training...
🤖 Creating and training Random Forest model...


The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


## 6: Deploy Model Function

In [None]:
def deploy_model(model):
    """Deploy the trained model to an endpoint"""
    print("🚀 Deploying model to endpoint...")
    
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type='ml.t2.medium',
        serializer=CSVSerializer(),
        deserializer=CSVDeserializer(),
        endpoint_name=f"iris-classifier-{pd.Timestamp.now().strftime('%Y-%m-%d-%H-%M-%S')}"
    )
    
    print("✅ Model deployed successfully!")
    print(f"   Endpoint name: {predictor.endpoint_name}")
    print(f"   Instance type: ml.t2.medium")
    
    return predictor

# Run the function
predictor = deploy_model(trained_model)