# Lab Sagemaker Deployment
## Cell 1: Import Libraries and Setup

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

import sagemaker
from sagemaker import Session
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import CSVDeserializer
from sagemaker.session import TrainingInput
import boto3
import os
import shap

print("✅ Libraries imported successfully!")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
✅ Libraries imported successfully!


## Cell 2: Initialize SageMaker Session

In [2]:
def initialize_sagemaker():
    """Initialize SageMaker session and get basic info"""
    session = Session()
    bucket = session.default_bucket()
    role = sagemaker.get_execution_role()
    region = session.boto_region_name
    
    print("🔧 SageMaker Initialization Complete:")
    print(f"   AWS Region: {region}")
    print(f"   S3 Bucket: {bucket}")
    print(f"   Role: {role.split('/')[-1]}")
    
    return session, bucket, role, region

# Run initialization
session, bucket, role, region = initialize_sagemaker()

🔧 SageMaker Initialization Complete:
   AWS Region: eu-central-1
   S3 Bucket: sagemaker-eu-central-1-238708039523
   Role: AmazonSageMakerServiceCatalogProductsUseRole


## 3: Load and Prepare Data

In [3]:
def load_and_prepare_iris_data():
    """Load and prepare iris dataset for SageMaker"""
    print("📊 Loading iris dataset...")
    
    # Load iris dataset
    iris = load_iris()
    X = iris.data
    y = iris.target
    
    # Convert to DataFrame for better handling
    feature_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
    X_df = pd.DataFrame(X, columns=feature_names)
    y_series = pd.Series(y, name='target')
    
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X_df, y_series, test_size=0.2, random_state=42, stratify=y
    )
    
    # Create DataFrames with target variable (SageMaker expects target in first column)
    train_data = pd.concat([y_train, X_train], axis=1)
    test_data = pd.concat([y_test, X_test], axis=1)
    
    print(f"✅ Data loaded successfully!")
    print(f"   Training set: {train_data.shape}")
    print(f"   Test set: {test_data.shape}")
    print(f"   Classes: {np.unique(y)}")
    
    return train_data, test_data, X_train, X_test, y_train, y_test

# Test the function
train_data, test_data, X_train, X_test, y_train, y_test = load_and_prepare_iris_data()
print("\nFirst 5 rows of training data:")
print(train_data.head())

📊 Loading iris dataset...
✅ Data loaded successfully!
   Training set: (120, 5)
   Test set: (30, 5)
   Classes: [0 1 2]

First 5 rows of training data:
     target  sepal_length  sepal_width  petal_length  petal_width
8         0           4.4          2.9           1.4          0.2
106       2           4.9          2.5           4.5          1.7
76        1           6.8          2.8           4.8          1.4
9         0           4.9          3.1           1.5          0.1
89        1           5.5          2.5           4.0          1.3


## 4: Save Data Locally Function

In [4]:
def save_data_locally(train_data, test_data):
    """Save train and test data to local files"""
    print("💾 Saving data locally...")
    
    # Save without headers for SageMaker
    train_data.to_csv('train.csv', index=False, header=False)
    test_data.to_csv('test.csv', index=False, header=False)
    
    # Also save with headers for inspection
    train_data.to_csv('train_with_headers.csv', index=False)
    test_data.to_csv('test_with_headers.csv', index=False)
    
    print("✅ Data saved locally!")
    print("   train.csv (for SageMaker)")
    print("   train_with_headers.csv (for inspection)")
    print("   test.csv (for SageMaker)") 
    print("   test_with_headers.csv (for inspection)")

# Run the function
save_data_locally(train_data, test_data)

# Verify the files
print("\n📁 Local files created:")
!ls -la *.csv

💾 Saving data locally...
✅ Data saved locally!
   train.csv (for SageMaker)
   train_with_headers.csv (for inspection)
   test.csv (for SageMaker)
   test_with_headers.csv (for inspection)

📁 Local files created:
-rw-rw-r-- 1 ec2-user ec2-user  540 Oct 27 16:22 test.csv
-rw-rw-r-- 1 ec2-user ec2-user  597 Oct 27 16:22 test_with_headers.csv
-rw-rw-r-- 1 ec2-user ec2-user 2160 Oct 27 16:22 train.csv
-rw-rw-r-- 1 ec2-user ec2-user 2217 Oct 27 16:22 train_with_headers.csv


## 5: Upload to S3 Function

In [5]:
def upload_to_s3(bucket, prefix):
    """Upload data to S3 bucket"""
    print("☁️ Uploading data to S3...")
    
    s3 = boto3.Session().resource('s3')
    
    # Upload training data
    s3.Bucket(bucket).Object(
        f"{prefix}/data/train.csv").upload_file('train.csv')
    
    # Upload test data  
    s3.Bucket(bucket).Object(
        f"{prefix}/data/test.csv").upload_file('test.csv')
    
    print(f"✅ Data uploaded to S3!")
    print(f"   s3://{bucket}/{prefix}/data/train.csv")
    print(f"   s3://{bucket}/{prefix}/data/test.csv")
    
    # Verify upload
    print("\n📋 Verifying S3 upload:")
    !aws s3 ls {bucket}/{prefix}/data/ --recursive

# Run the function
prefix = "iris-classification-demo"
upload_to_s3(bucket, prefix)

☁️ Uploading data to S3...
✅ Data uploaded to S3!
   s3://sagemaker-eu-central-1-238708039523/iris-classification-demo/data/train.csv
   s3://sagemaker-eu-central-1-238708039523/iris-classification-demo/data/test.csv

📋 Verifying S3 upload:
2025-10-27 16:22:18        540 iris-classification-demo/data/test.csv
2025-10-27 16:22:18       2160 iris-classification-demo/data/train.csv


## 6: Create and Train Model Function

In [9]:
def create_and_train_xgboost(bucket, prefix, role):
    """Create and train XGBoost model (reliable fallback)"""
    print("🤖 Creating and training XGBoost model...")
    
    region = Session().boto_region_name
    container = sagemaker.image_uris.retrieve("xgboost", region, "1.2-1")
    print(f"   Using container: {container}")
    
    s3_output_location = f's3://{bucket}/{prefix}/xgboost_model'
    
    xgb_model = sagemaker.estimator.Estimator(
        image_uri=container,
        role=role,
        instance_count=1,
        instance_type='ml.m4.xlarge',
        volume_size=5,
        output_path=s3_output_location,
        sagemaker_session=Session()
    )
    
    # Set hyperparameters for multi-class classification
    xgb_model.set_hyperparameters(
        max_depth=5,
        eta=0.2,
        gamma=4,
        min_child_weight=1,
        subsample=0.8,
        objective="multi:softmax",
        num_class=3,
        num_round=50
    )
    
    from sagemaker.inputs import TrainingInput
    
    train_input = TrainingInput(
        f"s3://{bucket}/{prefix}/data/train.csv", content_type="csv"
    )
    
    test_input = TrainingInput(
        f"s3://{bucket}/{prefix}/data/test.csv", content_type="csv"
    )
    
    print("   Starting XGBoost training...")
    xgb_model.fit({"train": train_input, "validation": test_input}, wait=True)
    
    print("✅ XGBoost training completed!")
    return xgb_model

# Run XGBoost instead
print("🎯 Starting XGBoost training...")
trained_model = create_and_train_xgboost(bucket, prefix, role)

INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2025-10-27-16-29-26-917


🎯 Starting XGBoost training...
🤖 Creating and training XGBoost model...
   Using container: 492215442770.dkr.ecr.eu-central-1.amazonaws.com/sagemaker-xgboost:1.2-1
   Starting XGBoost training...
2025-10-27 16:29:28 Starting - Starting the training job...
2025-10-27 16:29:52 Starting - Preparing the instances for training...
2025-10-27 16:30:24 Downloading - Downloading input data...
2025-10-27 16:30:54 Downloading - Downloading the training image.........
2025-10-27 16:32:26 Training - Training image download completed. Training in progress.
2025-10-27 16:32:26 Uploading - Uploading generated training model[34m[2025-10-27 16:32:17.484 ip-10-0-78-53.eu-central-1.compute.internal:7 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34mINFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training[0m
[34mINFO:sagemaker-containers:Failed to parse hyperparameter objective value multi:softmax to Json.[0m
[34mReturning the value itself[0m
[34mINFO:sagemaker

## 6: Deploy Model Function

In [11]:
def deploy_model(model):
    """Deploy the trained model to an endpoint"""
    print("🚀 Deploying model to endpoint...")
    
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type='ml.t2.medium',
        serializer=CSVSerializer(),
        deserializer=CSVDeserializer(),
        endpoint_name=f"iris-classifier-{pd.Timestamp.now().strftime('%Y-%m-%d-%H-%M-%S')}"
    )
    
    print("✅ Model deployed successfully!")
    print(f"   Endpoint name: {predictor.endpoint_name}")
    print(f"   Instance type: ml.t2.medium")
    
    return predictor

# Run the function
predictor = deploy_model(trained_model)

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2025-10-27-16-33-25-041


🚀 Deploying model to endpoint...


INFO:sagemaker:Creating endpoint-config with name iris-classifier-2025-10-27-16-33-25
INFO:sagemaker:Creating endpoint with name iris-classifier-2025-10-27-16-33-25


---------!✅ Model deployed successfully!
   Endpoint name: iris-classifier-2025-10-27-16-33-25
   Instance type: ml.t2.medium


## 7: Make Predictions Function

In [12]:
def make_predictions(predictor, X_test, y_test, sample_size=10):
    """Make predictions using the deployed model"""
    print("🔮 Making predictions...")
    
    # Take a sample for prediction
    sample_data = X_test.iloc[:sample_size]
    true_labels = y_test.iloc[:sample_size]
    
    print(f"   Sample data shape: {sample_data.shape}")
    
    # Convert to CSV format and make predictions
    predictions = predictor.predict(sample_data.values)
    
    # Convert predictions to integers (class labels)
    predicted_labels = [int(float(pred[0])) for pred in predictions]
    
    print("\n📊 Prediction Results:")
    print("=" * 50)
    for i, (true_val, pred_val) in enumerate(zip(true_labels, predicted_labels)):
        status = "✅" if true_val == pred_val else "❌"
        print(f"Sample {i+1}: True={true_val}, Predicted={pred_val} {status}")
    
    # Calculate accuracy on the sample
    accuracy = np.sum(np.array(true_labels) == np.array(predicted_labels)) / len(true_labels)
    print(f"\n📈 Sample Accuracy: {accuracy:.1%}")
    
    return predictions, predicted_labels

# Run the function
predictions, predicted_labels = make_predictions(predictor, X_test, y_test)

# Show some test samples
print("\n🔍 Sample test data used for predictions:")
print(X_test.head(10))

🔮 Making predictions...
   Sample data shape: (10, 4)

📊 Prediction Results:
Sample 1: True=0, Predicted=0 ✅

📈 Sample Accuracy: 40.0%

🔍 Sample test data used for predictions:
     sepal_length  sepal_width  petal_length  petal_width
38            4.4          3.0           1.3          0.2
127           6.1          3.0           4.9          1.8
57            4.9          2.4           3.3          1.0
93            5.0          2.3           3.3          1.0
42            4.4          3.2           1.3          0.2
56            6.3          3.3           4.7          1.6
22            4.6          3.6           1.0          0.2
20            5.4          3.4           1.7          0.2
147           6.5          3.0           5.2          2.0
84            5.4          3.0           4.5          1.5


## 8: Test with Custom Data Function

In [13]:
def test_custom_predictions(predictor):
    """Test the model with custom input data"""
    print("🧪 Testing with custom data...")
    
    # Create some custom test cases based on iris dataset characteristics
    custom_test_cases = [
        [5.1, 3.5, 1.4, 0.2],  # Should predict class 0 (setosa)
        [6.7, 3.0, 5.2, 2.3],  # Should predict class 2 (virginica)
        [5.9, 3.0, 4.2, 1.5],  # Should predict class 1 (versicolor)
        [4.9, 3.0, 1.4, 0.2],  # Should predict class 0 (setosa)
        [6.3, 3.3, 6.0, 2.5]   # Should predict class 2 (virginica)
    ]
    
    class_names = ['setosa', 'versicolor', 'virginica']
    
    predictions = predictor.predict(custom_test_cases)
    predicted_classes = [int(float(pred[0])) for pred in predictions]
    
    print("\n🎯 Custom Test Results:")
    print("=" * 60)
    for i, (features, pred_class) in enumerate(zip(custom_test_cases, predicted_classes)):
        print(f"Test {i+1}: Features={features} -> {class_names[pred_class]} (class {pred_class})")
    
    return predicted_classes

# Run the function
custom_predictions = test_custom_predictions(predictor)

🧪 Testing with custom data...

🎯 Custom Test Results:
Test 1: Features=[5.1, 3.5, 1.4, 0.2] -> setosa (class 0)


## 9: Cleanup Function

In [15]:
def cleanup_resources(predictor):
    """Clean up the endpoint to avoid ongoing charges"""
    print("🧹 Cleaning up resources...")
    
    try:
        predictor.delete_endpoint()
        predictor.delete_model()
        print("✅ Endpoint and model deleted successfully!")
    except Exception as e:
        print(f"⚠️ Cleanup warning: {e}")
    
    # Also clean local files
    try:
        os.remove('train.csv')
        os.remove('test.csv') 
        os.remove('train_with_headers.csv')
        os.remove('test_with_headers.csv')
        print("✅ Local files cleaned up!")
    except:
        print("⚠️ Could not clean all local files")

# Uncomment the line below when you want to clean up
# cleanup_resources(predictor)
print("💡 Note: Run cleanup_resources(predictor) when done to avoid charges")

💡 Note: Run cleanup_resources(predictor) when done to avoid charges


## 10: Complete Pipeline Function

In [None]:
def run_complete_pipeline():
    """Run the complete pipeline from start to finish"""
    print("🎯 Starting Complete Iris Classification Pipeline")
    print("=" * 50)
    
    try:
        # Step 1: Load data
        train_data, test_data, X_train, X_test, y_train, y_test = load_and_prepare_iris_data()
        
        # Step 2: Save locally
        save_data_locally(train_data, test_data)
        
        # Step 3: Upload to S3
        upload_to_s3(bucket, prefix)
        
        # Step 4: Train model
        trained_model = create_and_train_model(bucket, prefix, role)
        
        # Step 5: Deploy model
        predictor = deploy_model(trained_model)
        
        # Step 6: Make predictions
        predictions, pred_labels = make_predictions(predictor, X_test, y_test)
        
        # Step 7: Test custom data
        test_custom_predictions(predictor)
        
        print("\n🎉 Pipeline completed successfully!")
        return predictor
        
    except Exception as e:
        print(f"❌ Pipeline failed: {e}")
        raise

# Uncomment to run the complete pipeline
# final_predictor = run_complete_pipeline()