# Animal Adoption Model Deployment
This notebook deploys the trained model to SageMaker for real-time predictions and demonstrates how to make predictions on new data.

## Cell 1: Setup and Import Libraries

In [None]:
import pandas as pd
import numpy as np
import joblib

print("✅ Libraries imported successfully")

✅ Libraries imported successfully


## Cell 2: Configuration and Model Loading

In [None]:
# Configuration and Model Loading
import json
import os
from pathlib import Path

import boto3
try:
    import sagemaker
except ImportError:
    sagemaker = None

def load_config():
    """Load configuration with multiple fallback options.

    Sources are tried in order and the first one that applies wins:

    1. ``./config.json`` in the current working directory (returned as-is).
    2. Environment variables: ``S3_BUCKET_NAME`` (must be non-empty to select
       this option), ``AWS_REGION`` (default ``us-east-1``) and
       ``SAGEMAKER_ROLE_ARN`` (default empty string).
    3. Interactive prompts. The answers are written to ``./config.json`` so
       the next run takes option 1.

    Returns:
        dict: the configuration. When built by option 2 or 3 it contains
        ``s3_bucket_name`` and ``aws_region``; ``sagemaker_role_arn`` is
        always present for option 2 and only present for option 3 when the
        user typed a value.
    """
    # Option 1: Load from config.json (recommended)
    config_file = Path('./config.json')
    if config_file.exists():
        with open(config_file, encoding='utf-8') as f:
            config = json.load(f)
        print(f"Configuration loaded from {config_file}")
        return config

    # Option 2: Try environment variables
    if os.environ.get('S3_BUCKET_NAME'):
        config = {
            's3_bucket_name': os.environ['S3_BUCKET_NAME'],
            'aws_region': os.environ.get('AWS_REGION', 'us-east-1'),
            'sagemaker_role_arn': os.environ.get('SAGEMAKER_ROLE_ARN', '')
        }
        print("Configuration loaded from environment variables")
        return config

    # Option 3: Interactive input (beginner-friendly fallback)
    print("Configuration not found. Let's set it up interactively.")
    print("(You can skip this by running: python config_generator.py)")

    bucket_name = input("Enter your S3 bucket name (from terraform output): ").strip()
    while not bucket_name:
        # An empty name would be saved to config.json and silently break every
        # later run (option 1 would keep returning it), so ask again instead.
        bucket_name = input("S3 bucket name cannot be empty. Enter your S3 bucket name: ").strip()

    config = {
        's3_bucket_name': bucket_name,
        # Honour AWS_REGION here too, consistent with the env-var option above
        'aws_region': os.environ.get('AWS_REGION', 'us-east-1'),
    }

    # For deployment notebook, SageMaker role is more important
    sagemaker_role = input("Enter SageMaker role ARN (required for deployment): ").strip()
    if sagemaker_role:
        config['sagemaker_role_arn'] = sagemaker_role

    # Save for next time
    with open('./config.json', 'w', encoding='utf-8') as f:
        json.dump(config, f, indent=2)
    print("Configuration saved to config.json for future use")
    return config

# Resolve the configuration into notebook-level settings. Interrupting the
# interactive prompt (Ctrl+C / kernel interrupt) falls back to "nothing configured".
try:
    config = load_config()
    BUCKET_NAME = config['s3_bucket_name']
    AWS_REGION = config.get('aws_region', 'us-east-1')
    SAGEMAKER_ROLE = config.get('sagemaker_role_arn')

    print(f"Using S3 bucket: {BUCKET_NAME}")
    print(f"Using AWS region: {AWS_REGION}")

    if not SAGEMAKER_ROLE:
        # Without a role there is nothing to deploy with
        print("No SageMaker role configured - local testing only")
        sagemaker_session = None
    elif sagemaker is None:
        # Role is configured but the optional SDK import failed earlier
        print(f"SageMaker role: {SAGEMAKER_ROLE[:50]}...")
        print("SageMaker package not available")
        sagemaker_session = None
    else:
        print(f"SageMaker role: {SAGEMAKER_ROLE[:50]}...")
        boto_sess = boto3.Session(region_name=AWS_REGION)
        sagemaker_session = sagemaker.Session(boto_session=boto_sess)
        print(f"SageMaker session initialized in {AWS_REGION} - deployment enabled")

except KeyboardInterrupt:
    print("Configuration cancelled. Please run this cell again when ready.")
    BUCKET_NAME = SAGEMAKER_ROLE = sagemaker_session = None
    AWS_REGION = 'us-east-1'

Configuration loaded from config.json
Using S3 bucket: animal-insights-ae1a1bd9
Using AWS region: us-east-1
SageMaker role: arn:aws:iam::239285815587:role/sagemaker-execution...
SageMaker session initialized in us-east-1 - deployment enabled


## Cell 3: Load Trained Model and Artifacts

In [12]:
def load_model_artifacts(model_dir='./models'):
    """Load the trained model, encoders, and metadata.

    Args:
        model_dir: directory holding ``animal_adoption_model.pkl`` and,
            optionally, ``label_encoders.pkl`` and ``model_info.json``.
            Defaults to ``./models``.

    Returns:
        tuple: ``(model, encoders, model_info)``. ``encoders`` and
        ``model_info`` are ``None`` when their files are absent (a warning is
        printed for each).

    Raises:
        FileNotFoundError: if the model pickle itself is missing.
    """
    # Joined with '/' so messages for the default directory stay stable across platforms
    base_dir = str(model_dir).rstrip('/')
    model_path = f'{base_dir}/animal_adoption_model.pkl'
    encoders_path = f'{base_dir}/label_encoders.pkl'
    info_path = f'{base_dir}/model_info.json'

    # Check if model artifacts exist
    if not os.path.exists(model_path):
        raise FileNotFoundError(
            f"❌ Model file not found: {model_path}\n"
            "Please run the ML training notebook (02_ml_training.ipynb) first to train and save the model."
        )

    # Load model
    # SECURITY: joblib.load unpickles, which can execute arbitrary code.
    # Only point this at artifacts produced by your own training run.
    model = joblib.load(model_path)
    print(f"✅ Model loaded from {model_path}")

    # Load encoders
    if os.path.exists(encoders_path):
        encoders = joblib.load(encoders_path)
        print(f"✅ Encoders loaded from {encoders_path}")
    else:
        print(f"⚠️  Encoders file not found: {encoders_path}")
        encoders = None

    # Load model info
    if os.path.exists(info_path):
        with open(info_path, 'r', encoding='utf-8') as f:
            model_info = json.load(f)
        print(f"✅ Model info loaded from {info_path}")
    else:
        print(f"⚠️  Model info file not found: {info_path}")
        model_info = None

    return model, encoders, model_info

# Bring the trained artifacts into the notebook namespace
print("Loading model artifacts...")
model, encoders, model_info = load_model_artifacts()

# Summarise what was loaded; metadata and encoder lines only appear when available
summary_lines = ["\nModel Information:", f"Model type: {type(model).__name__}"]
if model_info:
    summary_lines.append(f"Features: {model_info.get('n_features', 'Unknown')}")
    summary_lines.append(f"Feature names: {model_info.get('feature_names', ['Unknown'])}")
    summary_lines.append(f"Target: {model_info.get('target_name', 'Unknown')}")
if encoders:
    summary_lines.append(f"Encoders available: {list(encoders.keys())}")
print("\n".join(summary_lines))

Loading model artifacts...
✅ Model loaded from ./models/animal_adoption_model.pkl
✅ Encoders loaded from ./models/label_encoders.pkl
✅ Model info loaded from ./models/model_info.json

Model Information:
Model type: RandomForestClassifier
Features: 6
Feature names: ['animal_type', 'sex_outcome', 'age_in_days', 'primary_breed', 'color', 'outcome_month']
Target: adopted_label
Encoders available: ['animal_type', 'sex_outcome', 'primary_breed', 'color']


## Cell 4: Create Inference Script for SageMaker

In [13]:
def create_inference_script():
    """Create the inference script for SageMaker deployment.

    Writes ``./code/inference.py`` (creating ``./code`` when needed) with the
    four serving hooks ``model_fn``, ``input_fn``, ``predict_fn`` and
    ``output_fn``.

    Returns:
        str: path of the written script (``./code/inference.py``).
    """

    # Create code directory
    os.makedirs('./code', exist_ok=True)

    # The script body is a plain (non-raw, non-f) triple-single-quoted string:
    # keep it free of backslashes and of triple single quotes.
    inference_script = '''
import joblib
import pandas as pd
import numpy as np
import json
import os
from io import BytesIO, StringIO

# Feature order used at training time; model_info.json overrides it when packaged.
DEFAULT_FEATURE_NAMES = [
    "animal_type", "sex_outcome", "age_in_days",
    "primary_breed", "color", "outcome_month",
]


def model_fn(model_dir):
    """Load the model, optional encoders, and feature names for inference"""
    model = joblib.load(os.path.join(model_dir, 'animal_adoption_model.pkl'))

    # Encoders are optional in the packaged tarball, so only load them when present
    encoders = None
    encoders_path = os.path.join(model_dir, 'label_encoders.pkl')
    if os.path.exists(encoders_path):
        encoders = joblib.load(encoders_path)

    feature_names = list(DEFAULT_FEATURE_NAMES)
    info_path = os.path.join(model_dir, 'model_info.json')
    if os.path.exists(info_path):
        with open(info_path) as f:
            feature_names = json.load(f).get('feature_names') or feature_names

    return {'model': model, 'encoders': encoders, 'feature_names': feature_names}


def _as_text(request_body):
    """Decode a bytes payload to str; pass str through unchanged"""
    if isinstance(request_body, (bytes, bytearray)):
        return bytes(request_body).decode('utf-8')
    return request_body


def input_fn(request_body, request_content_type):
    """Deserialize the request body into a DataFrame"""
    if request_content_type == "application/json":
        payload = json.loads(_as_text(request_body))
        if isinstance(payload, dict) and "instances" in payload:
            payload = payload["instances"]
        return pd.DataFrame(payload)

    if request_content_type in ("text/csv", "text/plain"):
        # Header-less CSV: columns are labelled positionally and named in predict_fn
        return pd.read_csv(StringIO(_as_text(request_body)), header=None)

    if request_content_type == "application/x-npy":
        raw = request_body if isinstance(request_body, (bytes, bytearray)) else request_body.encode("latin1")
        arr = np.load(BytesIO(raw), allow_pickle=False)
        # A 1-D array is treated as a single row
        return pd.DataFrame(np.atleast_2d(arr))

    raise ValueError(f"Unsupported content type: {request_content_type}")


def _to_feature_frame(input_data, feature_names):
    """Copy the payload into a DataFrame whose columns follow the training order"""
    frame = pd.DataFrame(input_data).copy()
    if not all(isinstance(name, str) for name in frame.columns):
        # Positional columns (CSV / NumPy payloads): attach the training names
        if frame.shape[1] != len(feature_names):
            raise ValueError(
                f"Expected {len(feature_names)} feature columns, got {frame.shape[1]}"
            )
        frame.columns = list(feature_names)
    elif set(feature_names).issubset(frame.columns):
        # Named columns (JSON payloads): restore the training order
        frame = frame[list(feature_names)]
    return frame


def predict_fn(input_data, model_dict):
    """Make predictions on input data"""
    model = model_dict['model']
    encoders = model_dict.get('encoders', None)
    feature_names = model_dict.get('feature_names') or DEFAULT_FEATURE_NAMES

    # Preprocess a copy of the input data with named, training-ordered columns
    processed_data = _to_feature_frame(input_data, feature_names)

    # If encoders are present, apply them; else assume the model can handle raw features (Pipeline)
    if encoders:
        for column, encoder in encoders.items():
            if column in processed_data.columns:
                processed_data[column] = processed_data[column].astype(str)
                unseen_mask = ~processed_data[column].isin(getattr(encoder, "classes_", []))
                if unseen_mask.any() and hasattr(encoder, "classes_"):
                    # Unseen categories fall back to the encoder's first class
                    fallback = encoder.classes_[0]
                    processed_data.loc[unseen_mask, column] = fallback
                processed_data[column] = encoder.transform(processed_data[column])

    # Make predictions
    predictions = model.predict(processed_data)
    # Try to compute probabilities; gracefully degrade if unavailable
    probs = None
    try:
        probs = model.predict_proba(processed_data)
    except Exception:
        try:
            scores = model.decision_function(processed_data)
            if scores.ndim == 1:
                # Convert scores to pseudo-probabilities (sigmoid)
                probs = np.vstack([1/(1+np.exp(scores)), 1/(1+np.exp(-scores))]).T
            else:
                probs = scores  # best-effort
        except Exception:
            probs = None

    result = {'predictions': predictions.tolist()}
    if probs is not None:
        result['probabilities'] = probs.tolist()
    return result


def output_fn(prediction, content_type):
    """Format the prediction output"""
    if content_type == 'application/json':
        return json.dumps(prediction)
    else:
        raise ValueError(f"Unsupported content type: {content_type}")
'''

    # Write the inference script
    script_path = './code/inference.py'
    with open(script_path, 'w', encoding='utf-8') as f:
        f.write(inference_script.strip() + "\n")

    print(f"✅ Inference script created: {script_path}")
    return script_path

# Generate ./code/inference.py
inference_script_path = create_inference_script()

# Stage the model artifacts next to the inference script
import shutil
try:
    # Model and encoders are copied unconditionally; a missing file lands in the except below
    for required_artifact in ('animal_adoption_model.pkl', 'label_encoders.pkl'):
        shutil.copy(f'./models/{required_artifact}', './code/')
    # Metadata is copied only when it exists
    if os.path.exists('./models/model_info.json'):
        shutil.copy('./models/model_info.json', './code/')
    print("✅ Model artifacts copied to code directory for deployment")
except Exception as e:
    print(f"⚠️  Warning: Could not copy model artifacts: {e}")

# Package model artifacts for SageMaker (must be a tar.gz with files at root)
import tarfile
tar_path = './code/model.tar.gz'
try:
    with tarfile.open(tar_path, 'w:gz') as tar:
        # The model pickle is mandatory; tar.add raises if it is missing
        tar.add('./code/animal_adoption_model.pkl', arcname='animal_adoption_model.pkl')
        # Encoders and metadata are optional; include whichever is present
        for optional_artifact in ('label_encoders.pkl', 'model_info.json'):
            if os.path.exists(f'./code/{optional_artifact}'):
                tar.add(f'./code/{optional_artifact}', arcname=optional_artifact)
    print(f"✅ Packaged model artifact: {tar_path}")
except Exception as e:
    print(f"❌ Failed to create model tarball: {e}")


✅ Inference script created: ./code/inference.py
✅ Model artifacts copied to code directory for deployment
✅ Packaged model artifact: ./code/model.tar.gz


## Cell 5: Local Model Testing

In [14]:
def test_local_predictions():
    """Smoke-test the in-memory model on four hand-written animals.

    Uses the notebook-level ``model`` and ``encoders``. Categorical values the
    encoders have never seen are replaced by the encoder's first class (with a
    warning) before encoding.

    Returns:
        bool: True when predictions were produced; False when no encoders are
        loaded or prediction raised.
    """
    print("Testing model predictions locally...")

    # Four sample animals: ages are 1 year, 2 years, 6 months, 3 years;
    # months are June, March, September, December
    sample_data = pd.DataFrame({
        'animal_type': ['Dog', 'Cat', 'Dog', 'Cat'],
        'sex_outcome': ['Spayed Female', 'Neutered Male', 'Intact Male', 'Spayed Female'],
        'age_in_days': [365, 730, 180, 1095],
        'primary_breed': ['Pit Bull', 'Domestic Shorthair', 'Labrador Retriever', 'Siamese'],
        'color': ['Brown', 'Black', 'Yellow', 'Seal Point'],
        'outcome_month': [6, 3, 9, 12]
    })

    print("Sample test data:")
    print(sample_data)

    # Nothing can be encoded without the training encoders
    if not encoders:
        print("❌ No encoders available for preprocessing")
        return False

    processed_sample = sample_data.copy()
    for column, encoder in encoders.items():
        if column not in processed_sample.columns:
            continue

        sample_values = processed_sample[column].astype(str)
        unseen_mask = ~sample_values.isin(encoder.classes_)

        if unseen_mask.any():
            print(f"⚠️  Found unseen categories in {column}: {sample_values[unseen_mask].tolist()}")
            # Fallback is simply the first entry of classes_
            fallback_class = encoder.classes_[0]
            sample_values[unseen_mask] = fallback_class
            print(f"   Replaced with: {fallback_class}")

        processed_sample[column] = encoder.transform(sample_values)

    try:
        predictions = model.predict(processed_sample)
        # Column 1 of predict_proba is read as the probability of adoption
        probabilities = model.predict_proba(processed_sample)[:, 1]

        print("\nPrediction Results:")
        for position, (pred, prob) in enumerate(zip(predictions, probabilities), start=1):
            if pred == 1:
                outcome, confidence = "Adopted", prob
            else:
                outcome, confidence = "Not Adopted", 1 - prob
            print(f"  Animal {position}: {outcome} (confidence: {confidence:.3f})")

        print("\n✅ Local testing completed successfully!")
        return True

    except Exception as e:
        print(f"❌ Local prediction failed: {e}")
        return False

# Test the model locally. The resulting flag is checked by deploy_to_sagemaker(),
# which aborts the deployment when local testing did not succeed.
local_test_success = test_local_predictions()

Testing model predictions locally...
Sample test data:
  animal_type    sex_outcome  age_in_days       primary_breed       color  \
0         Dog  Spayed Female          365            Pit Bull       Brown   
1         Cat  Neutered Male          730  Domestic Shorthair       Black   
2         Dog    Intact Male          180  Labrador Retriever      Yellow   
3         Cat  Spayed Female         1095             Siamese  Seal Point   

   outcome_month  
0              6  
1              3  
2              9  
3             12  
⚠️  Found unseen categories in primary_breed: ['Pit Bull', 'Domestic Shorthair', 'Labrador Retriever']
   Replaced with: American
⚠️  Found unseen categories in color: ['Yellow', 'Seal Point']
   Replaced with: Black

Prediction Results:
  Animal 1: Adopted (confidence: 0.678)
  Animal 2: Not Adopted (confidence: 0.578)
  Animal 3: Not Adopted (confidence: 0.953)
  Animal 4: Not Adopted (confidence: 0.640)

✅ Local testing completed successfully!


## Cell 6: Deploy Model to SageMaker (Optional)

In [None]:
def deploy_to_sagemaker(endpoint_name: str = "animal-adoption-predictor",
                        instance_type: str = "ml.t2.medium",
                        timeout_secs: int = 900):
    """
    Create or update a SageMaker endpoint in-place.

    - If `endpoint_name` does not exist: creates Model, EndpointConfig, Endpoint.
    - If `endpoint_name` exists: creates a new Model + EndpointConfig and UPDATEs the endpoint to use them.

    Reads the notebook-level names SAGEMAKER_ROLE, BUCKET_NAME, AWS_REGION,
    local_test_success and sagemaker_session.

    Args:
        endpoint_name: name of the endpoint to create or update.
        instance_type: instance type used for the single production variant.
        timeout_secs: maximum time to poll the endpoint in the UPDATE flow only;
            the CREATE flow blocks inside `deploy(wait=True)` instead.

    Returns:
        A Predictor with JSON serializer/deserializer attached, or None when a
        precondition is not met or any exception is raised inside the
        create/update step (the error is printed, not re-raised).

    Raises:
        FileNotFoundError: if ./code/model.tar.gz does not exist. This check runs
            before the try block, so it is not swallowed.
    """
    # --- Preconditions ---
    # NOTE(review): BUCKET_NAME is required here but never passed to upload_data below.
    if not SAGEMAKER_ROLE or not BUCKET_NAME:
        print("⚠️  SageMaker deployment skipped - no AWS configuration available")
        return None
    if not local_test_success:
        print("❌ SageMaker deployment aborted - local testing failed")
        return None
    if sagemaker_session is None:
        print("❌ No SageMaker session available")
        return None

    # SDK imports are local so the cell can be defined without sagemaker installed
    import os, time
    from datetime import datetime
    from botocore.exceptions import ClientError
    from sagemaker.sklearn.model import SKLearnModel
    from sagemaker.base_predictor import Predictor
    from sagemaker.base_serializers import JSONSerializer
    from sagemaker.base_deserializers import JSONDeserializer

    # --- Ensure artifact exists & upload ---
    local_artifact = "./code/model.tar.gz"
    if not os.path.exists(local_artifact):
        raise FileNotFoundError("model.tar.gz not found. Re-run the packaging cell to create it.")

    # No bucket argument is given, so the session decides the destination bucket;
    # the key prefix is fixed, so each run reuses the same "models/" prefix.
    model_data_uri = sagemaker_session.upload_data(path=local_artifact, key_prefix="models")
    print(f"✅ Uploaded model artifact to {model_data_uri}")

    # --- Define the Model object (serving image must match your training major/minor where relevant) ---
    # The timestamp keeps Model and EndpointConfig names unique per call
    ts = datetime.now().strftime("%Y%m%d-%H%M%S")
    model_name = f"animal-adoption-model-{ts}"
    endpoint_config_name = f"{endpoint_name}-config-{ts}"

    sklearn_model = SKLearnModel(
        model_data=model_data_uri,
        role=SAGEMAKER_ROLE,
        entry_point="inference.py",
        source_dir="./code",
        framework_version="1.2-1",   
        py_version="py3",
        sagemaker_session=sagemaker_session,
        name=model_name
    )

    sm_client = sagemaker_session.boto_session.client("sagemaker", region_name=AWS_REGION)

    # --- Helper: does the endpoint already exist? ---
    def _endpoint_exists(name: str) -> bool:
        """Return True if describe_endpoint succeeds, False on a not-found error."""
        try:
            sm_client.describe_endpoint(EndpointName=name)
            return True
        except sm_client.exceptions.ResourceNotFound:
            return False
        except ClientError as e:
            # Not-found may also arrive as a generic ClientError; match on its message
            if "Could not find endpoint" in str(e):
                return False
            raise

    # --- Waiter/poller ---
    def _wait_for_endpoint(name: str, timeout: int):
        """Poll describe_endpoint every 8s until a terminal status or timeout.

        Returns (status, FailureReason); raises TimeoutError after `timeout` seconds.
        """
        start = time.time()
        last_log = 0
        while True:
            resp = sm_client.describe_endpoint(EndpointName=name)
            status = resp.get("EndpointStatus")
            now = time.time()
            # Log at most once every 15 seconds
            if now - last_log >= 15:
                print(f"  ⏳ Endpoint status: {status} (elapsed: {int(now-start)}s)")
                last_log = now

            # These statuses end the wait; the caller decides whether they mean success
            if status in ("InService", "Failed", "OutOfService", "Deleting"):
                return status, resp.get("FailureReason")
            if now - start > timeout:
                raise TimeoutError(f"Timed out after {timeout}s. Last status={status}")
            time.sleep(8)

    try:
        if _endpoint_exists(endpoint_name):
            # -------- UPDATE FLOW --------
            print(f"🔁 Endpoint '{endpoint_name}' exists. Updating in-place on {instance_type}...")
            # Register the new Model (no deploy yet)
            sklearn_model.create(instance_type=instance_type)
            # Create a fresh EndpointConfig pointing to the new model
            sm_client.create_endpoint_config(
                EndpointConfigName=endpoint_config_name,
                ProductionVariants=[{
                    "VariantName": "AllTraffic",
                    "ModelName": model_name,
                    "InitialInstanceCount": 1,
                    "InstanceType": instance_type
                }]
            )
            # Update existing endpoint to use the new config
            sm_client.update_endpoint(
                EndpointName=endpoint_name,
                EndpointConfigName=endpoint_config_name
            )
            status, reason = _wait_for_endpoint(endpoint_name, timeout_secs)
            if status != "InService":
                raise RuntimeError(f"Endpoint update failed. Status={status}. Reason={reason}")
            print(f"✅ Updated endpoint in-place: {endpoint_name}")

            predictor = Predictor(endpoint_name=endpoint_name, sagemaker_session=sagemaker_session)
        else:
            # -------- CREATE FLOW --------
            print(f"🚀 Creating new endpoint '{endpoint_name}' on {instance_type}...")
            predictor = sklearn_model.deploy(
                initial_instance_count=1,
                instance_type=instance_type,
                endpoint_name=endpoint_name,
                wait=True,               # block until InService
                model_name=model_name
            )
            print(f"✅ Created endpoint: {endpoint_name}")

        # Set JSON IO
        predictor.serializer = JSONSerializer()
        predictor.deserializer = JSONDeserializer()

        print(
            "🔗 Console: "
            f"https://{sagemaker_session.boto_region_name}.console.aws.amazon.com/sagemaker/home"
            f"?region={sagemaker_session.boto_region_name}#/endpoints/{endpoint_name}"
        )
        return predictor

    # Failures in the create/update step (including the RuntimeError and
    # TimeoutError raised above) are reported and turned into a None return
    except ClientError as e:
        print(f"❌ SageMaker error: {e}")
        print("Tip: ensure the instance type is supported in your region (e.g., 'ml.t2.medium' or 'ml.m5.large').")
    except Exception as e:
        print(f"❌ Deployment error: {e}")

    return None



# Deploy (or update) the endpoint; `predictor` is None when deployment was skipped or failed
predictor = deploy_to_sagemaker(endpoint_name="animal-adoption-predictor", instance_type="ml.t2.medium")

print("Predictor ready." if predictor else "No predictor available for inference testing.")

✅ Uploaded model artifact to s3://sagemaker-us-east-1-239285815587/models/model.tar.gz
🚀 Creating new endpoint 'animal-adoption-predictor' on ml.t2.medium...
---------------!✅ Created endpoint: animal-adoption-predictor
🔗 Console: https://us-east-1.console.aws.amazon.com/sagemaker/home?region=us-east-1#/endpoints/animal-adoption-predictor
Predictor ready.


## Cell 7: Test SageMaker Endpoint (if deployed)

In [None]:
def test_sagemaker_endpoint(predictor):
    if predictor is None:
        print("⚠️  No SageMaker endpoint to test")
        return

    print("🧪 Testing SageMaker endpoint...")

    test_data = {
        "animal_type": ["Dog", "Cat"],
        "sex_outcome": ["Spayed Female", "Neutered Male"],
        "age_in_days": [365, 730],
        "primary_breed": ["Pit Bull", "Domestic Shorthair"],
        "color": ["Brown", "Black"],
        "outcome_month": [6, 3],
    }

    try:
        result = predictor.predict(test_data) 
        print("✅ SageMaker endpoint test successful!")
        print("Results:", result)
        return result
    except Exception as e:
        print(f"❌ SageMaker endpoint test failed: {e}")
        return None

# Test the endpoint only when the deployment cell actually produced a predictor.
# The hint is printed only when there is nothing to test; it previously claimed
# the test was "commented out" even right after the test had run.
if globals().get('predictor') is not None:
    test_sagemaker_endpoint(predictor)
else:
    print("💡 No SageMaker endpoint is available to test.")
    print("Run the deployment cell above first if you want to test a live endpoint.")

🧪 Testing SageMaker endpoint...
✅ SageMaker endpoint test successful!
Results: {'predictions': [1, 0], 'probabilities': [[0.32174619152108713, 0.6782538084789125], [0.5782331020192815, 0.42176689798071854]]}
💡 SageMaker endpoint testing is commented out.
Uncomment the lines above if you deployed the model to SageMaker.


## Cell 8: Batch Prediction Function

In [19]:
def make_batch_predictions(
    input_file_path,
    output_file_path=None,
    model_obj=None,
    encoders_obj=None
):
    """
    Make batch predictions on a CSV file using a fitted model and label encoders.

    Preprocessing applied before prediction:
    - categoricals: missing values become 'unknown'; values an encoder has not
      seen map to 'unknown' when the encoder knows that class, otherwise to the
      encoder's first class (``classes_[0]``).
    - age_in_days: coerced to numeric, gaps filled with the column median.
    - outcome_month: coerced to numeric, gaps filled with 1, clipped to 1..12.
    - columns are passed to the model in the fixed order listed below.

    Args:
        input_file_path: CSV file containing at least the six required columns.
        output_file_path: optional path; when given, results are saved as CSV.
        model_obj: fitted model; falls back to the globals ``loaded_model`` / ``model``.
        encoders_obj: dict of column -> encoder; falls back to the globals
            ``loaded_encoders`` / ``label_encoders`` / ``encoders``.
            The encoders are never modified.

    Returns:
        DataFrame: the input rows plus ``adoption_prediction``,
        ``adoption_probability`` and ``prediction_confidence``; or None when
        the model, encoders, file or columns are missing, or prediction fails
        (a message is printed in each case).
    """

    import os
    import numpy as np
    import pandas as pd

    # --- Resolve model / encoders from args or known globals ---
    model_candidate_order = [model_obj, globals().get("loaded_model"), globals().get("model")]
    model_use = next((m for m in model_candidate_order if m is not None), None)

    encoders_candidate_order = [encoders_obj, globals().get("loaded_encoders"), globals().get("label_encoders"), globals().get("encoders")]
    encoders_use = next((e for e in encoders_candidate_order if e is not None), None)

    if model_use is None:
        print("❌ No fitted model found. Pass model_obj=... or ensure 'loaded_model' or 'model' exists.")
        return None
    if encoders_use is None:
        print("❌ No label encoders found. Pass encoders_obj=... or ensure 'loaded_encoders' or 'label_encoders' exists.")
        return None

    # --- Required columns + training order (must match your training) ---
    required_columns = ['animal_type', 'sex_outcome', 'age_in_days', 'primary_breed', 'color', 'outcome_month']
    categorical_cols = ['animal_type', 'sex_outcome', 'primary_breed', 'color']
    numeric_cols = ['age_in_days', 'outcome_month']

    # --- Load input ---
    if not os.path.exists(input_file_path):
        print(f"❌ Input file not found: {input_file_path}")
        return None

    print(f"📂 Loading data from: {input_file_path}")
    data = pd.read_csv(input_file_path)
    print(f"✅ Loaded {len(data):,} records for prediction")

    # --- Column checks ---
    missing = [c for c in required_columns if c not in data.columns]
    if missing:
        print(f"❌ Missing required columns: {missing}")
        print(f"Available columns: {list(data.columns)}")
        return None

    # --- Copy & minimal preprocessing (align with training pipeline) ---
    X = data[required_columns].copy()

    # 1) Fill categoricals with 'unknown' and cast to string
    #    (the replace catches NaN/None that astype(str) turned into text)
    for c in categorical_cols:
        X[c] = X[c].astype(str).fillna('unknown').replace({'nan': 'unknown', 'None': 'unknown'})

    # 2) Numeric fills
    #    Coerce BEFORE taking the median: a stray non-numeric value makes the
    #    column object-typed, and median() on such a column raises.
    X['age_in_days'] = pd.to_numeric(X['age_in_days'], errors='coerce')
    if X['age_in_days'].isna().any():
        X['age_in_days'] = X['age_in_days'].fillna(X['age_in_days'].median())

    #    outcome_month: fallback to 1 if missing/invalid, then clamp to 1..12
    X['outcome_month'] = pd.to_numeric(X['outcome_month'], errors='coerce')
    if X['outcome_month'].isna().any():
        X['outcome_month'] = X['outcome_month'].fillna(1)
    X['outcome_month'] = X['outcome_month'].clip(lower=1, upper=12)

    # 3) Label-encode categoricals using training encoders
    for col in categorical_cols:
        if col not in encoders_use:
            print(f"❌ Missing encoder for column '{col}'. Keys available: {list(encoders_use.keys())}")
            return None

        enc = encoders_use[col]
        vals = X[col].astype(str)

        # Map unseen categories to 'unknown' if the encoder knows it, else to its first class.
        # After this step every value is a known class, so the encoder itself is
        # never touched (no classes are appended to the caller's object).
        unseen_mask = ~vals.isin(enc.classes_)
        if unseen_mask.any():
            fallback = 'unknown' if 'unknown' in enc.classes_ else enc.classes_[0]
            vals.loc[unseen_mask] = fallback

        X[col] = enc.transform(vals)

    # 4) Ensure column order matches training order
    X = X[required_columns]

    # 5) Final NaN guard (should be clean already)
    if X.isna().any().any():
        # As a last resort, fill remaining NaNs: numeric -> median; categorical -> mode(0)
        for c in X.columns:
            if c in numeric_cols:
                X[c] = X[c].fillna(X[c].median())
            else:
                X[c] = X[c].fillna(X[c].mode().iloc[0])

    # --- Predict ---
    try:
        preds = model_use.predict(X)
        # Column 1 of predict_proba is read as the adoption probability
        probs = model_use.predict_proba(X)[:, 1]
    except Exception as e:
        print(f"❌ Prediction failed: {e}")
        return None

    # --- Assemble results ---
    result = data.copy()
    result['adoption_prediction'] = preds
    result['adoption_probability'] = probs
    # Confidence is the probability of whichever class was predicted
    result['prediction_confidence'] = np.where(preds == 1, probs, 1 - probs)

    # --- Summary ---
    adoption_rate = (preds == 1).mean() * 100.0
    print("\n📊 Prediction Summary:")
    print(f"Predicted adoption rate: {adoption_rate:.1f}%")
    print(f"High confidence predictions (>80%): {(result['prediction_confidence'] > 0.8).sum():,}")
    print(f"Average confidence: {result['prediction_confidence'].mean():.3f}")

    # --- Save (optional) ---
    if output_file_path:
        result.to_csv(output_file_path, index=False)
        print(f"✅ Results saved to: {output_file_path}")

    return result


# Example usage (edit the paths to point at your own data).
# Reads the input CSV, writes predictions to the output CSV, and keeps the
# returned DataFrame in `results`.
results = make_batch_predictions(
    "./data/batch_animal_data.csv",
    "./data/predictions.csv",
    model_obj=model,          
    encoders_obj=encoders     
)

📂 Loading data from: ./data/batch_animal_data.csv
✅ Loaded 78 records for prediction

📊 Prediction Summary:
Predicted adoption rate: 15.4%
High confidence predictions (>80%): 26
Average confidence: 0.715
✅ Results saved to: ./data/predictions.csv


## Cell 9: Cleanup and Next Steps

In [18]:
def cleanup_resources():
    """Print the cleanup checklist and follow-up suggestions.

    This only prints guidance; it does not delete any AWS resource.
    """
    checklist = (
        "🧹 Resource Cleanup Instructions",
        "=" * 50,

        # What to tear down to stop incurring charges
        "\n⚠️  IMPORTANT: Clean up SageMaker resources to avoid charges!",
        "\nIf you deployed a SageMaker endpoint:",
        "\n1. Delete the endpoint:",
        "   # Uncomment and run this if you deployed an endpoint:",
        "   # predictor.delete_endpoint()",
        "\n2. Check AWS Console:",
        "   - Go to SageMaker console → Endpoints",
        "   - Verify no endpoints are running",
        "\n3. Clean up S3 model artifacts:",
        "   - Check S3 bucket for uploaded models",
        "   - Delete if no longer needed",

        # Suggestions for hardening the deployment
        "\n💡 Next Steps for Production:",
        "-" * 30,
        "1. **Monitoring**: Set up CloudWatch alarms for endpoint health",
        "2. **Scaling**: Configure auto-scaling for production traffic",
        "3. **Security**: Implement VPC endpoints for private access",
        "4. **Model Updates**: Set up automated retraining pipeline",
        "5. **A/B Testing**: Deploy multiple model versions for comparison",

        # Ways to consume the model
        "\n🎯 Integration Options:",
        "-" * 25,
        "1. **Web App**: Build a simple web interface for predictions",
        "2. **API**: Create REST API endpoints using AWS API Gateway",
        "3. **Batch Processing**: Schedule daily/weekly batch predictions",
        "4. **Real-time Stream**: Process incoming animal data streams",
    )
    # One entry per original print call, so the emitted text is unchanged
    print("\n".join(checklist))
    
# Print the cleanup checklist, then the closing summary
cleanup_resources()

banner_rule = "=" * 60
print("\n".join([
    "\n" + banner_rule,
    "🎉 MODEL DEPLOYMENT NOTEBOOK COMPLETE!",
    banner_rule,
    "\n✅ What you accomplished:",
    "  • Loaded and tested trained model locally",
    "  • Created SageMaker inference script",
    "  • Set up deployment pipeline (ready to use)",
    "  • Created batch prediction functionality",
    "\n🚀 Your model is ready for production deployment!",
]))

🧹 Resource Cleanup Instructions

⚠️  IMPORTANT: Clean up SageMaker resources to avoid charges!

If you deployed a SageMaker endpoint:

1. Delete the endpoint:
   # Uncomment and run this if you deployed an endpoint:
   # predictor.delete_endpoint()

2. Check AWS Console:
   - Go to SageMaker console → Endpoints
   - Verify no endpoints are running

3. Clean up S3 model artifacts:
   - Check S3 bucket for uploaded models
   - Delete if no longer needed

💡 Next Steps for Production:
------------------------------
1. **Monitoring**: Set up CloudWatch alarms for endpoint health
2. **Scaling**: Configure auto-scaling for production traffic
3. **Security**: Implement VPC endpoints for private access
4. **Model Updates**: Set up automated retraining pipeline
5. **A/B Testing**: Deploy multiple model versions for comparison

🎯 Integration Options:
-------------------------
1. **Web App**: Build a simple web interface for predictions
2. **API**: Create REST API endpoints using AWS API Gateway
3