# Animal Adoption Model Deployment
This notebook deploys the trained model to SageMaker for real-time predictions and demonstrates how to make predictions on new data.

## Cell 1: Setup and Import Libraries

In [None]:
import pandas as pd
import numpy as np
import boto3
import sagemaker
from sagemaker.sklearn.model import SKLearnModel
from sagemaker.predictor import Predictor
import joblib
import json
import os
from datetime import datetime
import subprocess

print("✅ Libraries imported successfully")

## Cell 2: Configuration and Model Loading

In [None]:
# Configuration and Model Loading
import json
import os
from pathlib import Path

def load_config():
    """Load deployment configuration, trying three sources in order.

    1. ``../config.json`` (relative to the current working directory).
    2. Environment variables: used when ``S3_BUCKET_NAME`` is set;
       ``AWS_REGION`` defaults to ``us-east-1`` and ``SAGEMAKER_ROLE_ARN``
       defaults to an empty string.
    3. Interactive prompts. The answers are written to ``../config.json`` so
       the next run takes path 1 - unless no bucket name was entered, in
       which case nothing is saved (a saved empty bucket would otherwise be
       silently reloaded on every later run).

    Returns:
        dict: the configuration. On paths 2 and 3 it contains
        ``'s3_bucket_name'`` and ``'aws_region'``; ``'sagemaker_role_arn'``
        is present when supplied. On path 1 it is whatever the file holds.
    """
    config_file = Path('../config.json')

    # Option 1: Load from config.json (recommended)
    if config_file.exists():
        with open(config_file, encoding='utf-8') as f:
            config = json.load(f)
        print(f"Configuration loaded from {config_file}")
        return config

    # Option 2: Try environment variables
    if os.environ.get('S3_BUCKET_NAME'):
        config = {
            's3_bucket_name': os.environ['S3_BUCKET_NAME'],
            'aws_region': os.environ.get('AWS_REGION', 'us-east-1'),
            'sagemaker_role_arn': os.environ.get('SAGEMAKER_ROLE_ARN', '')
        }
        print("Configuration loaded from environment variables")
        return config

    # Option 3: Interactive input (beginner-friendly fallback)
    print("Configuration not found. Let's set it up interactively.")
    print("(You can skip this by running: python config_generator.py)")

    bucket_name = input("Enter your S3 bucket name (from terraform output): ").strip()
    config = {
        's3_bucket_name': bucket_name,
        # Honor AWS_REGION when it is set, consistent with Option 2.
        'aws_region': os.environ.get('AWS_REGION', 'us-east-1'),
    }

    # For deployment notebook, SageMaker role is more important
    sagemaker_role = input("Enter SageMaker role ARN (required for deployment): ").strip()
    if sagemaker_role:
        config['sagemaker_role_arn'] = sagemaker_role

    if not bucket_name:
        # Do not persist an unusable configuration.
        print("No S3 bucket name entered - configuration not saved. Re-run this cell to try again.")
        return config

    # Save for next time
    with open(config_file, 'w', encoding='utf-8') as f:
        json.dump(config, f, indent=2)
    print("Configuration saved to config.json for future use")

    return config

# Load configuration and derive the module-level settings used by later cells:
# BUCKET_NAME, AWS_REGION, SAGEMAKER_ROLE and sagemaker_session.
try:
    config = load_config()
    BUCKET_NAME = config['s3_bucket_name']
    AWS_REGION = config.get('aws_region', 'us-east-1')
    SAGEMAKER_ROLE = config.get('sagemaker_role_arn', None)

    print(f"Using S3 bucket: {BUCKET_NAME}")
    print(f"Using AWS region: {AWS_REGION}")

    if SAGEMAKER_ROLE:
        print(f"SageMaker role: {SAGEMAKER_ROLE[:50]}...")

        # Initialize SageMaker session in the configured region, so the
        # region printed above is the one actually used.
        try:
            sagemaker_session = sagemaker.Session(
                boto_session=boto3.Session(region_name=AWS_REGION or None)
            )
            print("SageMaker session initialized - deployment enabled")
        except Exception as e:
            # `sagemaker` is already imported at the top of the notebook, so
            # an ImportError cannot originate here. Session creation fails
            # for environment reasons (e.g. missing region or credentials);
            # report the cause and fall back to local-only mode.
            print(f"Could not initialize SageMaker session: {e}")
            sagemaker_session = None
    else:
        print("No SageMaker role configured - local testing only")
        sagemaker_session = None

except KeyboardInterrupt:
    # Raised when the user interrupts the interactive prompts in load_config().
    print("Configuration cancelled. Please run this cell again when ready.")
    BUCKET_NAME = None
    AWS_REGION = 'us-east-1'
    SAGEMAKER_ROLE = None
    sagemaker_session = None

## Cell 3: Load Trained Model and Artifacts

In [None]:
def load_model_artifacts():
    """Load the trained model, label encoders, and model metadata from disk.

    The artifacts are looked up in ``./models`` first and then ``../models``;
    the first directory that contains ``animal_adoption_model.pkl`` is used
    for all three files. (Other cells in this notebook read ``../models``, so
    both locations are accepted.)

    Returns:
        tuple: ``(model, encoders, model_info)``. ``encoders`` and
        ``model_info`` are ``None`` when their files are absent.

    Raises:
        FileNotFoundError: if the model file is in neither directory.
    """
    model_file = 'animal_adoption_model.pkl'
    candidate_dirs = ('./models', '../models')

    # Pick the first candidate directory that actually holds the model.
    models_dir = next(
        (d for d in candidate_dirs if os.path.exists(os.path.join(d, model_file))),
        None,
    )
    if models_dir is None:
        searched = ', '.join(os.path.join(d, model_file) for d in candidate_dirs)
        raise FileNotFoundError(
            f"❌ Model file not found: {searched}\n"
            "Please run the ML training notebook (02_ml_training.ipynb) first to train and save the model."
        )

    model_path = os.path.join(models_dir, model_file)
    encoders_path = os.path.join(models_dir, 'label_encoders.pkl')
    info_path = os.path.join(models_dir, 'model_info.json')

    # NOTE(review): joblib.load unpickles arbitrary Python objects. Only load
    # artifacts produced by your own training run, never files from an
    # untrusted source.
    model = joblib.load(model_path)
    print(f"✅ Model loaded from {model_path}")

    # Load encoders (optional)
    if os.path.exists(encoders_path):
        encoders = joblib.load(encoders_path)
        print(f"✅ Encoders loaded from {encoders_path}")
    else:
        print(f"⚠️  Encoders file not found: {encoders_path}")
        encoders = None

    # Load model info (optional)
    if os.path.exists(info_path):
        with open(info_path, 'r', encoding='utf-8') as f:
            model_info = json.load(f)
        print(f"✅ Model info loaded from {info_path}")
    else:
        print(f"⚠️  Model info file not found: {info_path}")
        model_info = None

    return model, encoders, model_info

# Bring the model, encoders and metadata into notebook scope for later cells
print("Loading model artifacts...")
model, encoders, model_info = load_model_artifacts()

# Summarize what was loaded; metadata and encoder lines appear only when present
print("\nModel Information:")
print(f"Model type: {model.__class__.__name__}" if type(model) is model.__class__ else f"Model type: {type(model).__name__}")
if model_info:
    print(f"Features: {model_info.get('n_features', 'Unknown')}")
    print(f"Feature names: {model_info.get('feature_names', ['Unknown'])}")
    print(f"Target: {model_info.get('target_name', 'Unknown')}")
if encoders:
    print(f"Encoders available: {list(encoders.keys())}")

## Cell 4: Create Inference Script for SageMaker

In [None]:
def create_inference_script():
    """Write the SageMaker inference entry point to ``../code/inference.py``.

    The generated script defines the four handler functions ``model_fn``,
    ``input_fn``, ``predict_fn`` and ``output_fn``. It accepts
    ``application/json`` and ``text/csv`` requests and returns JSON.

    The ``../code`` directory is created if needed, and an existing
    ``inference.py`` is overwritten.

    Returns:
        str: the path of the written script (``'../code/inference.py'``).
    """
    # Create code directory
    os.makedirs('../code', exist_ok=True)

    inference_script = '''
import joblib
import pandas as pd
import numpy as np
import json
import os
from io import StringIO

def model_fn(model_dir):
    """Load the model and encoders for inference"""
    model = joblib.load(os.path.join(model_dir, 'animal_adoption_model.pkl'))
    encoders = joblib.load(os.path.join(model_dir, 'label_encoders.pkl'))

    return {'model': model, 'encoders': encoders}

def input_fn(request_body, request_content_type):
    """Parse input data for prediction"""
    if request_content_type == 'application/json':
        input_data = json.loads(request_body)
        # A single record ({"column": value, ...}) holds only scalars, which
        # pd.DataFrame cannot build without an index; wrap it in a list so
        # it becomes a one-row frame.
        if isinstance(input_data, dict) and input_data and not any(
            isinstance(value, (list, tuple, dict)) for value in input_data.values()
        ):
            input_data = [input_data]
        return pd.DataFrame(input_data)
    elif request_content_type == 'text/csv':
        return pd.read_csv(StringIO(request_body))
    else:
        raise ValueError(f"Unsupported content type: {request_content_type}")

def predict_fn(input_data, model_dict):
    """Make predictions on input data"""
    model = model_dict['model']
    encoders = model_dict['encoders']

    # Preprocess the input data
    processed_data = input_data.copy()

    # Apply encoders to categorical columns
    for column, encoder in encoders.items():
        if column in processed_data.columns:
            # Handle unseen categories
            processed_data[column] = processed_data[column].astype(str)
            unseen_mask = ~processed_data[column].isin(encoder.classes_)

            if unseen_mask.any():
                # Fall back to the encoder's first class for unseen values.
                # This is a deterministic placeholder, not the most frequent
                # category (scikit-learn's LabelEncoder stores classes_ in
                # sorted order).
                fallback_class = encoder.classes_[0]
                processed_data.loc[unseen_mask, column] = fallback_class

            processed_data[column] = encoder.transform(processed_data[column])

    # Make predictions
    predictions = model.predict(processed_data)
    probabilities = model.predict_proba(processed_data)

    return {
        'predictions': predictions.tolist(),
        'probabilities': probabilities.tolist()
    }

def output_fn(prediction, content_type):
    """Format the prediction output"""
    if content_type == 'application/json':
        return json.dumps(prediction)
    else:
        raise ValueError(f"Unsupported content type: {content_type}")
'''

    # Write the inference script (explicit encoding, newline-terminated)
    script_path = '../code/inference.py'
    with open(script_path, 'w', encoding='utf-8') as f:
        f.write(inference_script.strip() + '\n')

    print(f"✅ Inference script created: {script_path}")
    return script_path

# Create the inference script
inference_script_path = create_inference_script()

# Also copy model artifacts to code directory for SageMaker
import shutil

# The model-loading cell reads ./models while this cell copies from ../models.
# Use whichever directory actually holds the model so both cells work from
# the same artifacts (../models is still preferred, as before).
artifact_source_dir = next(
    (
        d for d in ('../models', './models')
        if os.path.exists(os.path.join(d, 'animal_adoption_model.pkl'))
    ),
    '../models',
)
try:
    shutil.copy(os.path.join(artifact_source_dir, 'animal_adoption_model.pkl'), '../code/')
    shutil.copy(os.path.join(artifact_source_dir, 'label_encoders.pkl'), '../code/')
    # model_info.json is optional metadata
    if os.path.exists(os.path.join(artifact_source_dir, 'model_info.json')):
        shutil.copy(os.path.join(artifact_source_dir, 'model_info.json'), '../code/')
    print("✅ Model artifacts copied to code directory for deployment")
except OSError as e:
    # Best effort: a missing or unreadable artifact should not stop the notebook.
    print(f"⚠️  Warning: Could not copy model artifacts: {e}")

## Cell 5: Local Model Testing

In [None]:
def test_local_predictions():
    """Smoke-test the in-memory model on four hand-written animal records.

    Uses the notebook-level ``model`` and ``encoders``. Each column that has
    an encoder is cast to string; values the encoder has not seen are
    replaced by the encoder's first class (a placeholder) before encoding.
    Column 1 of ``predict_proba`` is treated as the adoption probability.

    Returns:
        bool: ``True`` when predictions were produced; ``False`` when no
        encoders are available or the prediction step raised.
    """
    print("Testing model predictions locally...")

    # Hand-written records covering both animal types
    sample_data = pd.DataFrame({
        'animal_type': ['Dog', 'Cat', 'Dog', 'Cat'],
        'sex_outcome': ['Spayed Female', 'Neutered Male', 'Intact Male', 'Spayed Female'],
        'age_in_days': [365, 730, 180, 1095],  # 1 year, 2 years, 6 months, 3 years
        'primary_breed': ['Pit Bull', 'Domestic Shorthair', 'Labrador Retriever', 'Siamese'],
        'color': ['Brown', 'Black', 'Yellow', 'Seal Point'],
        'outcome_month': [6, 3, 9, 12]  # June, March, September, December
    })

    print("Sample test data:")
    print(sample_data)

    # Without encoders the categorical columns cannot be converted
    if not encoders:
        print("❌ No encoders available for preprocessing")
        return False

    encoded = sample_data.copy()
    for feature, enc in encoders.items():
        if feature not in encoded.columns:
            continue

        as_text = encoded[feature].astype(str)
        unknown = ~as_text.isin(enc.classes_)

        if unknown.any():
            print(f"⚠️  Found unseen categories in {feature}: {as_text[unknown].tolist()}")
            # Placeholder: the first entry of classes_
            fallback = enc.classes_[0]
            as_text[unknown] = fallback
            print(f"   Replaced with: {fallback}")

        encoded[feature] = enc.transform(as_text)

    try:
        labels = model.predict(encoded)
        adopt_probs = model.predict_proba(encoded)[:, 1]

        print("\nPrediction Results:")
        for number, (label, p_adopt) in enumerate(zip(labels, adopt_probs), start=1):
            if label == 1:
                outcome, confidence = "Adopted", p_adopt
            else:
                outcome, confidence = "Not Adopted", (1 - p_adopt)
            print(f"  Animal {number}: {outcome} (confidence: {confidence:.3f})")

        print("\n✅ Local testing completed successfully!")
        return True

    except Exception as e:
        print(f"❌ Local prediction failed: {e}")
        return False

# Test the model locally. The boolean result is kept at notebook scope:
# deploy_to_sagemaker() reads local_test_success and aborts when it is False.
local_test_success = test_local_predictions()

## Cell 6: Deploy Model to SageMaker (Optional)

In [None]:
def deploy_to_sagemaker():
    """Package the trained artifacts, upload them to S3, and deploy an endpoint.

    Steps:
      1. Skip (return ``None``) when no SageMaker role / S3 bucket is
         configured, or when the local smoke test failed.
      2. Bundle ``animal_adoption_model.pkl`` and ``label_encoders.pkl`` into
         ``model.tar.gz`` and upload it to ``s3://<bucket>/models/``.
         ``model_data`` must point at such an archive; a bare S3 prefix
         that nothing was uploaded to cannot be deployed.
      3. Create an ``SKLearnModel`` with ``../code/inference.py`` as entry
         point and deploy it to a timestamp-named real-time endpoint.

    The predictor is configured for JSON because the generated inference
    script only accepts ``application/json`` and ``text/csv`` requests.

    Reads the notebook-level ``SAGEMAKER_ROLE``, ``BUCKET_NAME``,
    ``sagemaker_session`` and ``local_test_success``.

    Returns:
        The deployed predictor, or ``None`` when deployment was skipped or
        failed (the error is printed, not raised).
    """
    # Local imports keep this cell self-contained; tarfile/tempfile are stdlib.
    import tarfile
    import tempfile

    if not SAGEMAKER_ROLE or not BUCKET_NAME:
        print("⚠️  SageMaker deployment skipped - no AWS configuration available")
        print("To deploy to SageMaker:")
        print("1. Run terraform apply to create AWS resources")
        print("2. Restart this notebook to pick up the configuration")
        return None

    if not local_test_success:
        print("❌ SageMaker deployment aborted - local testing failed")
        return None

    print("Deploying model to SageMaker...")

    artifact_names = ('animal_adoption_model.pkl', 'label_encoders.pkl')

    try:
        from sagemaker.serializers import JSONSerializer
        from sagemaker.deserializers import JSONDeserializer

        # The notebook-level session may be None (e.g. session creation
        # failed earlier). Resolve it once so the success message below
        # cannot fail after the endpoint already exists.
        session = sagemaker_session if sagemaker_session is not None else sagemaker.Session()

        # Locate the artifacts: the loader cell uses ./models, the copy cell
        # uses ../models and stages copies in ../code.
        artifact_dir = next(
            (
                d for d in ('./models', '../models', '../code')
                if all(os.path.exists(os.path.join(d, name)) for name in artifact_names)
            ),
            None,
        )
        if artifact_dir is None:
            raise FileNotFoundError(
                f"Model artifacts {list(artifact_names)} not found in ./models, ../models or ../code"
            )

        # Build model.tar.gz with the artifacts at the archive root, where
        # the inference script's model_fn looks for them, then upload it.
        with tempfile.TemporaryDirectory() as staging_dir:
            archive_path = os.path.join(staging_dir, 'model.tar.gz')
            with tarfile.open(archive_path, 'w:gz') as archive:
                for artifact in artifact_names:
                    archive.add(os.path.join(artifact_dir, artifact), arcname=artifact)
            model_data = session.upload_data(
                path=archive_path,
                bucket=BUCKET_NAME,
                key_prefix='models',
            )
        print(f"Model artifacts uploaded to: {model_data}")

        # Create SKLearn model
        sklearn_model = SKLearnModel(
            model_data=model_data,
            role=SAGEMAKER_ROLE,
            entry_point='inference.py',
            source_dir='../code',
            framework_version='1.0-1',
            py_version='py3',
            sagemaker_session=session
        )

        # Deploy the model
        endpoint_name = f'animal-adoption-predictor-{datetime.now().strftime("%Y%m%d-%H%M%S")}'

        print(f"Creating endpoint: {endpoint_name}")
        print("This may take 5-10 minutes...")

        predictor = sklearn_model.deploy(
            initial_instance_count=1,
            instance_type='ml.t2.medium',  # Cost-effective for testing
            endpoint_name=endpoint_name,
            serializer=JSONSerializer(),
            deserializer=JSONDeserializer(),
        )

    except Exception as e:
        print(f"❌ SageMaker deployment failed: {e}")
        print("This is often due to:")
        print("- Insufficient IAM permissions")
        print("- Incorrect S3 bucket configuration")
        print("- SageMaker service limits")
        return None

    region = session.boto_region_name
    print("✅ Model deployed successfully!")
    print(f"Endpoint name: {endpoint_name}")
    print(f"Endpoint URL: https://{region}.console.aws.amazon.com/sagemaker/home?region={region}#/endpoints/{endpoint_name}")

    return predictor

# Deploy to SageMaker.
# NOTE: this call is live - running the cell attempts a real deployment. The
# earlier messages claimed it was commented out, which did not match the code.
print("💡 SageMaker deployment creates billable AWS resources.")
print("It is skipped automatically when no SageMaker role or S3 bucket is configured.")
print("To skip it explicitly, comment out the line: predictor = deploy_to_sagemaker()")

predictor = deploy_to_sagemaker()

## Cell 7: Test SageMaker Endpoint (if deployed)

In [None]:
def test_sagemaker_endpoint(predictor):
    """Test the deployed SageMaker endpoint"""
    
    if predictor is None:
        print("⚠️  No SageMaker endpoint to test (deployment was skipped or failed)")
        return
    
    print("🧪 Testing SageMaker endpoint...")
    
    # Test data
    test_data = {
        'animal_type': ['Dog', 'Cat'],
        'sex_outcome': ['Spayed Female', 'Neutered Male'],
        'age_in_days': [365, 730],
        'primary_breed': ['Pit Bull', 'Domestic Shorthair'],
        'color': ['Brown', 'Black'],
        'outcome_month': [6, 3]
    }
    
    try:
        # Make prediction via SageMaker endpoint
        result = predictor.predict(test_data)
        
        print("✅ SageMaker endpoint test successful!")
        print("Results:", result)
        
        return result
        
    except Exception as e:
        print(f"❌ SageMaker endpoint test failed: {e}")
        return None

# Test the endpoint if the deployment cell has been run. When deployment was
# skipped or failed, `predictor` is None and test_sagemaker_endpoint() reports
# that itself.
if 'predictor' in globals():
    test_sagemaker_endpoint(predictor)
else:
    # The previous message claimed the test was "commented out", but it runs
    # whenever `predictor` exists; only report when nothing was attempted.
    print("💡 No deployment has been attempted yet.")
    print("Run the deployment cell above first if you want to test a SageMaker endpoint.")

## Cell 8: Batch Prediction Function

In [None]:
def make_batch_predictions(input_file_path, output_file_path=None):
    """Score every row of a CSV file with the notebook-level model.

    The CSV must contain the columns ``animal_type``, ``sex_outcome``,
    ``age_in_days``, ``primary_breed``, ``color`` and ``outcome_month``.
    Columns that have an entry in the notebook-level ``encoders`` are cast to
    string, unseen values are replaced by the encoder's first class, and the
    column is encoded. Column 1 of ``predict_proba`` is stored as the
    adoption probability.

    Args:
        input_file_path: path of the CSV to score.
        output_file_path: optional path; when given, the scored frame is
            written there as CSV without the index.

    Returns:
        A copy of the input frame with ``adoption_prediction``,
        ``adoption_probability`` and ``prediction_confidence`` added, or
        ``None`` when the file is missing, required columns are missing, or
        any step raised (the error is printed, not raised).
    """
    if not os.path.exists(input_file_path):
        print(f"❌ Input file not found: {input_file_path}")
        return None

    print(f"📂 Loading data from: {input_file_path}")

    try:
        data = pd.read_csv(input_file_path)
        print(f"✅ Loaded {len(data):,} records for prediction")

        # Validate the schema before touching the model
        required = ['animal_type', 'sex_outcome', 'age_in_days', 'primary_breed', 'color', 'outcome_month']
        missing = [name for name in required if name not in data.columns]
        if missing:
            print(f"❌ Missing required columns: {missing}")
            print(f"Available columns: {list(data.columns)}")
            return None

        # Work on the model's input columns only
        features = data[required].copy()

        for feature, enc in (encoders or {}).items():
            if feature not in features.columns:
                continue
            as_text = features[feature].astype(str)
            unknown = ~as_text.isin(enc.classes_)
            if unknown.any():
                # Placeholder for unseen values: the first entry of classes_
                as_text[unknown] = enc.classes_[0]
            features[feature] = enc.transform(as_text)

        predicted = model.predict(features)
        adopt_prob = model.predict_proba(features)[:, 1]

        # Attach the results to a copy of the full original frame
        result_data = data.copy()
        result_data['adoption_prediction'] = predicted
        result_data['adoption_probability'] = adopt_prob
        result_data['prediction_confidence'] = np.where(
            predicted == 1, adopt_prob, 1 - adopt_prob
        )

        # Summary statistics
        adoption_rate = (predicted == 1).mean() * 100
        confidence = result_data['prediction_confidence']
        print("\n📊 Prediction Summary:")
        print(f"Predicted adoption rate: {adoption_rate:.1f}%")
        print(f"High confidence predictions (>80%): {(confidence > 0.8).sum():,}")
        print(f"Average confidence: {confidence.mean():.3f}")

        if output_file_path:
            result_data.to_csv(output_file_path, index=False)
            print(f"✅ Results saved to: {output_file_path}")

        return result_data

    except Exception as e:
        print(f"❌ Batch prediction failed: {e}")
        return None

# Usage notes only - nothing is scored here; the example call is printed as text
print(
    "Batch prediction example:",
    "# results = make_batch_predictions('../data/new_animals.csv', '../data/predictions.csv')",
    "\nTo use batch predictions:",
    "1. Prepare a CSV file with the required columns",
    "2. Call make_batch_predictions(input_file, output_file)",
    "3. Review the predictions in the output file",
    sep="\n",
)

## Cell 9: Cleanup and Next Steps

In [None]:
def cleanup_resources():
    """Print cleanup instructions and suggested next steps.

    This only prints text; it does not delete or modify any AWS resource.
    """
    guidance = (
        "🧹 Resource Cleanup Instructions",
        "=" * 50,
        # Cleanup checklist
        "\n⚠️  IMPORTANT: Clean up SageMaker resources to avoid charges!",
        "\nIf you deployed a SageMaker endpoint:",
        "\n1. Delete the endpoint:",
        "   # Uncomment and run this if you deployed an endpoint:",
        "   # predictor.delete_endpoint()",
        "\n2. Check AWS Console:",
        "   - Go to SageMaker console → Endpoints",
        "   - Verify no endpoints are running",
        "\n3. Clean up S3 model artifacts:",
        "   - Check S3 bucket for uploaded models",
        "   - Delete if no longer needed",
        # Production hardening ideas
        "\n💡 Next Steps for Production:",
        "-" * 30,
        "1. **Monitoring**: Set up CloudWatch alarms for endpoint health",
        "2. **Scaling**: Configure auto-scaling for production traffic",
        "3. **Security**: Implement VPC endpoints for private access",
        "4. **Model Updates**: Set up automated retraining pipeline",
        "5. **A/B Testing**: Deploy multiple model versions for comparison",
        # Ways to consume the model
        "\n🎯 Integration Options:",
        "-" * 25,
        "1. **Web App**: Build a simple web interface for predictions",
        "2. **API**: Create REST API endpoints using AWS API Gateway",
        "3. **Batch Processing**: Schedule daily/weekly batch predictions",
        "4. **Real-time Stream**: Process incoming animal data streams",
    )
    for line in guidance:
        print(line)
    
# Print the cleanup guidance, then the closing summary banner
cleanup_resources()

print(
    "\n" + "=" * 60,
    "🎉 MODEL DEPLOYMENT NOTEBOOK COMPLETE!",
    "=" * 60,
    "\n✅ What you accomplished:",
    "  • Loaded and tested trained model locally",
    "  • Created SageMaker inference script",
    "  • Set up deployment pipeline (ready to use)",
    "  • Created batch prediction functionality",
    "\n🚀 Your model is ready for production deployment!",
    sep="\n",
)