In [1]:
import os
import shutil
import tarfile

import boto3
import mlflow
from mlflow.tracking import MlflowClient

# --- Configuration ---
# Everything a reader may need to tune before running the packaging steps.

# Destination S3 bucket for the packaged model. IMPORTANT: point this at your own bucket.
S3_BUCKET_NAME = "flaskcapstonebucket"  # e.g., "lead-conversion-raw-data-..."

# Name under which the model is registered in the MLflow Model Registry.
MODEL_NAME_IN_REGISTRY = "LeadConversionModel"

# Local artifact locations. The project is assumed to have been cloned into the
# SageMaker home directory, hence the '~'-relative root.
PROJECT_ROOT = os.path.expanduser("~/FINAL_CAPSTONE")
LOCAL_ARTIFACTS_PATH = os.path.join(PROJECT_ROOT, "artifacts")

# Layout of the deployment package: a scratch directory that is assembled first,
# and the name of the archive built from it.
STAGING_DIR, MODEL_ARTIFACT_BUNDLE = "staging", "model.tar.gz"

# Echo the effective settings so the cell output records what was used.
print(
    f"Using S3 bucket: {S3_BUCKET_NAME}",
    f"Using local artifacts from: {LOCAL_ARTIFACTS_PATH}",
    sep="\n",
)

# --- Step 1: Download the "Production" model from the MLflow Model Registry ---
#
# Looks up the registered model's "Production" version, recreates an empty
# staging directory, and downloads the model artifacts into it.
#
# NOTE(review): MlflowClient() is created without an explicit tracking/registry
# URI, so it uses whatever MLflow resolves by default (e.g. the
# MLFLOW_TRACKING_URI environment variable or an earlier
# mlflow.set_tracking_uri(...) call). A "Registered Model ... not found" error
# most likely means the client is pointed at a different server/store than the
# one the model was registered in -- confirm the tracking URI before running.
#
# NOTE(review): the saved output echoes the get_latest_versions line, which
# looks like a deprecation warning for stage-based lookup; if so, consider
# migrating to model version aliases -- TODO confirm against the installed
# MLflow version.

print("\n--- Step 1: Downloading Production Model ---")
client = MlflowClient()

try:
    # Find the latest version of the model that is in the "Production" stage
    latest_versions = client.get_latest_versions(name=MODEL_NAME_IN_REGISTRY, stages=["Production"])
    if not latest_versions:
        raise RuntimeError("No model found in the 'Production' stage.")

    production_model = latest_versions[0]
    model_uri = production_model.source

    print(f"Found production model: Version {production_model.version}, Run ID {production_model.run_id}")
    print(f"Model URI: {model_uri}")

    # Create a clean staging directory so leftovers from a previous run never
    # end up in the package (requires `shutil`, imported at the top of the file).
    if os.path.exists(STAGING_DIR):
        shutil.rmtree(STAGING_DIR)
    os.makedirs(STAGING_DIR)

    # Download the model artifacts into the staging directory
    mlflow.artifacts.download_artifacts(artifact_uri=model_uri, dst_path=STAGING_DIR)

    # The model is downloaded into a subdirectory (e.g., 'StackingEnsemble').
    # Discover its name, failing with a clear message instead of an IndexError
    # when no subdirectory was produced. Sorting makes the pick deterministic.
    downloaded_dirs = sorted(
        d for d in os.listdir(STAGING_DIR) if os.path.isdir(os.path.join(STAGING_DIR, d))
    )
    if not downloaded_dirs:
        raise RuntimeError(
            f"Download finished but no model directory was found in '{STAGING_DIR}/'."
        )
    downloaded_model_name = downloaded_dirs[0]
    print(f"✅ Successfully downloaded model '{downloaded_model_name}' to '{STAGING_DIR}/'")

except Exception as e:
    print(f"❌ Failed to download model. Error: {e}")
    # Bare raise re-raises the active exception with its original traceback.
    raise

# --- Step 2: Copy all preprocessing artifacts into the package ---
#
# Copies the whole local artifacts directory to '<STAGING_DIR>/artifacts'.
# Requires `shutil`, imported at the top of the file.

print("\n--- Step 2: Copying Preprocessing Artifacts ---")
try:
    # The custom container expects the preprocessing artifacts to be in an 'artifacts' subdirectory
    destination_path = os.path.join(STAGING_DIR, "artifacts")

    # Fail early with an actionable message if the project checkout is missing
    # or PROJECT_ROOT points at the wrong place.
    if not os.path.isdir(LOCAL_ARTIFACTS_PATH):
        raise FileNotFoundError(
            f"Local artifacts directory not found: '{LOCAL_ARTIFACTS_PATH}'"
        )

    # Copy the entire local artifacts directory into the staging directory.
    # copytree raises if the destination already exists.
    shutil.copytree(LOCAL_ARTIFACTS_PATH, destination_path)

    print(f"✅ Successfully copied all preprocessing artifacts to '{destination_path}/'")

except Exception as e:
    print(f"❌ Failed to copy artifacts. Error: {e}")
    # Bare raise re-raises the active exception with its original traceback.
    raise

# --- Step 3: Create the compressed model.tar.gz file ---
#
# Packs the staging directory into a gzip-compressed tar archive in the current
# working directory.

print("\n--- Step 3: Creating model.tar.gz ---")
try:
    bundle = tarfile.open(name=MODEL_ARTIFACT_BUNDLE, mode="w:gz")
    with bundle:
        # arcname='.' stores the staging contents relative to the archive root,
        # so entries are not prefixed with the staging directory's own name.
        bundle.add(STAGING_DIR, arcname='.')

    print(f"✅ Successfully created '{MODEL_ARTIFACT_BUNDLE}'")

except Exception as err:
    print(f"❌ Failed to create tarball. Error: {err}")
    raise err

# --- Step 4: Upload the model package to S3 ---

print("\n--- Step 4: Uploading to S3 ---")
try:
    s3_client = boto3.client('s3')
    s3_model_path = f"models/{MODEL_NAME_IN_REGISTRY}/model.tar.gz"
    
    s3_client.upload_file(MODEL_ARTIFACT_BUNDLE, S3_BUCKET_NAME, s3_model_path)
    
    model_s3_uri = f"s3://{S3_BUCKET_NAME}/{s3_model_path}"
    print(f"✅ Successfully uploaded model package to: {model_s3_uri}")
    
    # This is the URI you will need for deployment
    %store model_s3_uri

except Exception as e:
    print(f"❌ Failed to upload to S3. Error: {e}")
    raise e

print("\n🎉 Packaging complete! The model is ready for deployment.")


Using S3 bucket: flaskcapstonebucket
Using local artifacts from: /home/sagemaker-user/FINAL_CAPSTONE/artifacts

--- Step 1: Downloading Production Model ---
❌ Failed to download model. Error: Registered Model with name=LeadConversionModel not found


  latest_versions = client.get_latest_versions(name=MODEL_NAME_IN_REGISTRY, stages=["Production"])


MlflowException: Registered Model with name=LeadConversionModel not found