In [3]:
## Step 1: imports and SageMaker session setup
import boto3
import sagemaker
from sagemaker import image_uris
import pandas as pd
import time
import json

# Setup: shared SageMaker handles referenced by the training helpers below.
session = sagemaker.Session()  # also used to upload the CSV splits to S3
region = session.boto_region_name  # region used when looking up algorithm image URIs
role = sagemaker.get_execution_role()  # IAM role passed to every Estimator
# NOTE(review): the bucket name is hard-coded and looks specific to one lab
# environment -- confirm/update it before running this notebook elsewhere.
bucket = "c182567a4701745l12354044t1w717192035388-labbucket-qyu2bpf3cfmc"

In [4]:


## Step 2: build and evaluate simple models

def _shuffled_indices(n_samples, random_seed):
    """Return the row positions ``0 .. n_samples - 1`` in a seeded random order."""
    import random  # local import: the notebook's import cell does not load `random`

    indices = list(range(n_samples))
    # A private Random instance produces the same permutation that seeding the
    # global generator would, but does not reset global random state for
    # unrelated code running in the same kernel.
    random.Random(random_seed).shuffle(indices)
    return indices


def _take_rows(data, positions):
    """Select the entries of ``data`` at the given integer positions.

    pandas objects are indexed positionally via ``.iloc``; lists and tuples are
    rebuilt element by element; anything else (e.g. a NumPy array) is indexed
    directly with the list of positions.
    """
    if hasattr(data, "iloc"):
        return data.iloc[positions]
    if isinstance(data, (list, tuple)):
        picked = [data[i] for i in positions]
        return picked if isinstance(data, list) else tuple(picked)
    return data[positions]


def manual_train_val_test_split(X, y, train_size=0.7, val_size=0.15, test_size=0.15, random_seed=42):
    """Shuffle the samples and split them into train / validation / test parts.

    Args:
        X: Features (pandas DataFrame/Series, NumPy array, list or tuple).
        y: Targets with the same number of entries as ``X``. Its container
            type is handled independently of ``X``'s.
        train_size: Fraction of samples assigned to the training set.
        val_size: Fraction of samples assigned to the validation set.
        test_size: Accepted for interface symmetry only. The test set always
            receives every sample not assigned to train/validation, so no
            rows are lost to integer rounding.
        random_seed: Seed for the shuffle; the same seed gives the same split.

    Returns:
        Tuple ``(X_train, X_val, X_test, y_train, y_val, y_test)``.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, or if the train and
            validation fractions are negative or sum to more than 1.
    """
    n_samples = len(X)
    if len(y) != n_samples:
        raise ValueError(
            f"X and y must have the same length (got {n_samples} and {len(y)})"
        )
    if train_size < 0 or val_size < 0 or train_size + val_size > 1 + 1e-9:
        raise ValueError(
            "train_size and val_size must be non-negative and sum to at most 1 "
            f"(got {train_size} and {val_size})"
        )

    n_train = int(n_samples * train_size)
    n_val = int(n_samples * val_size)

    indices = _shuffled_indices(n_samples, random_seed)
    train_indices = indices[:n_train]
    val_indices = indices[n_train:n_train + n_val]
    test_indices = indices[n_train + n_val:]  # remainder, see `test_size` note

    return (
        _take_rows(X, train_indices),
        _take_rows(X, val_indices),
        _take_rows(X, test_indices),
        _take_rows(y, train_indices),
        _take_rows(y, val_indices),
        _take_rows(y, test_indices),
    )

def manual_train_test_split(X, y, test_size=0.2, random_seed=42):
    """Shuffle the samples and split them into train / test parts.

    Args:
        X: Features (pandas DataFrame/Series, NumPy array, list or tuple).
        y: Targets with the same number of entries as ``X``.
        test_size: Fraction of samples assigned to the test set; the rest
            become the training set.
        random_seed: Seed for the shuffle; the same seed gives the same split.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, or if ``test_size``
            is outside ``[0, 1]``.
    """
    n_samples = len(X)
    if len(y) != n_samples:
        raise ValueError(
            f"X and y must have the same length (got {n_samples} and {len(y)})"
        )
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1 (got {test_size})")

    n_test = int(n_samples * test_size)

    indices = _shuffled_indices(n_samples, random_seed)
    test_indices = indices[:n_test]
    train_indices = indices[n_test:]

    return (
        _take_rows(X, train_indices),
        _take_rows(X, test_indices),
        _take_rows(y, train_indices),
        _take_rows(y, test_indices),
    )

def manual_accuracy_score(y_true, y_pred):
    """Return the fraction of predictions that equal the corresponding true label.

    Args:
        y_true: Iterable of ground-truth labels.
        y_pred: Iterable of predicted labels, aligned with ``y_true``.

    Returns:
        ``correct / total`` as a float, or ``0.0`` when there are no samples.

    Raises:
        ValueError: If the two inputs contain a different number of labels.
            Pairing them anyway would silently drop the surplus entries and
            report a misleading score.
    """
    # Materialise both inputs so they can be measured and paired safely.
    y_true = list(y_true)
    y_pred = list(y_pred)

    if len(y_true) != len(y_pred):
        raise ValueError(
            f"y_true and y_pred must have the same length "
            f"(got {len(y_true)} and {len(y_pred)})"
        )
    if not y_true:
        return 0.0

    correct = sum(1 for true, pred in zip(y_true, y_pred) if true == pred)
    return correct / len(y_true)

def calculate_comprehensive_metrics(y_true, y_pred, y_proba=None):
    """Compute accuracy, precision, recall, F1 and confusion counts for 0/1 labels.

    Args:
        y_true: Ground-truth labels; entries equal to 1 count as positive and
            entries equal to 0 as negative.
        y_pred: Predicted labels, aligned with ``y_true``.
        y_proba: Accepted but not used by this implementation.

    Returns:
        Dict with keys ``accuracy``, ``precision``, ``recall``, ``f1_score``
        and ``confusion_matrix`` (itself a dict of the four cell counts).
        Precision, recall and F1 are reported as 0 when their denominator
        would be zero.
    """
    overall_accuracy = manual_accuracy_score(y_true, y_pred)

    # Tally the four confusion-matrix cells in one sweep over the pairs.
    hits_pos = 0      # actual 1, predicted 1
    false_alarm = 0   # actual 0, predicted 1
    hits_neg = 0      # actual 0, predicted 0
    missed = 0        # actual 1, predicted 0
    for actual, predicted in zip(y_true, y_pred):
        if actual == 1 and predicted == 1:
            hits_pos += 1
        elif actual == 0 and predicted == 1:
            false_alarm += 1
        elif actual == 0 and predicted == 0:
            hits_neg += 1
        elif actual == 1 and predicted == 0:
            missed += 1

    predicted_positive = hits_pos + false_alarm
    actual_positive = hits_pos + missed

    if predicted_positive > 0:
        precision = hits_pos / predicted_positive
    else:
        precision = 0

    if actual_positive > 0:
        recall = hits_pos / actual_positive
    else:
        recall = 0

    if (precision + recall) > 0:
        f1 = 2 * (precision * recall) / (precision + recall)
    else:
        f1 = 0

    confusion = {
        'true_positive': hits_pos,
        'false_positive': false_alarm,
        'true_negative': hits_neg,
        'false_negative': missed,
    }

    return {
        'accuracy': overall_accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'confusion_matrix': confusion,
    }

def _parse_prediction_line(line, model_type):
    """Extract a score from one line of batch-transform output.

    For ``model_type == "linear"`` the line is first tried as a JSON object:
    its ``'score'`` entry is returned if present, otherwise its first numeric
    value. Any other line (or any other model type) is parsed as a bare float.

    Returns:
        The parsed value, or ``None`` when the line cannot be interpreted.
    """
    text = line.strip()

    if model_type == "linear":
        try:
            parsed = json.loads(text)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            parsed = None
        if isinstance(parsed, dict):
            if 'score' in parsed:
                return parsed['score']
            # No 'score' key: fall back to the first numeric value, if any.
            for value in parsed.values():
                if isinstance(value, (int, float)):
                    return float(value)
            return None

    try:
        return float(text)
    except ValueError:
        return None


def simple_train_test(dataset_name, model_type="linear"):
    """Train a built-in SageMaker model on a CSV dataset and report test accuracy.

    The CSV is read without a header; column 0 is the target and the remaining
    columns are features. The data is split 80/20, uploaded to S3, used to
    train the model, and the held-out features are scored with a batch
    transform job whose output is downloaded and compared with the labels.

    Args:
        dataset_name: Path of the CSV file to load.
        model_type: ``"linear"`` selects Linear Learner; any other value
            selects XGBoost.

    Returns:
        Accuracy on the held-out split, or ``0.0`` if the predictions could
        not be downloaded, parsed, or matched one-to-one with the test labels
        (the error is printed). Failures in training or the transform job are
        not caught here and propagate to the caller.
    """
    print(f"\n=== Training {model_type} on {dataset_name} ===")

    # 1. Load data
    df = pd.read_csv(dataset_name, header=None)

    # 2. Convert any boolean columns to 1/0
    bool_cols = df.select_dtypes(include=['bool']).columns
    if len(bool_cols) > 0:
        df[bool_cols] = df[bool_cols].astype(int)

    # 3. Split data manually
    y = df.iloc[:, 0]  # target
    X = df.iloc[:, 1:] # features

    X_train, X_test, y_train, y_test = manual_train_test_split(X, y, test_size=0.2)

    print(f"Data split: {X_train.shape[0]} train, {X_test.shape[0]} test")

    # 4. Save as CSV (label first for training; features only for inference)
    train_data = pd.concat([y_train, X_train], axis=1)
    test_data = X_test  # No labels for prediction

    train_file = f"train_{model_type}.csv"
    test_file = f"test_{model_type}.csv"

    train_data.to_csv(train_file, header=False, index=False)
    test_data.to_csv(test_file, header=False, index=False)

    # 5. Upload to S3 under a timestamped prefix so runs do not collide
    prefix = f"final-{model_type}-{int(time.time())}"
    train_s3 = session.upload_data(train_file, bucket=bucket, key_prefix=f"{prefix}/train")
    test_s3 = session.upload_data(test_file, bucket=bucket, key_prefix=f"{prefix}/test")

    print("Data uploaded to S3")

    # 6. Get algorithm container
    if model_type == "linear":
        container = image_uris.retrieve("linear-learner", region, "1")
    else:
        container = image_uris.retrieve("xgboost", region, "1.2-1")

    # 7. Create and train model
    estimator = sagemaker.estimator.Estimator(
        image_uri=container,
        role=role,
        instance_count=1,
        instance_type="ml.m5.large",
        output_path=f"s3://{bucket}/{prefix}/output",
        sagemaker_session=session,
    )

    # Set hyperparameters
    if model_type == "linear":
        estimator.set_hyperparameters(
            predictor_type="binary_classifier",
            epochs=10,
            mini_batch_size=1000
        )
    else:
        estimator.set_hyperparameters(
            objective="binary:logistic",
            num_round=20
        )

    # Train
    print("Training started...")
    train_input = sagemaker.inputs.TrainingInput(train_s3, content_type='text/csv')
    estimator.fit({'train': train_input})
    print("Training completed!")

    # 8. Score the held-out features with a batch transform job
    print("Running batch transform...")

    transformer = estimator.transformer(
        instance_count=1,
        instance_type='ml.m5.large',
        output_path=f's3://{bucket}/{prefix}/predictions'
    )

    transformer.transform(
        data=test_s3,
        content_type='text/csv',
        split_type='Line'
    )

    transformer.wait()
    print("Predictions completed!")

    # 9. Get predictions
    s3_client = boto3.client('s3')

    # The transform output object is the input file name plus ".out"
    predictions_key = f"{prefix}/predictions/test_{model_type}.csv.out"

    try:
        response = s3_client.get_object(Bucket=bucket, Key=predictions_key)
        predictions_content = response['Body'].read().decode('utf-8')

        # Parse predictions; unparseable lines keep the neutral 0.5 placeholder
        # but are counted so the substitution is visible in the output.
        pred_probs = []
        unparsed_lines = 0
        for line in predictions_content.strip().split('\n'):
            if not line.strip():
                continue
            score = _parse_prediction_line(line, model_type)
            if score is None:
                unparsed_lines += 1
                score = 0.5
            pred_probs.append(score)

        print(f"Parsed {len(pred_probs)} predictions")
        if unparsed_lines:
            print(f"Warning: {unparsed_lines} prediction lines could not be parsed; defaulted to 0.5")

        # Accuracy is only meaningful when every test row has one prediction.
        if len(pred_probs) != len(y_test):
            raise ValueError(
                f"expected {len(y_test)} predictions but parsed {len(pred_probs)}"
            )

        pred_labels = [1 if p > 0.5 else 0 for p in pred_probs]
        acc = manual_accuracy_score(y_test.values, pred_labels)

        print(f"{model_type.upper()} Accuracy: {acc:.4f}")

        print(f"Sample - True: {y_test.values[:3]}, Pred: {pred_labels[:3]}")
        return acc

    except Exception as e:
        print(f"Error getting predictions: {e}")
        return 0.0

# Step 3: Build and evaluate ensemble models
def build_ensemble_model(dataset_name, dataset_version):
    """Build and evaluate ensemble model using XGBoost with comprehensive metrics.

    The CSV is read without a header; column 0 is the target and the remaining
    columns are features. The data is split 70/15/15, the train and validation
    parts are used to fit a SageMaker XGBoost model, and the test features are
    scored with a batch transform job.

    Args:
        dataset_name: Path of the CSV file to load.
        dataset_version: Short label for the dataset; used in local file
            names, the S3 prefix and printed messages.

    Returns:
        The metrics dict produced by ``calculate_comprehensive_metrics``, or
        ``None`` if the predictions could not be downloaded, parsed, or
        matched one-to-one with the test labels (the error is printed).
        Failures in training or the transform job are not caught here.
    """
    print(f"\nSTEP 3: BUILDING ENSEMBLE MODEL FOR {dataset_version.upper()}")
    print(f"Dataset: {dataset_name}")

    # 1. Load and prepare data
    df = pd.read_csv(dataset_name, header=None)

    # Convert any boolean columns to 1/0
    bool_cols = df.select_dtypes(include=['bool']).columns
    if len(bool_cols) > 0:
        df[bool_cols] = df[bool_cols].astype(int)

    # Split target and features
    y = df.iloc[:, 0]  # first column = target
    X = df.iloc[:, 1:] # rest = features

    print(f"Dataset shape: {df.shape}")
    print(f"Target distribution: {y.value_counts().to_dict()}")

    # 2. Split into training, validation, and testing sets (70-15-15)
    X_train, X_val, X_test, y_train, y_val, y_test = manual_train_val_test_split(
        X, y, train_size=0.7, val_size=0.15, test_size=0.15
    )

    print(f"Data split - Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

    # 3. Save datasets (label first for train/validation; features only for test)
    train_data = pd.concat([y_train, X_train], axis=1)
    val_data = pd.concat([y_val, X_val], axis=1)
    test_features = X_test

    train_file = f"train_ensemble_{dataset_version}.csv"
    val_file = f"val_ensemble_{dataset_version}.csv"
    test_file = f"test_ensemble_{dataset_version}.csv"

    train_data.to_csv(train_file, header=False, index=False)
    val_data.to_csv(val_file, header=False, index=False)
    test_features.to_csv(test_file, header=False, index=False)

    # 4. Upload to S3 under a timestamped prefix so runs do not collide
    timestamp = str(int(time.time()))
    prefix = f"ensemble-xgboost-{dataset_version}-{timestamp}"

    train_s3 = session.upload_data(train_file, bucket=bucket, key_prefix=f"{prefix}/train")
    val_s3 = session.upload_data(val_file, bucket=bucket, key_prefix=f"{prefix}/validation")
    test_s3 = session.upload_data(test_file, bucket=bucket, key_prefix=f"{prefix}/test")

    print("Data uploaded to S3")

    # 5. Get XGBoost container
    container = image_uris.retrieve("xgboost", region, "1.2-1")

    # 6. Create XGBoost estimator with ensemble-friendly hyperparameters
    estimator = sagemaker.estimator.Estimator(
        image_uri=container,
        role=role,
        instance_count=1,
        instance_type="ml.m5.xlarge",  # Larger instance for ensemble
        output_path=f"s3://{bucket}/{prefix}/output",
        sagemaker_session=session,
    )

    # Set ensemble-optimized hyperparameters
    estimator.set_hyperparameters(
        objective="binary:logistic",
        num_round=100,  # More rounds for better convergence
        eta=0.1,  # Lower learning rate for stability
        max_depth=8,  # Deeper trees for complex patterns
        subsample=0.8,  # Subsampling for ensemble effect
        colsample_bytree=0.8,  # Column sampling for ensemble effect
        min_child_weight=3,  # Regularization
        gamma=0.1,  # Regularization
        eval_metric="auc"
    )

    # 7. Train model with validation set
    print("Training ensemble model...")
    train_input = sagemaker.inputs.TrainingInput(train_s3, content_type='text/csv')
    val_input = sagemaker.inputs.TrainingInput(val_s3, content_type='text/csv')

    estimator.fit({'train': train_input, 'validation': val_input})
    print("Ensemble training completed!")

    # 8. Prepare inference. No endpoint is deployed here: the model is scored
    #    through a batch transform job created from the trained estimator.
    print("Hosting model on ml.m5.xlarge instance...")

    transformer = estimator.transformer(
        instance_count=1,
        instance_type='ml.m5.xlarge',
        output_path=f's3://{bucket}/{prefix}/batch-transform',
        strategy='MultiRecord',
        assemble_with='Line',
        accept='text/csv'
    )

    # 9. Perform batch transform to evaluate on testing data
    print("Running batch transform on test data...")
    transformer.transform(
        data=test_s3,
        content_type='text/csv',
        split_type='Line'
    )

    transformer.wait()
    print("Batch transform completed!")

    # 10. Get predictions and calculate metrics
    s3_client = boto3.client('s3')
    # The transform output object is the input file name plus ".out"
    predictions_key = f"{prefix}/batch-transform/test_ensemble_{dataset_version}.csv.out"

    try:
        response = s3_client.get_object(Bucket=bucket, Key=predictions_key)
        predictions_content = response['Body'].read().decode('utf-8')

        # Parse predictions; unparseable lines keep the neutral 0.5 placeholder
        # but are counted so the substitution is visible in the output.
        pred_probs = []
        unparsed_lines = 0
        for line in predictions_content.strip().split('\n'):
            if not line.strip():
                continue
            try:
                pred_probs.append(float(line.strip()))
            except ValueError:
                unparsed_lines += 1
                pred_probs.append(0.5)

        print(f"Parsed {len(pred_probs)} ensemble predictions")
        if unparsed_lines:
            print(f"Warning: {unparsed_lines} prediction lines could not be parsed; defaulted to 0.5")

        # Metrics are only meaningful when every test row has one prediction.
        if len(pred_probs) != len(y_test):
            raise ValueError(
                f"expected {len(y_test)} predictions but parsed {len(pred_probs)}"
            )

        # Convert probabilities to labels
        pred_labels = [1 if p > 0.5 else 0 for p in pred_probs]

        # 11. Report comprehensive performance metrics
        metrics = calculate_comprehensive_metrics(y_test.values, pred_labels, pred_probs)

        print(f"\nENSEMBLE MODEL PERFORMANCE METRICS ({dataset_version.upper()}):")
        print(f"   Accuracy:  {metrics['accuracy']:.4f}")
        print(f"   Precision: {metrics['precision']:.4f}")
        print(f"   Recall:    {metrics['recall']:.4f}")
        print(f"   F1-Score:  {metrics['f1_score']:.4f}")
        print(f"   Confusion Matrix:")
        print(f"     - True Positives:  {metrics['confusion_matrix']['true_positive']}")
        print(f"     - False Positives: {metrics['confusion_matrix']['false_positive']}")
        print(f"     - True Negatives:  {metrics['confusion_matrix']['true_negative']}")
        print(f"     - False Negatives: {metrics['confusion_matrix']['false_negative']}")

        return metrics

    except Exception as e:
        print(f"Error getting ensemble predictions: {e}")
        return None

# Main execution: train the simple models and the tuned XGBoost model on each
# dataset, then print a side-by-side comparison.
print("STARTING COMPREHENSIVE MODEL COMPARISON")

# (CSV path, short version label) for every dataset to evaluate
datasets = [
    ("combined_cvs_v1_small.csv", "v1"),
    ("combined_cvs_v2_small.csv", "v2")
]

results = {}           # version -> {'linear': accuracy, 'xgboost': accuracy}
ensemble_results = {}  # version -> metrics dict, or None if the run failed

# Run simple models comparison
for dataset, version in datasets:
    print(f"\n{'='*60}")
    print(f"PROCESSING {version.upper()}: {dataset}")
    print(f"{'='*60}")

    # A failed run is recorded as 0.0 so the remaining models still execute.
    try:
        linear_acc = simple_train_test(dataset, "linear")
    except Exception as e:
        print(f"Linear Learner failed: {e}")
        linear_acc = 0.0

    try:
        xgb_acc = simple_train_test(dataset, "xgboost")
    except Exception as e:
        print(f"XGBoost failed: {e}")
        xgb_acc = 0.0

    results[version] = {
        'linear': linear_acc,
        'xgboost': xgb_acc
    }

    print(f"\n{version.upper()} SIMPLE MODELS RESULTS:")
    print(f"  Linear Learner: {linear_acc:.4f}")
    print(f"  XGBoost: {xgb_acc:.4f}")

# Step 3: Build and evaluate ensemble models
print(f"\n{'='*80}")
print("STEP 3: BUILDING AND EVALUATING ENSEMBLE MODELS")
print(f"{'='*80}")

for dataset, version in datasets:
    print(f"\nBUILDING ENSEMBLE MODEL FOR {version.upper()}")
    # Guard the call like the simple models above: a failed training or
    # transform job is recorded as None (reported as a failure in the final
    # comparison) instead of aborting the remaining datasets.
    try:
        ensemble_metrics = build_ensemble_model(dataset, version)
    except Exception as e:
        print(f"Ensemble model failed: {e}")
        ensemble_metrics = None
    ensemble_results[version] = ensemble_metrics

# FINAL COMPARISON
print(f"\n{'='*80}")
print("FINAL COMPREHENSIVE COMPARISON: SIMPLE vs ENSEMBLE MODELS")
print(f"{'='*80}")

for _, version_key in datasets:
    simple_linear = results[version_key]['linear']
    simple_xgb = results[version_key]['xgboost']
    ensemble_metrics = ensemble_results[version_key]

    print(f"\nDATASET {version_key.upper()} COMPARISON:")
    print("\n   SIMPLE MODELS:")
    print(f"     Linear Learner: {simple_linear:.4f}")
    print(f"     XGBoost:       {simple_xgb:.4f}")

    if ensemble_metrics:
        print("\n   ENSEMBLE MODEL (XGBoost Optimized):")
        print(f"     Accuracy:      {ensemble_metrics['accuracy']:.4f}")
        print(f"     Precision:     {ensemble_metrics['precision']:.4f}")
        print(f"     Recall:        {ensemble_metrics['recall']:.4f}")
        print(f"     F1-Score:      {ensemble_metrics['f1_score']:.4f}")

        # Performance comparison (accuracy only; the simple models report
        # no other metric)
        best_simple = max(simple_linear, simple_xgb)
        ensemble_acc = ensemble_metrics['accuracy']
        improvement = ensemble_acc - best_simple

        print("\n PERFORMANCE COMPARISON:")
        print(f"     Best Simple Model:    {best_simple:.4f}")
        print(f"     Ensemble Model:       {ensemble_acc:.4f}")
        print(f"     Improvement:          {improvement:+.4f}")

        if improvement > 0:
            print(f"Ensemble model performed better by {improvement:.4f}")
        elif improvement < 0:
            print(f"Simple model performed better by {-improvement:.4f}")
        else:
            print("Both approaches performed equally")

        print("\nOBSERVATIONS:")
        # An accuracy gain above 0.02 is reported as a significant improvement.
        if improvement > 0.02:
            print("     - Ensemble model significantly outperforms simple models")
            print("     - The ensemble approach better captures complex patterns")
            print("     - Additional training rounds and regularization helped")
        elif improvement > 0:
            print("     - Ensemble model shows modest improvement")
            print("     - The dataset might have simpler patterns")
            print("     - Ensemble benefits are present but limited")
        else:
            print("     - Simple models are sufficient for this dataset")
            print("     - Ensemble complexity doesn't provide additional benefits")
            print("     - Consider feature engineering for better performance")
    else:
        print(f"\n Ensemble model failed for {version_key.upper()}")

print(f"\n{'='*80}")
print("COMPREHENSIVE ANALYSIS COMPLETED!")
print(f"{'='*80}")

STARTING COMPREHENSIVE MODEL COMPARISON

PROCESSING V1: combined_cvs_v1_small.csv

=== Training linear on combined_cvs_v1_small.csv ===
Data split: 88800 train, 22199 test
Data uploaded to S3


INFO:sagemaker:Creating training-job with name: linear-learner-2025-10-31-14-17-31-995


Training started...
2025-10-31 14:17:35 Starting - Starting the training job...
2025-10-31 14:17:51 Starting - Preparing the instances for training...
2025-10-31 14:18:14 Downloading - Downloading input data...
2025-10-31 14:19:00 Downloading - Downloading the training image.........
2025-10-31 14:20:26 Training - Training image download completed. Training in progress....
2025-10-31 14:21:01 Uploading - Uploading generated training model...
2025-10-31 14:21:19 Completed - Training job completed
..

INFO:sagemaker:Creating model with name: linear-learner-2025-10-31-14-21-49-299


Training seconds: 185
Billable seconds: 185
Training completed!
Running batch transform...


INFO:sagemaker:Creating transform job with name: linear-learner-2025-10-31-14-21-49-942


.................................
...Predictions completed!
Parsed 22199 predictions
LINEAR Accuracy: 0.7589
Sample - True: [1 0 1], Pred: [0, 0, 0]

=== Training xgboost on combined_cvs_v1_small.csv ===
Data split: 88800 train, 22199 test


INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2025-10-31-14-27-56-631


Data uploaded to S3
Training started...
2025-10-31 14:27:57 Starting - Starting the training job...
2025-10-31 14:28:12 Starting - Preparing the instances for training...
2025-10-31 14:28:36 Downloading - Downloading input data...
2025-10-31 14:29:27 Downloading - Downloading the training image......
2025-10-31 14:30:28 Training - Training image download completed. Training in progress....
2025-10-31 14:31:01 Uploading - Uploading generated training model
2025-10-31 14:31:01 Completed - Training job completed
..

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2025-10-31-14-31-13-441


Training seconds: 145
Billable seconds: 145
Training completed!
Running batch transform...


INFO:sagemaker:Creating transform job with name: sagemaker-xgboost-2025-10-31-14-31-14-084


..............................
...Predictions completed!
Parsed 22199 predictions
XGBOOST Accuracy: 0.7651
Sample - True: [1 0 1], Pred: [0, 0, 0]

V1 SIMPLE MODELS RESULTS:
  Linear Learner: 0.7589
  XGBoost: 0.7651

PROCESSING V2: combined_cvs_v2_small.csv

=== Training linear on combined_cvs_v2_small.csv ===
Data split: 88080 train, 22019 test


INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: linear-learner-2025-10-31-14-36-50-013


Data uploaded to S3
Training started...
2025-10-31 14:36:51 Starting - Starting the training job...
2025-10-31 14:37:07 Starting - Preparing the instances for training...
2025-10-31 14:37:28 Downloading - Downloading input data...
2025-10-31 14:38:14 Downloading - Downloading the training image.........
2025-10-31 14:39:30 Training - Training image download completed. Training in progress....
2025-10-31 14:40:23 Uploading - Uploading generated training model
2025-10-31 14:40:23 Completed - Training job completed
..

INFO:sagemaker:Creating model with name: linear-learner-2025-10-31-14-40-37-032


Training seconds: 175
Billable seconds: 175
Training completed!
Running batch transform...


INFO:sagemaker:Creating transform job with name: linear-learner-2025-10-31-14-40-37-655


.......................................
...Predictions completed!
Parsed 22019 predictions
LINEAR Accuracy: 0.7580
Sample - True: [0 1 1], Pred: [0, 0, 0]

=== Training xgboost on combined_cvs_v2_small.csv ===
Data split: 88080 train, 22019 test
Data uploaded to S3


INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2025-10-31-14-47-44-458


Training started...
2025-10-31 14:47:45 Starting - Starting the training job...
2025-10-31 14:47:59 Starting - Preparing the instances for training...
2025-10-31 14:48:24 Downloading - Downloading input data...
2025-10-31 14:49:10 Downloading - Downloading the training image......
2025-10-31 14:50:10 Training - Training image download completed. Training in progress....
2025-10-31 14:50:44 Uploading - Uploading generated training model
2025-10-31 14:50:44 Completed - Training job completed
..

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2025-10-31-14-51-01-335


Training seconds: 140
Billable seconds: 140
Training completed!
Running batch transform...


INFO:sagemaker:Creating transform job with name: sagemaker-xgboost-2025-10-31-14-51-01-905


..............................
...Predictions completed!
Parsed 22019 predictions
XGBOOST Accuracy: 0.7678
Sample - True: [0 1 1], Pred: [0, 0, 0]

V2 SIMPLE MODELS RESULTS:
  Linear Learner: 0.7580
  XGBoost: 0.7678

STEP 3: BUILDING AND EVALUATING ENSEMBLE MODELS

BUILDING ENSEMBLE MODEL FOR V1

STEP 3: BUILDING ENSEMBLE MODEL FOR V1
Dataset: combined_cvs_v1_small.csv
Dataset shape: (110999, 94)
Target distribution: {0: 83910, 1: 27089}
Data split - Train: (77699, 93), Val: (16649, 93), Test: (16651, 93)


INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2025-10-31-14-56-38-352


Data uploaded to S3
Training ensemble model...
2025-10-31 14:56:39 Starting - Starting the training job...
2025-10-31 14:56:55 Starting - Preparing the instances for training...
2025-10-31 14:57:41 Downloading - Downloading the training image......
2025-10-31 14:58:26 Training - Training image download completed. Training in progress....
2025-10-31 14:59:02 Uploading - Uploading generated training model...
2025-10-31 14:59:15 Completed - Training job completed
..

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2025-10-31-14-59-55-769


Training seconds: 120
Billable seconds: 120
Ensemble training completed!
Hosting model on ml.m5.xlarge instance...


INFO:sagemaker:Creating transform job with name: sagemaker-xgboost-2025-10-31-14-59-56-388


Running batch transform on test data...
..............................
...Batch transform completed!
Parsed 16651 ensemble predictions

ENSEMBLE MODEL PERFORMANCE METRICS (V1):
   Accuracy:  0.7699
   Precision: 0.6790
   Recall:    0.1043
   F1-Score:  0.1809
   Confusion Matrix:
     - True Positives:  423
     - False Positives: 200
     - True Negatives:  12397
     - False Negatives: 3631

BUILDING ENSEMBLE MODEL FOR V2

STEP 3: BUILDING ENSEMBLE MODEL FOR V2
Dataset: combined_cvs_v2_small.csv
Dataset shape: (110099, 85)
Target distribution: {0: 83282, 1: 26817}
Data split - Train: (77069, 84), Val: (16514, 84), Test: (16516, 84)


INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2025-10-31-15-05-32-961


Data uploaded to S3
Training ensemble model...
2025-10-31 15:05:33 Starting - Starting the training job...
2025-10-31 15:05:54 Starting - Preparing the instances for training...
2025-10-31 15:06:17 Downloading - Downloading input data...
2025-10-31 15:06:42 Downloading - Downloading the training image...
2025-10-31 15:07:28 Training - Training image download completed. Training in progress....
2025-10-31 15:07:58 Uploading - Uploading generated training model...
2025-10-31 15:08:12 Completed - Training job completed
..

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2025-10-31-15-08-49-798


Training seconds: 114
Billable seconds: 114
Ensemble training completed!
Hosting model on ml.m5.xlarge instance...


INFO:sagemaker:Creating transform job with name: sagemaker-xgboost-2025-10-31-15-08-50-510


Running batch transform on test data...
.................................
...Batch transform completed!
Parsed 16516 ensemble predictions

ENSEMBLE MODEL PERFORMANCE METRICS (V2):
   Accuracy:  0.7698
   Precision: 0.6087
   Recall:    0.1492
   F1-Score:  0.2396
   Confusion Matrix:
     - True Positives:  599
     - False Positives: 385
     - True Negatives:  12115
     - False Negatives: 3417

FINAL COMPREHENSIVE COMPARISON: SIMPLE vs ENSEMBLE MODELS

DATASET V1 COMPARISON:

   SIMPLE MODELS:
     Linear Learner: 0.7589
     XGBoost:       0.7651

   ENSEMBLE MODEL (XGBoost Optimized):
     Accuracy:      0.7699
     Precision:     0.6790
     Recall:        0.1043
     F1-Score:      0.1809

 PERFORMANCE COMPARISON:
     Best Simple Model:    0.7651
     Ensemble Model:       0.7699
     Improvement:          +0.0048
Ensemble model performed better by 0.0048

OBSERVATIONS:
     - Ensemble model shows modest improvement
     - The dataset might have simpler patterns
     - Ensemble