In [27]:
import os
import glob
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import numpy as np
import random
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import pprint
import pyspark
import pyspark.sql.functions as F

from pyspark.sql.functions import col
from pyspark.sql.types import StringType, IntegerType, FloatType, DateType


In [28]:
# Ensure we're working from project root
current_dir = os.getcwd()
if current_dir.endswith('notebooks'):
    os.chdir('..')
    print("Adjusted working directory to project root")

import utils.model_inference as mi

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Setup PySpark Session

In [29]:
# Create (or reuse) the Spark session used by the data-loading and saving steps
spark = (
    pyspark.sql.SparkSession.builder
    .appName("Model_Inference_Pipeline")
    .master("local[*]")
    .getOrCreate()
)

# Restrict Spark's own logging to errors so cell output stays readable
spark.sparkContext.setLogLevel("ERROR")

print("Spark session initialized for model inference")


Spark session initialized for model inference


## Configuration Setup

In [30]:
# Parameters for the single-date inference run
model_version = "credit_model_xgboost_2024_09_01"  # model artifact to score with
snapshot_date_str = "2024-01-01"  # feature snapshot to score

# Expand the parameters and storage locations into the full configuration dict
config = mi.build_inference_config(
    snapshot_date_str=snapshot_date_str,
    model_version=model_version,
    model_bank_directory="models/",
    feature_store_path="datamart/gold/feature_store/",
    predictions_output_path="datamart/gold/model_predictions/",
)

print("Inference Configuration:")
pprint.pprint(config)


Inference Configuration:
{'feature_store_path': 'datamart/gold/feature_store/',
 'model_artifact_filepath': 'models/credit_model_xgboost_2024_09_01.pkl',
 'model_bank_directory': 'models/',
 'model_version': 'credit_model_xgboost_2024_09_01',
 'prediction_columns': ['Customer_ID',
                        'snapshot_date',
                        'model_version',
                        'model_prediction_proba',
                        'model_prediction_binary',
                        'prediction_timestamp'],
 'predictions_output_path': 'datamart/gold/model_predictions/',
 'predictions_table_name': 'credit_model_xgboost_2024_09_01_predictions',
 'snapshot_date': datetime.datetime(2024, 1, 1, 0, 0),
 'snapshot_date_str': '2024-01-01'}


## Load Model Artifact from Model Bank

In [31]:
# Fetch the model artifact referenced by the configuration
print("\n" + "=" * 60)
print("LOADING MODEL ARTIFACT")
print("=" * 60)

model_artifact = mi.load_model_artifact(config)

# Abort right away when the loader returned nothing usable
if not model_artifact:
    raise ValueError(f"Failed to load model artifact: {config['model_artifact_filepath']}")

# Summarise the metadata stored alongside the model
print(f"Model loaded successfully: {config['model_version']}")
print(f"Model type: {model_artifact['model_type']}")
print(f"Training date: {model_artifact.get('data_dates', {}).get('model_train_date_str', 'Unknown')}")
print(f"Feature count: {model_artifact['data_stats']['feature_count']}")



LOADING MODEL ARTIFACT
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Model loaded successfully: credit_model_xgboost_2024_09_01
Model type: xgboost
Training date: 2024-09-01
Feature count: 135


## Load Feature Store Data

In [32]:
# Pull the feature rows that belong to the configured snapshot date
print("\n" + "=" * 60)
print(f"LOADING FEATURE DATA FOR {snapshot_date_str}")
print("=" * 60)

feature_data = mi.load_inference_features(config, spark)

# Nothing to score: stop before any downstream step runs
if feature_data is None or len(feature_data) == 0:
    raise ValueError(f"No feature data found for snapshot date: {snapshot_date_str}")

print("Feature data loaded successfully")
print(f"Records for inference: {len(feature_data)}")
# Every column except the two identifier columns is counted as a feature
print(f"Available features: {sum(1 for name in feature_data.columns if name not in ('Customer_ID', 'feature_snapshot_date'))}")
print(f"Sample customer IDs: {feature_data['Customer_ID'].head(3).tolist()}")



LOADING FEATURE DATA FOR 2024-01-01
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-01-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Feature data loaded successfully
Records for inference: 8974
Available features: 135
Sample customer IDs: ['CUS_0x1026', 'CUS_0x104a', 'CUS_0x1056']


## Data Preprocessing for Inference

In [33]:
# Run the loaded features through the preprocessing bundled with the model artifact
print("\n" + "=" * 60)
print("PREPROCESSING INFERENCE DATA")
print("=" * 60)

# Returns the model-ready matrix plus the matching customer records
X_inference, customer_info = mi.prepare_inference_features(
    feature_data=feature_data, model_artifact=model_artifact, config=config
)

# A missing matrix means preprocessing did not succeed
if X_inference is None:
    raise ValueError("Feature preprocessing failed")

print(f"Processed feature matrix shape: {X_inference.shape}")
print(f"Customer records: {len(customer_info)}")



PREPROCESSING INFERENCE DATA
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (8974, 135)
Processed features shape: (8974, 135)
Preprocessing completed successfully
Processed feature matrix shape: (8974, 135)
Customer records: 8974


## Model Inference and Prediction

In [34]:
# Score the prepared feature matrix with the loaded model
print("\n" + "=" * 60)
print("GENERATING MODEL PREDICTIONS")
print("=" * 60)

predictions_df = mi.generate_model_predictions(
    X_inference=X_inference,
    customer_info=customer_info,
    model_artifact=model_artifact,
    config=config,
)

# No rows back means scoring failed; do not continue to the save step
if predictions_df is None or len(predictions_df) == 0:
    raise ValueError("Prediction generation failed")

print("Predictions generated successfully")
print(f"Total predictions: {len(predictions_df)}")

# Distribution summary of the predicted probabilities
print("Prediction statistics:")
print(f"  Mean probability: {predictions_df['model_prediction_proba'].mean():.4f}")
print(f"  Min probability: {predictions_df['model_prediction_proba'].min():.4f}")
print(f"  Max probability: {predictions_df['model_prediction_proba'].max():.4f}")
print(f"  Std probability: {predictions_df['model_prediction_proba'].std():.4f}")

# First few rows as a quick sanity check
print("\nSample predictions:")
print(predictions_df[['Customer_ID', 'model_prediction_proba', 'model_prediction_binary']].head())


GENERATING MODEL PREDICTIONS
Generating model predictions...
Using model type: xgboost
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0024, 0.9943]
Binary predictions distribution: [6690 2284]
Predictions generated successfully
Total predictions: 8974
Prediction statistics:
  Mean probability: 0.3120
  Min probability: 0.0024
  Max probability: 0.9943
  Std probability: 0.2931

Sample predictions:
  Customer_ID  model_prediction_proba  model_prediction_binary
0  CUS_0x1026                0.096556                        0
1  CUS_0x104a                0.037388                        0
2  CUS_0x1056                0.987560                        1
3  CUS_0x105c                0.453053                        0
4  CUS_0x1075                0.358939                        0


## Save Predictions to Datamart Gold Layer

In [35]:
# Persist the predictions under the gold-layer output path from the configuration
print("\n" + "=" * 60)
print("SAVING PREDICTIONS TO DATAMART")
print("=" * 60)

output_path = mi.save_predictions_to_datamart(
    predictions_df=predictions_df, config=config, spark=spark
)

# A falsy path signals that nothing was written
if not output_path:
    raise ValueError("Failed to save predictions to datamart")

print(f"Predictions saved successfully to: {output_path}")
print(f"Records saved: {len(predictions_df)}")


SAVING PREDICTIONS TO DATAMART


[Stage 2:>                                                          (0 + 8) / 8]

Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2024_01_01.parquet
Records saved: 8974
Predictions saved successfully to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2024_01_01.parquet
Records saved: 8974


                                                                                

## Validate Saved Predictions

In [36]:
# Read back what was just written and report whether it looks complete
print("\n" + "=" * 60)
print("VALIDATING SAVED PREDICTIONS")
print("=" * 60)

validation_results = mi.validate_saved_predictions(config, spark)

# Failure is reported but does not stop the notebook
if not validation_results['success']:
    print(f"Validation failed: {validation_results.get('error', 'Unknown error')}")
else:
    print("Validation successful")
    print(f"Records in datamart: {validation_results['record_count']}")
    print(f"Files created: {validation_results['file_count']}")



VALIDATING SAVED PREDICTIONS
Validation successful
Records in datamart: 8974
Files created: 18


# Backfill

In [37]:
print("\n" + "=" * 80)
print("BATCH PROCESSING SETUP")
print("=" * 80)

# Inclusive date window covered by the backfill
batch_start_date = "2023-01-01"
batch_end_date = "2024-12-01"

# One model version per algorithm; all share the same version-date suffix
batch_model_versions = [
    f"credit_model_{algorithm}_2024_09_01"
    for algorithm in ("logistic_regression", "random_forest", "xgboost")
]

# Echo the plan so the run is self-describing
print("Batch processing configuration:")
print(f"Date range: {batch_start_date} to {batch_end_date}")
print(f"Models to process: {len(batch_model_versions)}")
for model in batch_model_versions:
    print("  - " + model)


BATCH PROCESSING SETUP
Batch processing configuration:
Date range: 2023-01-01 to 2024-12-01
Models to process: 3
  - credit_model_logistic_regression_2024_09_01
  - credit_model_random_forest_2024_09_01
  - credit_model_xgboost_2024_09_01


In [38]:
# Generate list of dates for batch processing
def generate_first_of_month_dates(start_date_str, end_date_str):
    """
    Generate the first-of-month dates that fall within [start, end].

    Only dates inside the requested range are returned. If the start date is
    not itself the first of a month, the sequence begins with the first day of
    the following month, so no returned date precedes the start date.

    Args:
        start_date_str: Start date in YYYY-MM-DD format (inclusive)
        end_date_str: End date in YYYY-MM-DD format (inclusive)

    Returns:
        List of date strings in YYYY-MM-DD format, in ascending order. The
        list is empty when no first-of-month date lies in the range (for
        example when the start date is after the end date).

    Raises:
        ValueError: If either argument does not parse as YYYY-MM-DD
            (raised by datetime.strptime).
    """
    start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
    end_date = datetime.strptime(end_date_str, "%Y-%m-%d")

    # Put months on one linear axis (year * 12 + zero-based month) so the
    # December -> January rollover needs no special case.
    first_month_index = start_date.year * 12 + (start_date.month - 1)
    if start_date.day > 1:
        # The first of the start month is before the start date: skip it.
        first_month_index += 1
    # The first of the end date's month is always <= the end date itself.
    last_month_index = end_date.year * 12 + (end_date.month - 1)

    return [
        datetime(month_index // 12, month_index % 12 + 1, 1).strftime("%Y-%m-%d")
        for month_index in range(first_month_index, last_month_index + 1)
    ]

# Expand the configured backfill window into the monthly dates to process
batch_dates = generate_first_of_month_dates(
    start_date_str=batch_start_date,
    end_date_str=batch_end_date,
)
print("Batch processing dates: " + str(batch_dates))

Batch processing dates: ['2023-01-01', '2023-02-01', '2023-03-01', '2023-04-01', '2023-05-01', '2023-06-01', '2023-07-01', '2023-08-01', '2023-09-01', '2023-10-01', '2023-11-01', '2023-12-01', '2024-01-01', '2024-02-01', '2024-03-01', '2024-04-01', '2024-05-01', '2024-06-01', '2024-07-01', '2024-08-01', '2024-09-01', '2024-10-01', '2024-11-01', '2024-12-01']


In [39]:
# Execute batch processing for all models
print(f"\n{'='*60}")
print("EXECUTING BATCH INFERENCE FOR ALL MODELS")
print(f"{'='*60}")

# Results of every batch run, keyed by model version
all_batch_results = {}

# The loop variable is deliberately not called `model_version`: a top-level
# loop variable in a notebook is a global, and that name would overwrite the
# `model_version` assigned in the configuration cell for the single-date run.
for batch_model_version in batch_model_versions:
    print(f"\nProcessing model: {batch_model_version}")
    print(f"{'='*40}")

    # Runs inference for each date in batch_dates with this model; the return
    # value is iterated below as a sequence of dicts carrying a 'success' flag.
    batch_results = mi.run_batch_inference(
        date_list=batch_dates,
        model_version=batch_model_version,
        spark=spark,
        model_bank_directory="models/",
        feature_store_path="datamart/gold/feature_store/",
        predictions_output_path="datamart/gold/model_predictions/"
    )

    all_batch_results[batch_model_version] = batch_results

    # Summary for this model
    successful_results = [result for result in batch_results if result['success']]
    failed_results = [result for result in batch_results if not result['success']]

    print(f"Model {batch_model_version} completed:")
    print(f"  Successful: {len(successful_results)} dates")
    print(f"  Failed: {len(failed_results)} dates")


EXECUTING BATCH INFERENCE FOR ALL MODELS

Processing model: credit_model_logistic_regression_2024_09_01
Starting batch inference for 24 dates
Model: credit_model_logistic_regression_2024_09_01

Processing date 1/24: 2023-01-01
Running inference for 2023-01-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-01-01 00:00:00


                                                                                

No feature data found for snapshot date: 2023-01-01 00:00:00
✗ Failed to process 2023-01-01: Failed to load feature data

Processing date 2/24: 2023-02-01
Running inference for 2023-02-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-02-01 00:00:00


                                                                                

No feature data found for snapshot date: 2023-02-01 00:00:00
✗ Failed to process 2023-02-01: Failed to load feature data

Processing date 3/24: 2023-03-01
Running inference for 2023-03-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-03-01 00:00:00


                                                                                

No feature data found for snapshot date: 2023-03-01 00:00:00
✗ Failed to process 2023-03-01: Failed to load feature data

Processing date 4/24: 2023-04-01
Running inference for 2023-04-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-04-01 00:00:00


                                                                                

No feature data found for snapshot date: 2023-04-01 00:00:00
✗ Failed to process 2023-04-01: Failed to load feature data

Processing date 5/24: 2023-05-01
Running inference for 2023-05-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-05-01 00:00:00


                                                                                

No feature data found for snapshot date: 2023-05-01 00:00:00
✗ Failed to process 2023-05-01: Failed to load feature data

Processing date 6/24: 2023-06-01
Running inference for 2023-06-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-06-01 00:00:00


                                                                                

No feature data found for snapshot date: 2023-06-01 00:00:00
✗ Failed to process 2023-06-01: Failed to load feature data

Processing date 7/24: 2023-07-01
Running inference for 2023-07-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-07-01 00:00:00
Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (8974, 135)
Processed features shape: (8974, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0175, 0.9931]
Binary predictions distribution: [4612 4362]
Created output directory: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01


                                                                                

Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2023_07_01.parquet
Records saved: 8974
✓ Successfully processed 2023-07-01

Processing date 8/24: 2023-08-01
Running inference for 2023-08-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-08-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (8974, 135)
Processed features shape: (8974, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0112, 0.9904]
Binary predictions distribution: [4638 4336]




Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2023_08_01.parquet
Records saved: 8974
✓ Successfully processed 2023-08-01

Processing date 9/24: 2023-09-01
Running inference for 2023-09-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-09-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (8974, 135)
Processed features shape: (8974, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0113, 0.9942]
Binary predictions distribution: [4725 4249]




Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2023_09_01.parquet
Records saved: 8974
✓ Successfully processed 2023-09-01

Processing date 10/24: 2023-10-01
Running inference for 2023-10-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-10-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (8974, 135)
Processed features shape: (8974, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0124, 0.9972]
Binary predictions distribution: [4812 4162]




Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2023_10_01.parquet
Records saved: 8974
✓ Successfully processed 2023-10-01

Processing date 11/24: 2023-11-01
Running inference for 2023-11-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-11-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (8974, 135)
Processed features shape: (8974, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0130, 0.9967]
Binary predictions distribution: [4884 4090]




Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2023_11_01.parquet
Records saved: 8974
✓ Successfully processed 2023-11-01

Processing date 12/24: 2023-12-01
Running inference for 2023-12-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-12-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (8974, 135)
Processed features shape: (8974, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0099, 0.9914]
Binary predictions distribution: [5026 3948]




Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2023_12_01.parquet
Records saved: 8974
✓ Successfully processed 2023-12-01

Processing date 13/24: 2024-01-01
Running inference for 2024-01-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-01-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (8974, 135)
Processed features shape: (8974, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0087, 0.9923]
Binary predictions distribution: [5072 3902]




Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2024_01_01.parquet
Records saved: 8974
✓ Successfully processed 2024-01-01

Processing date 14/24: 2024-02-01
Running inference for 2024-02-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-02-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (8974, 135)
Processed features shape: (8974, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0135, 0.9871]
Binary predictions distribution: [5179 3795]


                                                                                

Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2024_02_01.parquet
Records saved: 8974
✓ Successfully processed 2024-02-01

Processing date 15/24: 2024-03-01
Running inference for 2024-03-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-03-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (8974, 135)
Processed features shape: (8974, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0133, 0.9923]
Binary predictions distribution: [5263 3711]


                                                                                

Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2024_03_01.parquet
Records saved: 8974
✓ Successfully processed 2024-03-01

Processing date 16/24: 2024-04-01
Running inference for 2024-04-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-04-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (8974, 135)
Processed features shape: (8974, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0079, 0.9900]
Binary predictions distribution: [5363 3611]




Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2024_04_01.parquet
Records saved: 8974
✓ Successfully processed 2024-04-01

Processing date 17/24: 2024-05-01
Running inference for 2024-05-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-05-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (8974, 135)
Processed features shape: (8974, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0120, 0.9939]
Binary predictions distribution: [5478 3496]




Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2024_05_01.parquet
Records saved: 8974
✓ Successfully processed 2024-05-01

Processing date 18/24: 2024-06-01
Running inference for 2024-06-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-06-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (8974, 135)
Processed features shape: (8974, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0113, 0.9883]
Binary predictions distribution: [5550 3424]
Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2024_06_01.parquet
Records saved: 8974
✓ Successfully processed 2024-06-01

Processing date 19/24: 2024-07-01
Running inference for 2024-07-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-07-01 00:00:00


                                                                                

Feature data loaded: 9479 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (9479, 135)
Processed features shape: (9479, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 9479
Predictions generated successfully
Prediction range: [0.0120, 0.9869]
Binary predictions distribution: [5846 3633]
Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2024_07_01.parquet
Records saved: 9479
✓ Successfully processed 2024-07-01

Processing date 20/24: 2024-08-01
Running inference for 2024-08-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-08-01 00:00:00


                                                                                

Feature data loaded: 10022 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (10022, 135)
Processed features shape: (10022, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 10022
Predictions generated successfully
Prediction range: [0.0118, 0.9906]
Binary predictions distribution: [6230 3792]
Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2024_08_01.parquet
Records saved: 10022
✓ Successfully processed 2024-08-01

Processing date 21/24: 2024-09-01
Running inference for 2024-09-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-09-01 00:00:00


                                                                                

Feature data loaded: 10515 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (10515, 135)
Processed features shape: (10515, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 10515
Predictions generated successfully
Prediction range: [0.0110, 0.9884]
Binary predictions distribution: [6587 3928]
Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2024_09_01.parquet
Records saved: 10515
✓ Successfully processed 2024-09-01

Processing date 22/24: 2024-10-01
Running inference for 2024-10-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-10-01 00:00:00


                                                                                

Feature data loaded: 10971 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (10971, 135)
Processed features shape: (10971, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 10971
Predictions generated successfully
Prediction range: [0.0129, 0.9909]
Binary predictions distribution: [6894 4077]
Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2024_10_01.parquet
Records saved: 10971
✓ Successfully processed 2024-10-01

Processing date 23/24: 2024-11-01
Running inference for 2024-11-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-11-01 00:00:00


                                                                                

Feature data loaded: 11459 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (11459, 135)
Processed features shape: (11459, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 11459
Predictions generated successfully
Prediction range: [0.0118, 0.9912]
Binary predictions distribution: [7193 4266]
Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2024_11_01.parquet
Records saved: 11459
✓ Successfully processed 2024-11-01

Processing date 24/24: 2024-12-01
Running inference for 2024-12-01 using credit_model_logistic_regression_2024_09_01
Model artifact loaded successfully from: models/credit_model_logistic_regression_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-12-01 00:00:00


                                                                                

Feature data loaded: 11974 records
Preparing features for inference...
Using preprocessor: LogisticRegressionPreprocessor
Expected features: 36
Raw features shape: (11974, 135)
Processed features shape: (11974, 36)
Preprocessing completed successfully
Generating model predictions...
Using model type: logistic_regression
Inference samples: 11974
Predictions generated successfully
Prediction range: [0.0084, 0.9930]
Binary predictions distribution: [7563 4411]
Predictions saved to: datamart/gold/model_predictions/credit_model_logistic_regression_2024_09_01/credit_model_logistic_regression_2024_09_01_predictions_2024_12_01.parquet
Records saved: 11974
✓ Successfully processed 2024-12-01
Model credit_model_logistic_regression_2024_09_01 completed:
  Successful: 18 dates
  Failed: 6 dates

Processing model: credit_model_random_forest_2024_09_01
Starting batch inference for 24 dates
Model: credit_model_random_forest_2024_09_01

Processing date 1/24: 2023-01-01
Running inference for 2023-01-01

                                                                                

No feature data found for snapshot date: 2023-01-01 00:00:00
✗ Failed to process 2023-01-01: Failed to load feature data

Processing date 2/24: 2023-02-01
Running inference for 2023-02-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-02-01 00:00:00


                                                                                

No feature data found for snapshot date: 2023-02-01 00:00:00
✗ Failed to process 2023-02-01: Failed to load feature data

Processing date 3/24: 2023-03-01
Running inference for 2023-03-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-03-01 00:00:00
No feature data found for snapshot date: 2023-03-01 00:00:00
✗ Failed to process 2023-03-01: Failed to load feature data

Processing date 4/24: 2023-04-01
Running inference for 2023-04-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-04-01 00:00:00
No feature data found for snapshot date: 2023-04-01 00:00:00
✗ Failed to process 2023-04-01: Failed to load feature data

Processing date 5/24: 2023-05-01
Running infer

                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (8974, 135)
Processed features shape: (8974, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0061, 0.9750]
Binary predictions distribution: [7196 1778]
Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2023_08_01.parquet
Records saved: 8974
✓ Successfully processed 2023-08-01

Processing date 9/24: 2023-09-01
Running inference for 2023-09-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-09-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (8974, 135)
Processed features shape: (8974, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0028, 0.9764]
Binary predictions distribution: [7106 1868]


                                                                                

Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2023_09_01.parquet
Records saved: 8974
✓ Successfully processed 2023-09-01

Processing date 10/24: 2023-10-01
Running inference for 2023-10-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-10-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (8974, 135)
Processed features shape: (8974, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0056, 0.9816]
Binary predictions distribution: [7055 1919]
Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2023_10_01.parquet
Records saved: 8974
✓ Successfully processed 2023-10-01

Processing date 11/24: 2023-11-01
Running inference for 2023-11-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-11-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (8974, 135)
Processed features shape: (8974, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0000, 0.9724]
Binary predictions distribution: [7008 1966]
Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2023_11_01.parquet
Records saved: 8974
✓ Successfully processed 2023-11-01

Processing date 12/24: 2023-12-01
Running inference for 2023-12-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-12-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (8974, 135)
Processed features shape: (8974, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0039, 0.9725]
Binary predictions distribution: [6946 2028]
Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2023_12_01.parquet
Records saved: 8974
✓ Successfully processed 2023-12-01

Processing date 13/24: 2024-01-01
Running inference for 2024-01-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-01-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (8974, 135)
Processed features shape: (8974, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0019, 0.9765]
Binary predictions distribution: [6884 2090]
Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2024_01_01.parquet
Records saved: 8974
✓ Successfully processed 2024-01-01

Processing date 14/24: 2024-02-01
Running inference for 2024-02-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-02-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (8974, 135)
Processed features shape: (8974, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0038, 0.9741]
Binary predictions distribution: [6792 2182]
Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2024_02_01.parquet
Records saved: 8974
✓ Successfully processed 2024-02-01

Processing date 15/24: 2024-03-01
Running inference for 2024-03-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-03-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (8974, 135)
Processed features shape: (8974, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0034, 0.9789]
Binary predictions distribution: [6697 2277]
Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2024_03_01.parquet
Records saved: 8974
✓ Successfully processed 2024-03-01

Processing date 16/24: 2024-04-01
Running inference for 2024-04-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-04-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (8974, 135)
Processed features shape: (8974, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0042, 0.9778]
Binary predictions distribution: [6652 2322]


                                                                                

Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2024_04_01.parquet
Records saved: 8974
✓ Successfully processed 2024-04-01

Processing date 17/24: 2024-05-01
Running inference for 2024-05-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-05-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (8974, 135)
Processed features shape: (8974, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0020, 0.9747]
Binary predictions distribution: [6591 2383]
Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2024_05_01.parquet
Records saved: 8974
✓ Successfully processed 2024-05-01

Processing date 18/24: 2024-06-01
Running inference for 2024-06-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-06-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (8974, 135)
Processed features shape: (8974, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0000, 0.9817]
Binary predictions distribution: [6493 2481]
Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2024_06_01.parquet
Records saved: 8974
✓ Successfully processed 2024-06-01

Processing date 19/24: 2024-07-01
Running inference for 2024-07-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-07-01 00:00:00


                                                                                

Feature data loaded: 9479 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (9479, 135)
Processed features shape: (9479, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 9479
Predictions generated successfully
Prediction range: [0.0018, 0.9710]
Binary predictions distribution: [6846 2633]
Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2024_07_01.parquet
Records saved: 9479
✓ Successfully processed 2024-07-01

Processing date 20/24: 2024-08-01
Running inference for 2024-08-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-08-01 00:00:00


                                                                                

Feature data loaded: 10022 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (10022, 135)
Processed features shape: (10022, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 10022
Predictions generated successfully
Prediction range: [0.0011, 0.9750]
Binary predictions distribution: [7248 2774]


                                                                                

Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2024_08_01.parquet
Records saved: 10022
✓ Successfully processed 2024-08-01

Processing date 21/24: 2024-09-01
Running inference for 2024-09-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-09-01 00:00:00


                                                                                

Feature data loaded: 10515 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (10515, 135)
Processed features shape: (10515, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 10515
Predictions generated successfully
Prediction range: [0.0010, 0.9697]
Binary predictions distribution: [7538 2977]


                                                                                

Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2024_09_01.parquet
Records saved: 10515
✓ Successfully processed 2024-09-01

Processing date 22/24: 2024-10-01
Running inference for 2024-10-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-10-01 00:00:00


                                                                                

Feature data loaded: 10971 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (10971, 135)
Processed features shape: (10971, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 10971
Predictions generated successfully
Prediction range: [0.0012, 0.9671]
Binary predictions distribution: [7851 3120]


                                                                                

Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2024_10_01.parquet
Records saved: 10971
✓ Successfully processed 2024-10-01

Processing date 23/24: 2024-11-01
Running inference for 2024-11-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-11-01 00:00:00


                                                                                

Feature data loaded: 11459 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (11459, 135)
Processed features shape: (11459, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 11459
Predictions generated successfully
Prediction range: [0.0060, 0.9727]
Binary predictions distribution: [8227 3232]
Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2024_11_01.parquet
Records saved: 11459
✓ Successfully processed 2024-11-01

Processing date 24/24: 2024-12-01
Running inference for 2024-12-01 using credit_model_random_forest_2024_09_01
Model artifact loaded successfully from: models/credit_model_random_forest_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-12-01 00:00:00


                                                                                

Feature data loaded: 11974 records
Preparing features for inference...
Using preprocessor: RandomForestPreprocessor
Expected features: 140
Raw features shape: (11974, 135)
Processed features shape: (11974, 140)
Preprocessing completed successfully
Generating model predictions...
Using model type: random_forest
Inference samples: 11974
Predictions generated successfully
Prediction range: [0.0029, 0.9801]
Binary predictions distribution: [8564 3410]
Predictions saved to: datamart/gold/model_predictions/credit_model_random_forest_2024_09_01/credit_model_random_forest_2024_09_01_predictions_2024_12_01.parquet
Records saved: 11974
✓ Successfully processed 2024-12-01
Model credit_model_random_forest_2024_09_01 completed:
  Successful: 18 dates
  Failed: 6 dates

Processing model: credit_model_xgboost_2024_09_01
Starting batch inference for 24 dates
Model: credit_model_xgboost_2024_09_01

Processing date 1/24: 2023-01-01
Running inference for 2023-01-01 using credit_model_xgboost_2024_09_01
M

                                                                                

No feature data found for snapshot date: 2023-01-01 00:00:00
✗ Failed to process 2023-01-01: Failed to load feature data

Processing date 2/24: 2023-02-01
Running inference for 2023-02-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-02-01 00:00:00


                                                                                

No feature data found for snapshot date: 2023-02-01 00:00:00
✗ Failed to process 2023-02-01: Failed to load feature data

Processing date 3/24: 2023-03-01
Running inference for 2023-03-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-03-01 00:00:00


                                                                                

No feature data found for snapshot date: 2023-03-01 00:00:00
✗ Failed to process 2023-03-01: Failed to load feature data

Processing date 4/24: 2023-04-01
Running inference for 2023-04-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-04-01 00:00:00
No feature data found for snapshot date: 2023-04-01 00:00:00
✗ Failed to process 2023-04-01: Failed to load feature data

Processing date 5/24: 2023-05-01
Running inference for 2023-05-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-05-01 00:00:00


                                                                                

No feature data found for snapshot date: 2023-05-01 00:00:00
✗ Failed to process 2023-05-01: Failed to load feature data

Processing date 6/24: 2023-06-01
Running inference for 2023-06-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-06-01 00:00:00


                                                                                

No feature data found for snapshot date: 2023-06-01 00:00:00
✗ Failed to process 2023-06-01: Failed to load feature data

Processing date 7/24: 2023-07-01
Running inference for 2023-07-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-07-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (8974, 135)
Processed features shape: (8974, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0034, 0.9936]
Binary predictions distribution: [7098 1876]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2023_07_01.parquet
Records saved: 8974
✓ Successfully processed 2023-07-01

Processing date 8/24: 2023-08-01
Running inference for 2023-08-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-08-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (8974, 135)
Processed features shape: (8974, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0024, 0.9943]
Binary predictions distribution: [6984 1990]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2023_08_01.parquet
Records saved: 8974
✓ Successfully processed 2023-08-01

Processing date 9/24: 2023-09-01
Running inference for 2023-09-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-09-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (8974, 135)
Processed features shape: (8974, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0013, 0.9947]
Binary predictions distribution: [6942 2032]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2023_09_01.parquet
Records saved: 8974
✓ Successfully processed 2023-09-01

Processing date 10/24: 2023-10-01
Running inference for 2023-10-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-10-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (8974, 135)
Processed features shape: (8974, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0015, 0.9938]
Binary predictions distribution: [6905 2069]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2023_10_01.parquet
Records saved: 8974
✓ Successfully processed 2023-10-01

Processing date 11/24: 2023-11-01
Running inference for 2023-11-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-11-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (8974, 135)
Processed features shape: (8974, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0018, 0.9926]
Binary predictions distribution: [6828 2146]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2023_11_01.parquet
Records saved: 8974
✓ Successfully processed 2023-11-01

Processing date 12/24: 2023-12-01
Running inference for 2023-12-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2023-12-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (8974, 135)
Processed features shape: (8974, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0026, 0.9959]
Binary predictions distribution: [6779 2195]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2023_12_01.parquet
Records saved: 8974
✓ Successfully processed 2023-12-01

Processing date 13/24: 2024-01-01
Running inference for 2024-01-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-01-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (8974, 135)
Processed features shape: (8974, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0024, 0.9943]
Binary predictions distribution: [6690 2284]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2024_01_01.parquet
Records saved: 8974
✓ Successfully processed 2024-01-01

Processing date 14/24: 2024-02-01
Running inference for 2024-02-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-02-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (8974, 135)
Processed features shape: (8974, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0017, 0.9930]
Binary predictions distribution: [6601 2373]


                                                                                

Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2024_02_01.parquet
Records saved: 8974
✓ Successfully processed 2024-02-01

Processing date 15/24: 2024-03-01
Running inference for 2024-03-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-03-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (8974, 135)
Processed features shape: (8974, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0025, 0.9954]
Binary predictions distribution: [6491 2483]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2024_03_01.parquet
Records saved: 8974
✓ Successfully processed 2024-03-01

Processing date 16/24: 2024-04-01
Running inference for 2024-04-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-04-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (8974, 135)
Processed features shape: (8974, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0017, 0.9954]
Binary predictions distribution: [6432 2542]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2024_04_01.parquet
Records saved: 8974
✓ Successfully processed 2024-04-01

Processing date 17/24: 2024-05-01
Running inference for 2024-05-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-05-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (8974, 135)
Processed features shape: (8974, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0020, 0.9943]
Binary predictions distribution: [6373 2601]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2024_05_01.parquet
Records saved: 8974
✓ Successfully processed 2024-05-01

Processing date 18/24: 2024-06-01
Running inference for 2024-06-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-06-01 00:00:00


                                                                                

Feature data loaded: 8974 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (8974, 135)
Processed features shape: (8974, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 8974
Predictions generated successfully
Prediction range: [0.0025, 0.9931]
Binary predictions distribution: [6256 2718]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2024_06_01.parquet
Records saved: 8974
✓ Successfully processed 2024-06-01

Processing date 19/24: 2024-07-01
Running inference for 2024-07-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-07-01 00:00:00


                                                                                

Feature data loaded: 9479 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (9479, 135)
Processed features shape: (9479, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 9479
Predictions generated successfully
Prediction range: [0.0013, 0.9950]
Binary predictions distribution: [6589 2890]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2024_07_01.parquet
Records saved: 9479
✓ Successfully processed 2024-07-01

Processing date 20/24: 2024-08-01
Running inference for 2024-08-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-08-01 00:00:00


                                                                                

Feature data loaded: 10022 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (10022, 135)
Processed features shape: (10022, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 10022
Predictions generated successfully
Prediction range: [0.0019, 0.9952]
Binary predictions distribution: [6877 3145]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2024_08_01.parquet
Records saved: 10022
✓ Successfully processed 2024-08-01

Processing date 21/24: 2024-09-01
Running inference for 2024-09-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-09-01 00:00:00


                                                                                

Feature data loaded: 10515 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (10515, 135)
Processed features shape: (10515, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 10515
Predictions generated successfully
Prediction range: [0.0018, 0.9955]
Binary predictions distribution: [7129 3386]


                                                                                

Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2024_09_01.parquet
Records saved: 10515
✓ Successfully processed 2024-09-01

Processing date 22/24: 2024-10-01
Running inference for 2024-10-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-10-01 00:00:00


                                                                                

Feature data loaded: 10971 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (10971, 135)
Processed features shape: (10971, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 10971
Predictions generated successfully
Prediction range: [0.0022, 0.9935]
Binary predictions distribution: [7353 3618]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2024_10_01.parquet
Records saved: 10971
✓ Successfully processed 2024-10-01

Processing date 23/24: 2024-11-01
Running inference for 2024-11-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-11-01 00:00:00


                                                                                

Feature data loaded: 11459 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (11459, 135)
Processed features shape: (11459, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 11459
Predictions generated successfully
Prediction range: [0.0023, 0.9935]
Binary predictions distribution: [7666 3793]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2024_11_01.parquet
Records saved: 11459
✓ Successfully processed 2024-11-01

Processing date 24/24: 2024-12-01
Running inference for 2024-12-01 using credit_model_xgboost_2024_09_01
Model artifact loaded successfully from: models/credit_model_xgboost_2024_09_01.pkl
Loading features from: datamart/gold/feature_store/
Target snapshot date: 2024-12-01 00:00:00


                                                                                

Feature data loaded: 11974 records
Preparing features for inference...
Using preprocessor: XGBoostPreprocessor
Expected features: 135
Raw features shape: (11974, 135)
Processed features shape: (11974, 135)
Preprocessing completed successfully
Generating model predictions...
Using model type: xgboost
Inference samples: 11974
Predictions generated successfully
Prediction range: [0.0020, 0.9925]
Binary predictions distribution: [7988 3986]
Predictions saved to: datamart/gold/model_predictions/credit_model_xgboost_2024_09_01/credit_model_xgboost_2024_09_01_predictions_2024_12_01.parquet
Records saved: 11974
✓ Successfully processed 2024-12-01
Model credit_model_xgboost_2024_09_01 completed:
  Successful: 18 dates
  Failed: 6 dates


                                                                                

In [40]:
# Summary of all model batch processing.
# `all_batch_results` maps each model version to its list of per-date result
# dicts; every dict carries a 'success' flag and, on failure, may carry an
# 'error' message.
print(f"\n{'='*80}")
print("COMPREHENSIVE BATCH PROCESSING SUMMARY")
print(f"{'='*80}")

total_successful = 0
total_failed = 0

for model_version, batch_results in all_batch_results.items():
    successful_results = [result for result in batch_results if result['success']]
    failed_results = [result for result in batch_results if not result['success']]

    total_successful += len(successful_results)
    total_failed += len(failed_results)

    print(f"\n{model_version}:")
    print(f"  Successful: {len(successful_results)} dates")
    print(f"  Failed: {len(failed_results)} dates")

    if failed_results:
        # Only the first failure is shown to keep the summary short
        print(f"  Sample error: {failed_results[0].get('error', 'Unknown error')}")

total_attempts = total_successful + total_failed

print("\nOverall Summary:")
print(f"  Total successful predictions: {total_successful}")
print(f"  Total failed predictions: {total_failed}")
# Guard the division explicitly so the "Success rate:" label is printed in
# both cases (previously an empty run printed a bare "N/A" with no label).
if total_attempts > 0:
    print(f"  Success rate: {total_successful / total_attempts * 100:.1f}%")
else:
    print("  Success rate: N/A")


COMPREHENSIVE BATCH PROCESSING SUMMARY

credit_model_logistic_regression_2024_09_01:
  Successful: 18 dates
  Failed: 6 dates
  Sample error: Failed to load feature data

credit_model_random_forest_2024_09_01:
  Successful: 18 dates
  Failed: 6 dates
  Sample error: Failed to load feature data

credit_model_xgboost_2024_09_01:
  Successful: 18 dates
  Failed: 6 dates
  Sample error: Failed to load feature data

Overall Summary:
  Total successful predictions: 54
  Total failed predictions: 18
  Success rate: 75.0%


## Final Datamart Results

In [41]:
section_rule = '=' * 80
print(f"\n{section_rule}")
print("DATAMART VERIFICATION")
print(section_rule)

# Run the datamart check once per batch model and keep every report by version
all_datamart_summaries = {}

for model_version in batch_model_versions:
    print(f"\nVerifying datamart for {model_version}:")
    print('=' * 50)

    datamart_summary = mi.verify_predictions_datamart(
        model_version=model_version,
        predictions_output_path="datamart/gold/model_predictions/",
        spark=spark
    )
    all_datamart_summaries[model_version] = datamart_summary

    # Report the failure and move on; the details below exist only on success
    if not datamart_summary['success']:
        print(f"✗ Verification failed: {datamart_summary.get('error', 'Unknown error')}")
        continue

    print("✓ Verification successful")
    print(f"  Files: {datamart_summary['total_files']}")
    print(f"  Records: {datamart_summary['total_records']:,}")
    date_range = datamart_summary['date_range']
    print(f"  Date range: {date_range['min']} to {date_range['max']}")
    print(f"  Avg predictions/date: {datamart_summary['avg_records_per_date']:.0f}")

# Totals across all models, counting only reports flagged as successful
overall_rule = '=' * 60
print(f"\n{overall_rule}")
print("OVERALL DATAMART SUMMARY")
print(overall_rule)

verified_summaries = [
    report for report in all_datamart_summaries.values() if report['success']
]
total_files = sum(report['total_files'] for report in verified_summaries)
total_records = sum(report['total_records'] for report in verified_summaries)
successful_models = len(verified_summaries)

print(f"Models successfully processed: {successful_models}/{len(batch_model_versions)}")
print(f"Total prediction files across all models: {total_files}")
print(f"Total prediction records across all models: {total_records:,}")

# Preview a few stored rows for the XGBoost model, when it was verified
xgb_model_key = 'credit_model_xgboost_2024_09_01'
if xgb_model_key in all_datamart_summaries:
    xgb_summary = all_datamart_summaries[xgb_model_key]
    if xgb_summary['success']:
        print("\nSample records from best model (XGBoost):")
        print(xgb_summary['sample_data'])


DATAMART VERIFICATION

Verifying datamart for credit_model_logistic_regression_2024_09_01:
✓ Verification successful
  Files: 18
  Records: 172,108
  Date range: 2023-07-01 to 2024-12-01
  Avg predictions/date: 9562

Verifying datamart for credit_model_random_forest_2024_09_01:
✓ Verification successful
  Files: 18
  Records: 172,108
  Date range: 2023-07-01 to 2024-12-01
  Avg predictions/date: 9562

Verifying datamart for credit_model_xgboost_2024_09_01:
✓ Verification successful
  Files: 18
  Records: 172,108
  Date range: 2023-07-01 to 2024-12-01
  Avg predictions/date: 9562

OVERALL DATAMART SUMMARY
Models successfully processed: 3/3
Total prediction files across all models: 54
Total prediction records across all models: 516,324

Sample records from best model (XGBoost):
  Customer_ID feature_snapshot_date snapshot_date  \
0  CUS_0x61de            2024-07-01    2024-07-01   
1  CUS_0x62a4            2024-07-01    2024-07-01   
2  CUS_0x62c3            2024-07-01    2024-07-01   


## Session Cleanup

In [42]:
# Stop the Spark session as the final step of the notebook
spark.stop()
print(
    "\nSpark session terminated",
    "Model inference pipeline execution complete!",
    sep="\n",
)


Spark session terminated
Model inference pipeline execution complete!
