<a href="https://colab.research.google.com/github/HRI328/Supervised_ML/blob/main/Model_Test_Example_and_Deployment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
    # Demo setup: report which optional boosting libraries are installed, then
    # build a synthetic classification dataset that mixes numerical and
    # categorical columns (X_df) with its target vector (y).
    banner = "=" * 70
    print(banner)
    print("ML MODEL BUILDER - EXAMPLE WITH CATEGORICAL FEATURES")
    print(banner)

    # Show available models
    print("\n📦 Checking available libraries:")
    print("  ✓ Scikit-learn: Available")
    for lib_name, lib_available in (
        ("XGBoost", XGBOOST_AVAILABLE),
        ("CatBoost", CATBOOST_AVAILABLE),
    ):
        mark, status = ("✓", "Available") if lib_available else ("✗", "Not installed")
        print(f"  {mark} {lib_name}: {status}")

    if not XGBOOST_AVAILABLE:
        print("\n  Install XGBoost: pip install xgboost")
    if not CATBOOST_AVAILABLE:
        print("  Install CatBoost: pip install catboost")

    # Create sample classification data with categorical features
    from sklearn.datasets import make_classification

    # Single source of truth for the dataset dimensions, so the generated
    # arrays, the column names and the summary printout cannot drift apart.
    n_samples = 1000
    n_numeric = 15

    # Generate numerical features
    X_num, y = make_classification(
        n_samples=n_samples, n_features=n_numeric, n_informative=8,
        n_redundant=5, random_state=42
    )

    # Add categorical features. The global NumPy RNG is seeded first and the
    # columns are drawn in dict order, so the sampled values are reproducible.
    category_levels = {
        'category_A': ['Type1', 'Type2', 'Type3'],
        'category_B': ['Red', 'Blue', 'Green', 'Yellow'],
        'category_C': ['Small', 'Medium', 'Large'],
        'region': ['North', 'South', 'East', 'West'],
    }
    np.random.seed(42)
    categories = {
        cat_name: np.random.choice(levels, size=n_samples)
        for cat_name, levels in category_levels.items()
    }

    # Combine numerical and categorical features
    X_df = pd.DataFrame(
        X_num, columns=[f'num_feature_{i}' for i in range(X_num.shape[1])]
    )
    for cat_name, cat_values in categories.items():
        X_df[cat_name] = cat_values

    # Preview the first five rows of X_df
    print(X_df.head())

    # Counts are derived from the data instead of being hard-coded.
    print(f"\nDataset: {X_df.shape[0]} samples, {X_df.shape[1]} features")
    print(f"  - Numerical: {X_num.shape[1]} features")
    print(f"  - Categorical: {len(categories)} features")


In [None]:
# Initialize the builder for a classification task with a fixed random seed.
builder = MLModelBuilder(task='classification', random_state=42)

print(f"\n📊 Models to be trained: {len(builder.models)}")
# The loop variable is deliberately not called `model_name`: that name is
# assigned later from builder.select_best_model() and printed as the best
# model, so reusing it here would leave a misleading value in notebook scope.
for candidate_name in builder.models:
    print(f"  • {candidate_name}")

# Prepare data (automatically handles categorical features)
X_train, X_test, y_train, y_test = builder.prepare_data(
    X_df, y,
    test_size=0.2,
)

# Feature-selection modes; exactly one call should be active.
# Option 1: Use all numerical and categorical features, no feature selection
# builder.select_features(mode='A', pca_variance=0.95, pca_n_components=None, chi2_k=5)

# Option 2: Use Chi-Square for categorical feature selection, no feature selection for numerical features
# builder.select_features(mode='B', pca_variance=0.95, pca_n_components=None, chi2_k=5)

# Option 3: Use PCA for numerical feature selection, no categorical feature selection
# builder.select_features(mode='C', pca_variance=0.95, pca_n_components=None, chi2_k=5)

# Option 4: Use Chi-Square for categorical feature selection and PCA for numerical feature selection
builder.select_features(mode='D', pca_variance=0.95, pca_n_components=None, chi2_k=5)

# NOTE(review): presumably only meaningful for the PCA modes ('C'/'D') - confirm
# before switching to option 1 or 2.
builder.plot_pca_analysis()



In [None]:
# Fit every registered model; the fold count is handed to the builder as `cv`.
cv_folds = 5
builder.train_all_models(cv=cv_folds)

# Keep the comparison result for later inspection, then draw the result plots.
comparison = builder.compare_models()
builder.plot_results()

In [None]:
def _print_banner(title):
    """Print *title* framed by two 70-character rules, preceded by a blank line."""
    rule = "=" * 70
    print("\n" + rule)
    print(title)
    print(rule)


# Tune whichever optional boosting libraries are installed, using a
# randomized search (10 sampled candidates, cv=3) over a small grid.
if XGBOOST_AVAILABLE:
    _print_banner("OPTIMIZING XGBOOST")
    xgb_param_grid = {
        'n_estimators': [100, 200],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1, 0.3],
        'subsample': [0.8, 1.0],
    }
    best_xgb, best_xgb_params = builder.optimize_model(
        'XGBoost', xgb_param_grid, search_type='random', cv=3, n_iter=10
    )

if CATBOOST_AVAILABLE:
    _print_banner("OPTIMIZING CATBOOST")
    catboost_param_grid = {
        'iterations': [100, 200],
        'depth': [4, 6, 8],
        'learning_rate': [0.01, 0.1, 0.3],
    }
    best_catboost, best_catboost_params = builder.optimize_model(
        'CatBoost', catboost_param_grid, search_type='random', cv=3, n_iter=10
    )

# Random Forest is always tuned, with search_type='grid' and cv=5.
rf_param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 5],
}
best_rf, best_rf_params = builder.optimize_model(
    'Random Forest', rf_param_grid, search_type='grid', cv=5
)

In [None]:
# Select the best model. `model_name` is kept because the final summary
# printout at the end of the notebook reports it.
final_model, model_name = builder.select_best_model()

print("\n" + "="*70)
print("PREDICTION EXAMPLES")
print("="*70)

# Example 1: Predict on the test set and evaluate those predictions
print("\n1. Predicting on test set:")
y_pred = builder.predict(X_test)
print(f"   First 10 predictions: {y_pred[:10]}")
builder.evaluate_predictions(y_test, y_pred)


# Example 2: Predict with probabilities (classification)
if builder.task == 'classification':
    print("\n2. Predicting with probabilities:")
    # Use a dedicated name for the 5-row sample so the full test-set
    # predictions in `y_pred` (Example 1) are not silently overwritten.
    y_pred_sample, y_proba = builder.predict(X_test[:5], return_proba=True)
    print(f"   Predictions: {y_pred_sample}")
    print(f"   Probabilities shape: {y_proba.shape}")
    print(f"   Sample probabilities:\n{y_proba[:3]}")

# # Example 3: Predict single instance
# print("\n3. Predicting single instance:")
# # Create a sample instance
# sample = X_df.iloc[0].to_dict()
# prediction = builder.predict(sample)
# print(f"   Input: {list(sample.keys())[:5]}...")
# print(f"   Prediction: {prediction}")

# Example 4: Evaluate predictions on both splits. Each result gets its own
# name; previously the training result was overwritten by the test result.
print("\n4. Evaluating predictions on test and training set:")
print("   Training set:")
y_pred_train = builder.predict(X_train)
train_metrics = builder.evaluate_predictions(y_train, y_pred_train)
print("   Test set:")
y_pred_test = builder.predict(X_test)
test_metrics = builder.evaluate_predictions(y_test, y_pred_test)
# Backward-compatible alias: `metrics` ended up holding the test-set result.
metrics = test_metrics



In [None]:
# Example 5: Save model
print("\n5. Saving model:")
builder.save_model('my_best_model.pkl')

# Example 6: Load the saved model into a fresh builder and predict with it
print("\n6. Loading model and making predictions:")
new_builder = MLModelBuilder()
new_builder.load_model('my_best_model.pkl')

# Make prediction with loaded model
new_predictions = new_builder.predict(X_test)
# NOTE(review): the loaded model's predictions are scored through the original
# `builder`, not `new_builder`. Kept as-is because it is not visible here
# whether load_model() restores everything evaluate_predictions() needs - confirm.
builder.evaluate_predictions(y_test, new_predictions)
print(f"   Predictions from loaded model: {new_predictions}")

# Final summary; `model_name` comes from builder.select_best_model() earlier
# in the notebook.
print("\n" + "="*70)
print("✓ PIPELINE COMPLETE!")
print(f"✓ Best Model: {model_name}")
print("="*70)