In [None]:
!pip install pycaret

Collecting pycaret
  Downloading pycaret-3.3.2-py3-none-any.whl.metadata (17 kB)
Collecting pandas<2.2.0 (from pycaret)
  Downloading pandas-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting scipy<=1.11.4,>=1.6.1 (from pycaret)
  Downloading scipy-1.11.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib<1.4,>=1.2.0 (from pycaret)
  Downloading joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)
Collecting pyod>=1.1.3 (from pycaret)
  Downloading pyod-2.0.2.tar.gz (165 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m165.8/165.8 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting category-encoders>=2.4.0 (from pycaret)
  Downloading category_encoders-2.6.4-py2.py3-none-any.whl.metadata (8.0 kB)
Colle

In [None]:
import pandas as pd
from pycaret.classification import *

from sklearn.datasets import load_iris


def run_experiment(frame, include=None, target='target', session_id=42, **setup_kwargs):
    """Run one PyCaret classification experiment and collect its outcome.

    The three steps (configure, rank models, fetch the score grid) are always
    executed together, because ``compare_models`` and ``pull`` operate on the
    experiment configured by the most recent ``setup`` call.

    Args:
        frame: DataFrame holding the features and the target column.
        include: Optional list of model IDs to restrict the comparison to;
            ``None`` lets ``compare_models`` evaluate its default model set.
        target: Name of the target column in ``frame``.
        session_id: Seed forwarded to ``setup`` for reproducibility.
        **setup_kwargs: Any additional keyword arguments for ``setup``
            (e.g. ``train_size``, ``normalize``).

    Returns:
        A tuple ``(experiment, best_model, results)``: the value returned by
        ``setup``, the value returned by ``compare_models``, and the score
        grid returned by ``pull`` right after the comparison.
    """
    experiment = setup(
        data=frame,
        target=target,
        session_id=session_id,
        verbose=False,
        **setup_kwargs,
    )
    best_model = compare_models(include=include)
    results = pull()
    return experiment, best_model, results


# Load dataset
iris = load_iris()

# Build the modelling frame: features plus the class label, with the
# " (cm)" unit suffix dropped and spaces replaced so that column names are
# plain identifiers (e.g. "sepal length (cm)" -> "sepal_length").
data = (
    pd.DataFrame(iris.data, columns=iris.feature_names)
    .assign(target=iris.target)
    .rename(columns=lambda col: col.replace(" (cm)", "").replace(" ", "_"))
)

# 1. Standard setup (PyCaret defaults)
print("Running default PyCaret setup...")
standard_setup, standard_models, standard_results = run_experiment(data)

# 2. Explicit setup (custom parameters)
# NOTE: train_size=0.8 differs from PyCaret's documented default (0.7), so
# this run changes the train/test split in addition to the preprocessing.
print("Running PyCaret setup with custom parameters...")
explicit_setup, explicit_models, explicit_results = run_experiment(
    data,
    train_size=0.8,                    # Explicit train/test split
    numeric_imputation='mean',         # Default numeric imputation
    categorical_imputation='mode',     # Default categorical imputation
    normalize=True,                    # Enable normalization
    normalize_method='zscore',         # Use z-score normalization
    remove_multicollinearity=True,     # Remove multicollinear features
    multicollinearity_threshold=0.85,  # Threshold for multicollinearity
    fix_imbalance=False,               # No imbalance fixing
)

# 3. Selected models only
# setup() is run again with the default configuration: the customised
# experiment from step 2 would otherwise still be the active one.
print("Running PyCaret with specific model selection...")
selected_models = ['rf', 'xgboost', 'lightgbm']  # Specify selected models
selected_models_setup, best_selected_model, selected_results = run_experiment(
    data, include=selected_models
)

# Display results
# to_string() renders each score grid at full width; a plain print() of the
# DataFrame wraps the columns at the terminal width and splits the table.
for section_title, score_grid in (
    ("Default Results", standard_results),
    ("Explicit Results", explicit_results),
    ("Selected Models Results", selected_results),
):
    print(f"\n--- {section_title} ---")
    print(score_grid.to_string())


Running default PyCaret setup...


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
qda,Quadratic Discriminant Analysis,0.9809,0.0,0.9809,0.9852,0.9805,0.9711,0.9735,0.045
nb,Naive Bayes,0.9727,0.9974,0.9727,0.9806,0.9714,0.9588,0.9638,0.029
lda,Linear Discriminant Analysis,0.9718,0.0,0.9718,0.978,0.9712,0.9573,0.9609,0.025
et,Extra Trees Classifier,0.9718,1.0,0.9718,0.978,0.9712,0.9573,0.9609,0.156
lr,Logistic Regression,0.9618,0.0,0.9618,0.9705,0.961,0.9422,0.947,0.457
ada,Ada Boost Classifier,0.9618,0.0,0.9618,0.9705,0.961,0.9422,0.947,0.199
knn,K Neighbors Classifier,0.9527,0.9888,0.9527,0.9595,0.952,0.9284,0.9322,0.042
rf,Random Forest Classifier,0.9518,0.9958,0.9518,0.966,0.9487,0.927,0.9352,0.281
gbc,Gradient Boosting Classifier,0.9518,0.0,0.9518,0.966,0.9487,0.927,0.9352,0.273
xgboost,Extreme Gradient Boosting,0.9436,0.9847,0.9436,0.9588,0.9417,0.9149,0.9236,0.065


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Running PyCaret setup with custom parameters...


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lda,Linear Discriminant Analysis,0.9667,0.0,0.9667,0.9756,0.9653,0.95,0.9552,0.106
lr,Logistic Regression,0.9583,0.0,0.9583,0.9689,0.9568,0.9375,0.9436,0.062
qda,Quadratic Discriminant Analysis,0.9583,0.0,0.9583,0.9633,0.9579,0.9375,0.9403,0.037
nb,Naive Bayes,0.95,0.9927,0.95,0.9578,0.9484,0.925,0.9299,0.036
svm,SVM - Linear Kernel,0.95,0.0,0.95,0.9606,0.9485,0.925,0.931,0.044
knn,K Neighbors Classifier,0.9417,0.9688,0.9417,0.9472,0.9409,0.9125,0.9158,0.068
dt,Decision Tree Classifier,0.9417,0.9562,0.9417,0.9556,0.9399,0.9125,0.9205,0.038
gbc,Gradient Boosting Classifier,0.9417,0.0,0.9417,0.9556,0.9399,0.9125,0.9205,0.495
lightgbm,Light Gradient Boosting Machine,0.9417,0.9854,0.9417,0.95,0.941,0.9125,0.9171,0.869
rf,Random Forest Classifier,0.9333,0.9844,0.9333,0.9422,0.9323,0.9,0.9053,0.233


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Running PyCaret with specific model selection...


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9518,0.9958,0.9518,0.966,0.9487,0.927,0.9352,0.185
xgboost,Extreme Gradient Boosting,0.9436,0.9847,0.9436,0.9588,0.9417,0.9149,0.9236,0.053
lightgbm,Light Gradient Boosting Machine,0.9427,0.9739,0.9427,0.952,0.9417,0.9133,0.9183,0.493


Processing:   0%|          | 0/17 [00:00<?, ?it/s]


--- Default Results ---
                                    Model  Accuracy     AUC  Recall   Prec.  \
qda       Quadratic Discriminant Analysis    0.9809  0.0000  0.9809  0.9852   
nb                            Naive Bayes    0.9727  0.9974  0.9727  0.9806   
lda          Linear Discriminant Analysis    0.9718  0.0000  0.9718  0.9780   
et                 Extra Trees Classifier    0.9718  1.0000  0.9718  0.9780   
lr                    Logistic Regression    0.9618  0.0000  0.9618  0.9705   
ada                  Ada Boost Classifier    0.9618  0.0000  0.9618  0.9705   
knn                K Neighbors Classifier    0.9527  0.9888  0.9527  0.9595   
rf               Random Forest Classifier    0.9518  0.9958  0.9518  0.9660   
gbc          Gradient Boosting Classifier    0.9518  0.0000  0.9518  0.9660   
xgboost         Extreme Gradient Boosting    0.9436  0.9847  0.9436  0.9588   
lightgbm  Light Gradient Boosting Machine    0.9427  0.9739  0.9427  0.9520   
dt               Decision T