In [1]:
# Import necessary libraries
import inspect

import seaborn as sns
from pycaret.classification import *

# --- Step 1: Load the Dataset ---
# Seaborn exposes the Titanic data through its load_dataset() helper.
titanic_df = sns.load_dataset('titanic')

# Preview the leading rows, followed by a 50-character divider line.
print("Original Titanic Dataset:", titanic_df.head(), sep="\n")
print(f"\n{'=' * 50}\n")


# --- Step 2: Initialize PyCaret Environment ---
# setup() initializes the experiment and prepares the data: it imputes missing
# values, encodes categorical features, and splits the data into train/test.
print("Setting up PyCaret Environment...")
setup_kwargs = {
    'data': titanic_df,
    'target': 'survived',        # The column we want to predict
    'session_id': 123,           # For reproducibility
    'ignore_features': ['who', 'adult_male', 'alive', 'alone'],  # Redundant columns
    'numeric_imputation': 'mean',
    # PyCaret names most-frequent-value imputation 'mode'. The scikit-learn
    # spelling 'most_frequent' is not a recognised strategy here: depending on
    # the PyCaret version it is rejected or used as a literal fill string.
    'categorical_imputation': 'mode',
}
# `silent=True` (skip the interactive dtype confirmation) only exists in
# PyCaret 2.x; it was removed in 3.0, where passing it raises TypeError.
# Forward it only when the installed setup() still accepts it.
if 'silent' in inspect.signature(setup).parameters:
    setup_kwargs['silent'] = True
clf_setup = setup(**setup_kwargs)
print("Setup Complete.\n")


# --- Step 3: Compare Baseline Models ---
# compare_models() trains and cross-validates the models available in PyCaret
# and produces a ranking table; here the ranking metric is accuracy.
print("Comparing all baseline models...")
best_model = compare_models(
    sort='Accuracy',  # rank the comparison table by accuracy
)
print(f"\n{'=' * 50}\n")


# --- Step 4: Create and Tune the Best Model ---
# This walkthrough assumes LightGBM ('lightgbm') came out on top in
# compare_models(); build that model explicitly, then tune its hyperparameters.
print("Creating a specific model (LightGBM)...")
lgbm = create_model(estimator='lightgbm')

print("\nTuning the LightGBM model...")
tuned_lgbm = tune_model(estimator=lgbm)


# --- Step 5: Analyze the Tuned Model ---
# PyCaret can render several diagnostic plots for a model. Draw the confusion
# matrix first and the AUC curve second.
print("\nPlotting model performance...")
# Plots either open in a new window or render inline when run in a notebook.
for plot_name in ('confusion_matrix', 'auc'):
    plot_model(tuned_lgbm, plot=plot_name)


# --- Step 6: Finalize the Model for Deployment ---
# finalize_model() refits the model on the entire dataset, hold-out set included.
print("Finalizing the tuned model...")
final_model = finalize_model(estimator=tuned_lgbm)
print("\nFinal Model Parameters:", final_model, sep="\n")


# --- Step 7: Make Predictions ---
# setup() automatically held out a test split. final_model was refit on the
# entire dataset in Step 6 (hold-out rows included), so it has already seen
# those rows and scoring it on them would report leaked, over-optimistic
# results. Score the hold-out with tuned_lgbm instead: it was fitted before
# finalization, so the hold-out rows are still unseen to it.
unseen_data = get_config('X_test')  # hold-out features, kept for inspection

# Called without `data`, predict_model() scores the hold-out split itself and
# lets PyCaret apply its preprocessing, rather than feeding a frame taken from
# get_config() back through the pipeline.
predictions = predict_model(tuned_lgbm)

print("\nPredictions on unseen data:")
print(predictions.head())

ModuleNotFoundError: No module named 'seaborn'