In [None]:
print("\n" + "="*70)
print("--- Cross-Comparison Study 1: Did the models learn what our EDA showed? ---")
print("="*70)

# Cross-comparison cell: draws a two-panel figure with the per-job mean of
# 'y_numeric' from raw_df on the left and the champion model's ten largest
# feature importances on the right.
#
# Relies on names created by earlier cells: raw_df, results_df,
# models_predictions, X_test, plus the plotly helpers make_subplots / go and
# pandas as pd.

# --- Part A: Compare against 'job' feature ---

# 1. EDA side: mean of 'y_numeric' per job, highest first.
#    NOTE(review): presumably 'y_numeric' is a 0/1 subscription flag, so the
#    mean reads as a rate (the axis below is formatted as a percentage) - confirm.
job_success_rate = raw_df.groupby('job')['y_numeric'].mean().sort_values(ascending=False)

# 2. Champion model = first row of results_df (assumed sorted by F1-score).
#    Use a positional lookup: after a sort_values() without reset_index(),
#    the label 0 still points at the originally first row, not the top-ranked one.
champion_name = results_df['Model'].iloc[0]
champion_data = models_predictions[champion_name]

# 3. Create the side-by-side plot
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('<b>EDA:</b> Actual Subscription Rate by Job', f'<b>{champion_name}:</b> Top Feature Importances')
)

# Plot 1: The EDA result
fig.add_trace(
    go.Bar(x=job_success_rate.index, y=job_success_rate.values, name='Actual Rate'),
    row=1, col=1
)

# Plot 2: The model's feature importances.
# Best-effort: any failure in this section (for example a classifier without
# feature_importances_) is reported with a message instead of stopping the
# notebook, and in that case no figure is shown at all.
try:
    pipeline = champion_data['model']
    steps = pipeline.named_steps
    classifier = steps['classifier']

    # When an 'rfe' step is present, keep only the columns its boolean
    # support_ mask selected so the names match the classifier's inputs.
    # NOTE(review): assumes X_test.columns line up one-to-one with the features
    # reaching the classifier (or the RFE step); if another step changes the
    # feature count, the DataFrame construction below fails and we land in
    # the except branch - confirm.
    if 'rfe' in steps:
        support = steps['rfe'].support_
        feature_names = X_test.columns[support]
    else:
        feature_names = X_test.columns
    importances = classifier.feature_importances_

    # Ten largest importances, in descending order.
    importance_df = (
        pd.DataFrame({'Feature': feature_names, 'Importance': importances})
        .sort_values('Importance', ascending=False)
        .head(10)
    )

    fig.add_trace(
        go.Bar(x=importance_df['Feature'], y=importance_df['Importance'], name='Feature Importance'),
        row=1, col=2
    )

    fig.update_layout(title_text="<b>Cross-Comparison: EDA vs. Champion Model's Logic</b>", title_x=0.5, showlegend=False, height=600)
    fig.update_yaxes(title_text="Subscription Rate", tickformat=".0%", row=1, col=1)
    fig.update_yaxes(title_text="Importance Score", row=1, col=2)

    print("Generated EDA vs. Feature Importance comparison plot.")
    fig.show()

except Exception as e:
    print(f"Could not generate feature importance comparison. The champion model may not be a tree-based model. Error: {e}")