In [2]:
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

# --- Global config for consistent notebook and saved plots ---
%config InlineBackend.figure_format = 'retina'
plt.rcParams['figure.dpi'] = 150
plt.rcParams['savefig.dpi'] = 150
plt.rcParams['font.size'] = 9
sns.set_style("whitegrid")
sns.set_context("notebook", font_scale=0.8)

# Load data
df_encoded = pd.read_csv("DD_Python_2.csv")

# Define colors for Outcome
outcome_colors = {
    'Attended': '#A9D18E',
    'Booked but did not attend': '#ED7D31',
    'Declined': '#C00000'
}

# Variables to plot against Outcome
variables = [
    'Engagement_with_FH',
    'Ethnicity',
    'Employment_Status',
    'IDACI',
    'Children_in_Family',
    'Family_Composition',
    'Distance_to_Centre_(m)'   # NEW VARIABLE
]

for var in variables:
    fig, ax = plt.subplots(figsize=(8, 6))  # separate figure per variable

    if df_encoded[var].dtype.name == 'category' or df_encoded[var].dtype == object:
        # Raw counts and percentages
        counts = df_encoded.groupby(var)['Outcome'].value_counts().unstack(fill_value=0)
        percentages = counts.div(counts.sum(axis=1), axis=0)

        # Plot percentages
        bars = percentages.plot(
            kind='bar',
            stacked=True,
            color=[outcome_colors.get(x, 'grey') for x in percentages.columns],
            ax=ax
        )

        ax.set_ylabel('Proportion')
        ax.set_xlabel(var)

        # Annotate bars with raw counts
        for container, outcome in zip(ax.containers, counts.columns):
            labels = counts[outcome]
            ax.bar_label(
                container,
                labels=labels,
                label_type="center",
                fontsize=7,
                color="black"
            )

        # Legend outside
        ax.legend(
            title="Outcome",
            fontsize=7,
            title_fontsize=8,
            loc="center left",
            bbox_to_anchor=(1.0, 0.5)
        )

    else:
        # Numeric: boxplot
        sns.boxplot(
            x='Outcome',
            y=var,
            data=df_encoded,
            order=['Attended', 'Booked but did not attend', 'Declined'],
            palette=outcome_colors,
            ax=ax
        )
        ax.set_xlabel("Outcome")
        ax.set_ylabel(var)

    ax.set_title(f'{var} vs Outcome', fontsize=10)

    plt.tight_layout()
    # Save each plot as a separate PNG
    fig.savefig(f"{var}_vs_outcome.png", bbox_inches="tight")
    plt.close(fig)  # close figure to avoid overlap in loop



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(
