## Setup

In [38]:
# Import packages
import pandas as pd
from ds_common_utils.aux.io.snowflake_tools import SnowflakeTools
from datetime import datetime
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Setup connection
# Single Snowflake connection object reused by every query cell below.
# Role, warehouse, database and schema are fixed here; edit these values to run
# the notebook against a different environment.
con = SnowflakeTools(
    role="INSIGHT_ANALYST_MERCH_DE_GENERAL_PRD",
    warehouse="INSIGHT_ANALYST_WH",
    database="BDWPRD_DE",
    schema="IA_MERCH_DE"
)

In [3]:
# Reporting window (inclusive bounds as written; how the SQL applies them is not visible here).
# The values carry their own single quotes because they are substituted
# verbatim into the SQL templates via the `formatting` dictionaries below.
start_date, end_date = "'2024-07-01'", "'2025-06-30'"

In [157]:
# Colour per demographic segment; the insertion order of this mapping is also
# the left-to-right order in which segments are drawn on the index charts.
bunnings_theme_colors = dict([
    ("Older homeowner", "#004b46"),
    ("Younger homeowner", "#007f6e"),
    ("Homeowner older kids", "#002b62"),
    ("Homeowner younger kids", "#4a90e2"),
    ("Renter with kids", "#c7332c"),
    ("Renter no kids", "#ff7f7f"),
])
demo_order = [*bunnings_theme_colors]

In [158]:
# Common function for plotting
def plot_index_bar_chart(title, df, seg_col="DEMOGRAPHIC_SEGMENT", sales_col="SEGMENT_SALES",
                         index_col="SALES_INDEX", segment_order=None, colors=None):
    """Build a bar chart of a sales index per segment, with a reference line at 1.

    Rows whose segment is not listed in ``segment_order`` are dropped, the
    remaining rows are sorted to follow ``segment_order``, and each bar is
    labelled with its sales value formatted as whole dollars.

    Parameters
    ----------
    title : str
        Chart title (also used as the trace name).
    df : pandas.DataFrame
        Input rows; not modified (a filtered copy is used).
    seg_col : str
        Column holding the segment name of each row.
    sales_col : str
        Column holding the sales value shown as the bar label.
    index_col : str
        Column holding the index plotted on the y axis.
    segment_order : list of str, optional
        Segments to keep and their left-to-right order. Defaults to the
        notebook-level ``demo_order``.
    colors : dict, optional
        Mapping of segment name to bar colour; must contain every segment in
        ``segment_order``. Defaults to the notebook-level ``bunnings_theme_colors``.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure; the caller decides whether to show or export it.
    """
    if segment_order is None:
        segment_order = demo_order
    if colors is None:
        colors = bunnings_theme_colors

    plot_df = df[df[seg_col].isin(segment_order)].copy()
    plot_df["LABEL"] = plot_df[sales_col].apply(lambda x: f"${x:,.0f}")
    # An ordered categorical makes sort_values follow segment_order rather than A-Z
    plot_df[seg_col] = pd.Categorical(plot_df[seg_col], categories=segment_order, ordered=True)
    plot_df = plot_df.sort_values(seg_col)
    # Plain list of colour strings, one per bar, in plotting order
    bar_colors = [colors[segment] for segment in plot_df[seg_col]]

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=plot_df[seg_col],
        y=plot_df[index_col],
        text=plot_df["LABEL"],
        textposition="outside",
        marker_color=bar_colors,
        name=title
    ))
    # Dashed reference line at index = 1, drawn behind the bars. On a categorical
    # axis, bar i is centred at x = i, so -0.5 .. len - 0.5 spans every bar.
    fig.add_shape(
        type="line",
        x0=-0.5,
        x1=len(segment_order) - 0.5,
        y0=1,
        y1=1,
        xref='x',
        yref='y',
        line=dict(color="red", dash="dash", width=2),
        layer="below"
    )
    # Tick labels and legend are hidden, so segments are identified by bar colour only
    fig.update_layout(
        title=dict(text=title, x=0.5, xanchor='center', font=dict(size=16)),
        xaxis=dict(title=None, showticklabels=False, showgrid=False, zeroline=False, showline=False),
        yaxis=dict(title="Sales Index", showgrid=True, zeroline=False),
        plot_bgcolor="white",
        paper_bgcolor="white",
        height=500,
        width=600,
        showlegend=False
    )
    return fig

## Data

In [4]:
# Name of the working item-range table: fixed prefix followed directly by
# today's date as YYYY_MM_DD (no separator between prefix and date).
table_GM = f"GM_item_range{datetime.today():%Y_%m_%d}"

In [5]:
# Create temp item table
# Runs sql/0-item-range.sql with the table name and an extra filter clause
# passed through `formatting` (presumably substituted into {table_name} /
# {filter} placeholders in the file -- confirm against the SQL).
# Note the filter here starts with "AND", unlike the filters passed to the
# "-overall" queries further down.
con.execute_statement_from_sql_file(
    file='sql/0-item-range.sql',
    formatting={'table_name': table_GM,
                'filter' : "AND item_sub_department_name = '400 GROWING MEDIA' "})

snowflake_tools - 2025-07-12 17:53:03.390630+10:00 - Generating new token with 'INSIGHT_ANALYST_MERCH_DE_GENERAL_PRD' role and 'SESSION:ROLE-ANY' scope...
snowflake_tools - 2025-07-12 17:53:09.034195+10:00 - Saved token with '2025-07-12 21:53:09+10:00' expiry...


## Demographic Segments (AU)

### Overall index

In [112]:
# Overall demographic-segment figures for the Growing Media sub-department
# over the reporting window, loaded into a pandas DataFrame.
ds = con.read_sql_file_into_pandas(
    file="sql/demographic-segments-overall.sql",
    formatting={
        'start_date': start_date,
        'end_date': end_date,
        'filter' : "item_sub_department_name = '400 GROWING MEDIA'"
    }
)

In [113]:
# Range totals summed over every demographic segment row
print(
    f"Total sales: ${ds['RANGE_SALES'].sum():,.2f}",
    f"Total quantity: {int(ds['RANGE_QUANTITY'].sum()):,}",
    f"Total customers: {int(ds['RANGE_CUSTOMERS'].sum()):,}",
    sep="\n",
)

Total sales: $160,990,982.88
Total quantity: 18,547,988
Total customers: 2,593,309


In [114]:
# Per-segment range sales, share of range sales and sales index
ds[["DEMOGRAPHIC_SEGMENT", "RANGE_SALES", "RANGE_SALES_SHARE", "SALES_INDEX"]]

Unnamed: 0,DEMOGRAPHIC_SEGMENT,RANGE_SALES,RANGE_SALES_SHARE,SALES_INDEX
0,Homeowner older kids,12662481.43,0.078653,0.83
1,Homeowner younger kids,21654163.48,0.134505,0.72
2,Older homeowner,87779630.91,0.545246,1.31
3,Renter no kids,9344924.16,0.058046,0.65
4,Renter with kids,5130008.35,0.031865,0.52
5,Unclassifiable,1426896.56,0.008863,1.31
6,Younger homeowner,22992877.99,0.142821,0.99


### Class index

In [115]:
# Demographic-segment figures at item-class level for AU, restricted to the
# items in the working item-range table created above.
GM_ds = con.read_sql_file_into_pandas(
    file="sql/1-demographic-segments.sql",
    formatting={
        'start_date': start_date,
        'end_date': end_date,
        'table_name': table_GM,
        'level' : 'item_class_name',
        'country' : 'AU' 
    }
)

In [116]:
# One row per item class: keep the first row for each ITEM_CLASS_NAME
# (presumably the TOTAL_* columns repeat on every segment row of a class --
# confirm against the SQL) and rename the totals to class-level names.
summary = GM_ds.loc[:, ["ITEM_CLASS_NAME", "TOTAL_SALES", "TOTAL_QUANTITY", "TOTAL_CUSTOMERS"]]
summary = summary.drop_duplicates(subset="ITEM_CLASS_NAME")
summary = summary.rename(columns={
    "TOTAL_SALES": "CLASS_SALES",
    "TOTAL_QUANTITY": "CLASS_QUANTITY",
    "TOTAL_CUSTOMERS": "CLASS_CUSTOMERS"
})

# Totals across all classes: the denominators for the share columns
total_sales = summary["CLASS_SALES"].sum()
total_quantity = summary["CLASS_QUANTITY"].sum()
# NOTE(review): a customer buying from several classes is presumably counted
# once per class, so this is a sum of class-level counts, not unique customers.
total_customers = summary["CLASS_CUSTOMERS"].sum()

# Each class's share of the totals above
summary = summary.assign(
    CLASS_SALES_SHARE=summary["CLASS_SALES"] / total_sales,
    CLASS_QUANTITY_SHARE=summary["CLASS_QUANTITY"] / total_quantity,
    CLASS_CUSTOMERS_SHARE=summary["CLASS_CUSTOMERS"] / total_customers,
)

summary

Unnamed: 0,ITEM_CLASS_NAME,CLASS_SALES,CLASS_QUANTITY,CLASS_CUSTOMERS,CLASS_SALES_SHARE,CLASS_QUANTITY_SHARE,CLASS_CUSTOMERS_SHARE
0,500 COIR MULCH BLOCK HYDRO,1153117.19,60414.0,30305,0.007163,0.003257,0.006824
7,500 COIR POTTING MIX BLOCK,711724.28,84957.0,49574,0.004421,0.00458,0.011164
14,500 COMPOSTS SOIL CONDITIONERS,29790216.64,4178986.0,883163,0.185043,0.225307,0.19888
21,500 MANURE,5822842.96,1102251.0,293689,0.036169,0.059427,0.066136
28,500 MULCHES,39097013.47,3302047.0,743036,0.242852,0.178027,0.167325
35,500 POTTING MIXES,76883162.93,8922037.0,1946982,0.477562,0.481025,0.438442
42,500 SMALL BAG POTTING MIX,7532905.41,897296.0,493936,0.046791,0.048377,0.11123


In [117]:
# Cross-check: class totals should add up to the range totals printed earlier
print(
    f"Total sales: ${summary['CLASS_SALES'].sum():,.2f}",
    f"Total quantity: {int(summary['CLASS_QUANTITY'].sum()):,}",
    sep="\n",
)

Total sales: $160,990,982.88
Total quantity: 18,547,988


In [118]:
# Segment sales, share and index for every class / demographic segment pair
GM_ds[['ITEM_CLASS_NAME', 'DEMOGRAPHIC_SEGMENT', 'SEGMENT_SALES', 'SEGMENT_SALES_SHARE', 'SALES_INDEX']]

Unnamed: 0,ITEM_CLASS_NAME,DEMOGRAPHIC_SEGMENT,SEGMENT_SALES,SEGMENT_SALES_SHARE,SALES_INDEX
0,500 COIR MULCH BLOCK HYDRO,Homeowner older kids,100115.46,0.086822,1.103851
1,500 COIR MULCH BLOCK HYDRO,Homeowner younger kids,180001.79,0.1561,1.160549
2,500 COIR MULCH BLOCK HYDRO,Older homeowner,539830.97,0.468149,0.858603
3,500 COIR MULCH BLOCK HYDRO,Renter no kids,86082.1,0.074652,1.286072
4,500 COIR MULCH BLOCK HYDRO,Renter with kids,42143.87,0.036548,1.14695
5,500 COIR MULCH BLOCK HYDRO,Unclassifiable,7612.58,0.006602,0.744848
6,500 COIR MULCH BLOCK HYDRO,Younger homeowner,197330.42,0.171128,1.198199
7,500 COIR POTTING MIX BLOCK,Homeowner older kids,55577.73,0.078089,0.992823
8,500 COIR POTTING MIX BLOCK,Homeowner younger kids,98097.94,0.137831,1.024727
9,500 COIR POTTING MIX BLOCK,Older homeowner,344955.87,0.484676,0.888914


About 48% of sales come from 500 POTTING MIXES, 24% from 500 MULCHES and 19% from 500 COMPOSTS SOIL CONDITIONERS; the remaining four classes together contribute only about 9%.
- Renter no kids is under-indexed in 500 MANURE and 500 MULCHES
- Renters are over-indexed in 500 COIR MULCH BLOCK HYDRO, 500 COIR POTTING MIX BLOCK and 500 SMALL BAG POTTING MIX
- Homeowner younger kids is over-indexed in 500 COMPOSTS SOIL CONDITIONERS

In [159]:
# Build one index chart per item class, in order of first appearance in GM_ds
figs = [
    plot_index_bar_chart(item, GM_ds[GM_ds["ITEM_CLASS_NAME"] == item].copy())
    for item in GM_ds["ITEM_CLASS_NAME"].unique()
]

# Render the charts one after another
for fig in figs:
    fig.show()

## Non-Commercial (NZ)

### Class index

In [120]:
# Item-class level figures for NZ from the non-commercial query, restricted
# to the items in the working item-range table.
non_commercial = con.read_sql_file_into_pandas(
    file="sql/non-commercial-overall.sql",
    formatting={
        'start_date': start_date,
        'end_date': end_date,
        'table_name': table_GM,
        'level' : 'item_class_name',
        'country' : 'NZ'
    }
)

In [121]:
# Full result: one row per item class with sales / quantity and their shares
non_commercial

Unnamed: 0,ITEM_CLASS_NAME,TOTAL_SALES,TOTAL_QUANTITY,SALES_SHARE,QUANTITY_SHARE
0,500 COIR POTTING MIX BLOCK,142584.35,22271.0,0.007401,0.010284
1,500 COMPOSTS SOIL CONDITIONERS,5441679.07,750366.0,0.282439,0.346487
2,500 MULCHES,3065798.89,301594.0,0.159124,0.139263
3,500 POTTING MIXES,9198240.7,929780.0,0.477415,0.429333
4,500 SMALL BAG POTTING MIX,1418457.7,161629.0,0.073622,0.074633


In [122]:
# Totals across all item classes in the non-commercial result
print(
    f"Total sales: ${non_commercial['TOTAL_SALES'].sum():,.2f}",
    f"Total quantity: {int(non_commercial['TOTAL_QUANTITY'].sum()):,}",
    sep="\n",
)

Total sales: $19,266,760.71
Total quantity: 2,165,640


## Commercial Industry Segment (NZ)

### Overall index

In [123]:
# Overall commercial industry-segment figures for the Growing Media
# sub-department in NZ over the reporting window.
cis = con.read_sql_file_into_pandas(
    file="sql/commercial-industry-segments-overall.sql",
    formatting={
        'start_date': start_date,
        'end_date': end_date,
        'filter' : "item_sub_department_name = '400 GROWING MEDIA'",
        'country' : 'NZ'
    }
)

In [124]:
# Range totals summed over every industry segment row
print(
    f"Total sales: ${cis['RANGE_SALES'].sum():,.2f}",
    f"Total quantity: {int(cis['RANGE_QUANTITY'].sum()):,}",
    f"Total customers: {int(cis['RANGE_CUSTOMERS'].sum()):,}",
    sep="\n",
)

Total sales: $4,039,332.60
Total quantity: 471,169
Total customers: 34,864


In [192]:
# Number of industry segments to report; also passed to the class-level query below
n = 8
# The n segments with the highest range sales (raw values, reused by later cells)
top_n = cis.nlargest(n, "RANGE_SALES").copy()

# Presentation copy: readable column names, sales as whole dollars,
# share as a whole percentage and index to one decimal place
formatted_top_n = top_n.assign(**{
    "Industry Segment": top_n["INDUSTRY_SEGMENT"],
    "Sales": top_n["RANGE_SALES"].map("${:,.0f}".format),
    "Sales Share": (top_n["RANGE_SALES_SHARE"] * 100).map("{:.0f}%".format),
    "Sales Index": top_n["SALES_INDEX"].map("{:.1f}".format),
})[["Industry Segment", "Sales", "Sales Share", "Sales Index"]]
formatted_top_n

Unnamed: 0,Industry Segment,Sales,Sales Share,Sales Index
0,Professional Computer and Scientific Services,"$664,112",16%,2.0
1,Personal and Other Services,"$572,820",14%,2.3
2,Rental Hiring and Real Estate Services,"$327,686",8%,0.9
3,Residential Builder,"$307,160",8%,0.3
4,Retail and Wholesale Trade,"$306,579",8%,1.5
5,Manufacturing,"$289,981",7%,1.4
6,Site Preparation and Development,"$212,007",5%,0.7
7,Landscaper and Gardening Services,"$205,593",5%,1.5


In [185]:
# Combined share of the top-n segments on each measure
for share_label, share_col in [
    ("sales", "RANGE_SALES_SHARE"),
    ("quantity", "RANGE_QUANTITY_SHARE"),
    ("customer", "RANGE_CUSTOMER_SHARE"),
    ("size", "SEGMENT_SIZE_SHARE"),
]:
    print(f"Top {n} segments - {share_label} share: {top_n[share_col].sum():.2%}")

Top 8 segments - sales share: 71.45%
Top 8 segments - quantity share: 71.34%
Top 8 segments - customer share: 64.48%
Top 8 segments - size share: 45.90%


### Class index

In [163]:
# Get data
# Commercial industry-segment figures at item-class level for NZ, limited by
# the query to `n` commercial segments and to the working item-range table.
# NOTE(review): the displayed result contains "Commercial Builder" but not
# "Landscaper and Gardening Services", whereas the pandas top-n by RANGE_SALES
# above is the other way round -- the SQL presumably ranks segments on a
# different measure; confirm which top-n definition is intended.
GM_is = con.read_sql_file_into_pandas(
    file="sql/2-commercial-industry-segment.sql",
    formatting={
        'start_date': start_date,
        'end_date': end_date,
        'n_commercial_segments': n, 
        'table_name': table_GM,
        'level' : 'item_class_name',
        'country' : 'NZ' 
    }
)

In [164]:
# One row per item class: keep the first row for each ITEM_CLASS_NAME
# (presumably the TOTAL_* columns repeat on every segment row of a class --
# confirm against the SQL) and rename the totals to class-level names.
# This rebinds `summary`, replacing the AU table built earlier.
summary = GM_is.loc[:, ["ITEM_CLASS_NAME", "TOTAL_SALES", "TOTAL_QUANTITY", "TOTAL_CUSTOMERS"]]
summary = summary.drop_duplicates(subset="ITEM_CLASS_NAME")
summary = summary.rename(columns={
    "TOTAL_SALES": "CLASS_SALES",
    "TOTAL_QUANTITY": "CLASS_QUANTITY",
    "TOTAL_CUSTOMERS": "CLASS_CUSTOMERS"
})

# Totals across all classes: the denominators for the share columns
total_sales = summary["CLASS_SALES"].sum()
total_quantity = summary["CLASS_QUANTITY"].sum()
# NOTE(review): a customer buying from several classes is presumably counted
# once per class, so this is a sum of class-level counts, not unique customers.
total_customers = summary["CLASS_CUSTOMERS"].sum()

# Each class's share of the totals above
summary = summary.assign(
    CLASS_SALES_SHARE=summary["CLASS_SALES"] / total_sales,
    CLASS_QUANTITY_SHARE=summary["CLASS_QUANTITY"] / total_quantity,
    CLASS_CUSTOMERS_SHARE=summary["CLASS_CUSTOMERS"] / total_customers,
)

summary

Unnamed: 0,ITEM_CLASS_NAME,CLASS_SALES,CLASS_QUANTITY,CLASS_CUSTOMERS,CLASS_SALES_SHARE,CLASS_QUANTITY_SHARE,CLASS_CUSTOMERS_SHARE
0,500 COIR POTTING MIX BLOCK,13194.96,2198.0,397,0.004816,0.006879,0.011183
8,500 COMPOSTS SOIL CONDITIONERS,826246.16,117969.0,11105,0.301568,0.369215,0.312826
16,500 MULCHES,441012.27,46597.0,4471,0.160963,0.145838,0.125947
24,500 POTTING MIXES,1333531.01,138938.0,15263,0.48672,0.434843,0.429956
32,500 SMALL BAG POTTING MIX,125849.25,13811.0,4263,0.045933,0.043225,0.120088


In [165]:
# Totals across the item classes in the NZ class-level summary
print(f"Total sales: ${summary['CLASS_SALES'].sum():,.2f}")
print(f"Total quantity: {int(summary['CLASS_QUANTITY'].sum()):,}")
# This adds up per-class customer counts, so a customer buying from several
# classes is counted more than once: the figure can exceed the number of
# distinct customers and is labelled accordingly.
print(f"Total customers (summed across classes, not unique): {int(summary['CLASS_CUSTOMERS'].sum()):,}")

Total sales: $2,739,833.65
Total quantity: 319,513
Total customers: 35,499


In [196]:
# Presentation copy of the class-level industry-segment result: readable
# column names, sales as whole dollars, share as a whole percentage and index
# to one decimal place. Rebinds `formatted_top_n` from the overall table above.
formatted_top_n = GM_is.assign(**{
    "Class": GM_is["ITEM_CLASS_NAME"],
    "Industry Segment": GM_is["INDUSTRY_SEGMENT"],
    "Sales": GM_is["SEGMENT_SALES"].map("${:,.0f}".format),
    "Sales Share": (GM_is["SEGMENT_SALES_SHARE"] * 100).map("{:.0f}%".format),
    "Sales Index": GM_is["SALES_INDEX"].map("{:.1f}".format),
})[["Class", "Industry Segment", "Sales", "Sales Share", "Sales Index"]]
formatted_top_n

Unnamed: 0,Class,Industry Segment,Sales,Sales Share,Sales Index
0,500 COIR POTTING MIX BLOCK,Commercial Builder,$107,1%,0.4
1,500 COIR POTTING MIX BLOCK,Manufacturing,"$1,796",14%,1.3
2,500 COIR POTTING MIX BLOCK,Personal and Other Services,"$2,893",22%,1.0
3,500 COIR POTTING MIX BLOCK,Professional Computer and Scientific Services,"$3,007",23%,0.9
4,500 COIR POTTING MIX BLOCK,Rental Hiring and Real Estate Services,"$1,272",10%,0.8
5,500 COIR POTTING MIX BLOCK,Residential Builder,"$1,996",15%,1.3
6,500 COIR POTTING MIX BLOCK,Retail and Wholesale Trade,"$1,633",12%,1.1
7,500 COIR POTTING MIX BLOCK,Site Preparation and Development,$492,4%,0.5
8,500 COMPOSTS SOIL CONDITIONERS,Commercial Builder,"$19,289",2%,1.1
9,500 COMPOSTS SOIL CONDITIONERS,Manufacturing,"$83,885",10%,1.0


## Drop item range table

In [197]:
# Drop item range table
# Clean-up of the working table created in the Data section. IF EXISTS keeps
# this cell idempotent: re-running it, or running it after a failed/skipped
# create step, no longer raises because the table is missing.
con.execute_statement_from_sql_string(
    statement='DROP TABLE IF EXISTS bdwprd_de.ia_merch_de.{table_name};',
    formatting={'table_name': table_GM}
)

snowflake_tools - 2025-07-12 22:35:13.754014+10:00 - Generating new token with 'INSIGHT_ANALYST_MERCH_DE_GENERAL_PRD' role and 'SESSION:ROLE-ANY' scope...
snowflake_tools - 2025-07-12 22:35:20.798183+10:00 - Saved token with '2025-07-13 02:35:20+10:00' expiry...


## Potting Mixes Volume

### AU

In [168]:
# Potting-mix volume data for AU over the reporting window. Unlike the class
# queries above, this one does not take the working item-range table.
pm_au = con.read_sql_file_into_pandas(
    file="sql/potting_mixes_volume.sql",
    formatting={
        'start_date': start_date,
        'end_date': end_date,
        'country' : 'AU'
    }
)

In [169]:
# Brand x package-size summary for AU.
# Sales and transactions are summed; the per-transaction and per-litre metrics
# are plain (unweighted) means of the input rows in each group.
brand_pkg_summary = (
    pm_au.groupby(["BRAND_CODE", "PACKAGE_LITRE"])
    .agg(
        total_sales=pd.NamedAgg(column="TOTAL_SALES", aggfunc="sum"),
        avg_volume_per_trx=pd.NamedAgg(column="ESTIMATED_LITRES_PER_TRX", aggfunc="mean"),
        avg_qty_per_trx=pd.NamedAgg(column="AVG_QTY_PER_TRX", aggfunc="mean"),
        total_transactions=pd.NamedAgg(column="TOTAL_TRX", aggfunc="sum"),
        avg_price_per_litre=pd.NamedAgg(column="AVG_PRICE_PER_LITRE", aggfunc="mean"),
    )
    .reset_index()
    # Only combinations with positive sales are kept (bubble size is total_sales)
    .loc[lambda grouped: grouped["total_sales"] > 0]
)

In [170]:
# Axis ordering: package sizes ascending, brands by total sales (largest first)
package_order = sorted(brand_pkg_summary["PACKAGE_LITRE"].unique())

brand_order = (
    brand_pkg_summary.groupby("BRAND_CODE")["total_sales"]
    .sum()
    .sort_values(ascending=False)
    .index.tolist()
)

# Bubble chart: one bubble per brand / package size, sized by total sales and
# coloured by the average litres bought per transaction
fig = px.scatter(
    brand_pkg_summary,
    x="PACKAGE_LITRE",
    y="BRAND_CODE",
    size="total_sales",
    color="avg_volume_per_trx",
    color_continuous_scale="RdYlGn",
    category_orders={
        "PACKAGE_LITRE": package_order,
        "BRAND_CODE": brand_order
    },
    hover_data={
        "total_sales": ":,.0f",
        "avg_price_per_litre": ":.2f",
        "total_transactions": ":,",
        "avg_qty_per_trx": ":.2f",
        "avg_volume_per_trx": ":.2f",
        "PACKAGE_LITRE": False,
        "BRAND_CODE": False
    },
    labels={
        "total_sales": "Total Sales ($) ",
        "avg_price_per_litre": "Avg $/L ",
        "total_transactions": "Total Trx ",
        "avg_qty_per_trx": "Avg Units/Trx ",
        "avg_volume_per_trx": "Avg Volume/Trx (L) "
    },
    title="AU Potting Mixes: Sales Volume by Brand & Package Size"
)

fig.update_layout(
    width=800,
    height=800,
    template="plotly_white",
    font=dict(family="Arial", size=16, color="#333"),
    title_font=dict(size=25),
    xaxis=dict(
        tickmode="array",
        tickvals=package_order,
        # ":g" drops the trailing ".0" of float sizes (40.0 -> "40L") while
        # keeping genuinely fractional sizes intact (2.5 -> "2.5L")
        ticktext=[f"{x:g}L" for x in package_order],
        # title.font is the supported spelling; the legacy `titlefont`
        # attribute is rejected by recent Plotly releases
        title=dict(text="", font=dict(size=14)),
        tickfont=dict(size=16)
    ),
    yaxis=dict(
        title=dict(text="", font=dict(size=14)),
        categoryorder="array",
        categoryarray=brand_order,
        tickfont=dict(size=16)
    ),
    coloraxis_colorbar=dict(
        title=dict(text="Avg Volume/Trx (L)", font=dict(size=16)),
        tickfont=dict(size=14)
    )
)

# Bubble scaling: in "area" mode the marker diameter is sqrt(value / sizeref),
# so with this sizeref the largest bubble is about 2 * desired_size_max pixels
# wide; sizemin keeps the smallest combinations visible.
max_sales = brand_pkg_summary["total_sales"].max()
desired_size_max = 30  # pixels, before the 2x enlargement from the 0.25 factor

fig.update_traces(
    marker=dict(
        sizemode="area",
        sizeref=0.25 * max_sales / desired_size_max**2,
        sizemin=5
    )
)

fig.show()

In [171]:
# Export the AU chart as an HTML file (path is relative to the working directory)
fig.write_html("potting_mix_volume_by_brand_AU.html")

### NZ

In [172]:
# Potting-mix volume data for NZ over the reporting window (same query as the
# AU load, with only the country changed).
pm_nz = con.read_sql_file_into_pandas(
    file="sql/potting_mixes_volume.sql",
    formatting={
        'start_date': start_date,
        'end_date': end_date,
        'country' : 'NZ'
    }
)

In [173]:
# Brand x package-size summary for NZ (rebinds `brand_pkg_summary` from the AU section).
# Sales and transactions are summed; the per-transaction and per-litre metrics
# are plain (unweighted) means of the input rows in each group.
brand_pkg_summary = (
    pm_nz.groupby(["BRAND_CODE", "PACKAGE_LITRE"])
    .agg(
        total_sales=pd.NamedAgg(column="TOTAL_SALES", aggfunc="sum"),
        avg_volume_per_trx=pd.NamedAgg(column="ESTIMATED_LITRES_PER_TRX", aggfunc="mean"),
        avg_qty_per_trx=pd.NamedAgg(column="AVG_QTY_PER_TRX", aggfunc="mean"),
        total_transactions=pd.NamedAgg(column="TOTAL_TRX", aggfunc="sum"),
        avg_price_per_litre=pd.NamedAgg(column="AVG_PRICE_PER_LITRE", aggfunc="mean"),
    )
    .reset_index()
    # Only combinations with positive sales are kept (bubble size is total_sales)
    .loc[lambda grouped: grouped["total_sales"] > 0]
)

In [199]:
# Axis ordering: package sizes ascending, brands by total sales (largest first).
# Sizes are kept as-is (not cast to int) so that a fractional size would keep
# its own tick at the right position instead of being truncated.
package_order = sorted(brand_pkg_summary["PACKAGE_LITRE"].unique())

brand_order = (
    brand_pkg_summary.groupby("BRAND_CODE")["total_sales"]
    .sum()
    .sort_values(ascending=False)
    .index.tolist()
)

# Bubble chart: one bubble per brand / package size, sized by total sales and
# coloured by the average litres bought per transaction
fig = px.scatter(
    brand_pkg_summary,
    x="PACKAGE_LITRE",
    y="BRAND_CODE",
    size="total_sales",
    color="avg_volume_per_trx",
    color_continuous_scale="RdYlGn",
    category_orders={
        "PACKAGE_LITRE": package_order,
        "BRAND_CODE": brand_order
    },
    hover_data={
        "total_sales": ":,.0f",
        "avg_price_per_litre": ":.2f",
        "total_transactions": ":,",
        "avg_qty_per_trx": ":.2f",
        "avg_volume_per_trx": ":.2f",
        "PACKAGE_LITRE": False,
        "BRAND_CODE": False
    },
    labels={
        "total_sales": "Total Sales ($) ",
        "avg_price_per_litre": "Avg $/L ",
        "total_transactions": "Total Trx ",
        "avg_qty_per_trx": "Avg Units/Trx ",
        "avg_volume_per_trx": "Avg Volume/Trx (L) "
    },
    title="NZ Potting Mixes: Sales Volume by Brand & Package Size"
)

fig.update_layout(
    width=800,
    height=800,
    template="plotly_white",
    font=dict(family="Arial", size=16, color="#333"),
    title_font=dict(size=25),
    xaxis=dict(
        tickmode="array",
        tickvals=package_order,
        # ":g" renders whole sizes without a decimal part (40.0 -> "40L")
        # and keeps fractional sizes intact (2.5 -> "2.5L")
        ticktext=[f"{x:g}L" for x in package_order],
        # title.font is the supported spelling; the legacy `titlefont`
        # attribute is rejected by recent Plotly releases
        title=dict(text="", font=dict(size=14)),
        tickfont=dict(size=16)
    ),
    yaxis=dict(
        title=dict(text="", font=dict(size=14)),
        categoryorder="array",
        categoryarray=brand_order,
        tickfont=dict(size=16)
    ),
    coloraxis_colorbar=dict(
        title=dict(text="Avg Volume/Trx (L)", font=dict(size=16)),
        tickfont=dict(size=14)
    )
)

# Bubble scaling: in "area" mode the marker diameter is sqrt(value / sizeref),
# so with this sizeref the largest bubble is about 2 * desired_size_max pixels
# wide; sizemin keeps the smallest combinations visible.
max_sales = brand_pkg_summary["total_sales"].max()
desired_size_max = 30  # pixels, before the 2x enlargement from the 0.25 factor

fig.update_traces(
    marker=dict(
        sizemode="area",
        sizeref=0.25 * max_sales / desired_size_max**2,
        sizemin=5
    )
)

fig.show()

In [175]:
# Export the NZ chart as an HTML file (path is relative to the working directory)
fig.write_html("potting_mix_volume_by_brand_NZ.html")

In [176]:
# NZ potting mixes by package size, busiest size (most transactions) first.
# Sales and transactions are summed; the avg_* columns are unweighted means of
# the input rows, so low-volume rows weigh as much as high-volume ones.
# NOTE(review): in the displayed output avg_price_per_litre (~11-12) is far
# above total_sales / total_transactions / avg_volume_per_trx -- confirm the
# definition of AVG_PRICE_PER_LITRE in the SQL.
summary_by_package_full = (
    pm_nz.groupby('PACKAGE_LITRE')
    .agg(
        total_sales=pd.NamedAgg(column='TOTAL_SALES', aggfunc='sum'),
        total_transactions=pd.NamedAgg(column='TOTAL_TRX', aggfunc='sum'),
        avg_units_per_trx=pd.NamedAgg(column='AVG_QTY_PER_TRX', aggfunc='mean'),
        avg_volume_per_trx=pd.NamedAgg(column='ESTIMATED_LITRES_PER_TRX', aggfunc='mean'),
        avg_price_per_litre=pd.NamedAgg(column='AVG_PRICE_PER_LITRE', aggfunc='mean'),
        num_items=pd.NamedAgg(column='ITEM_NUMBER', aggfunc='nunique'),
    )
    .reset_index()
    .sort_values(by='total_transactions', ascending=False)
)

summary_by_package_full

Unnamed: 0,PACKAGE_LITRE,total_sales,total_transactions,avg_units_per_trx,avg_volume_per_trx,avg_price_per_litre,num_items
2,40.0,7316119.72,425854,1.758764,70.350558,11.25182,6
1,30.0,3120957.5,162227,1.663605,49.90816,12.269456,16
0,25.0,617112.92,38718,1.446886,36.172142,11.159338,8


In [177]:
# Volume per transaction by brand and package size for NZ, ordered from the
# smallest to the largest average volume (ties broken by brand code).
# avg_qty and avg_volume_per_trx are unweighted means of the input rows.
volume_summary = (
    pm_nz.groupby(['BRAND_CODE', 'PACKAGE_LITRE'])
    .agg(
        avg_qty=pd.NamedAgg(column='AVG_QTY_PER_TRX', aggfunc='mean'),
        total_trx=pd.NamedAgg(column='TOTAL_TRX', aggfunc='sum'),
        avg_volume_per_trx=pd.NamedAgg(column='ESTIMATED_LITRES_PER_TRX', aggfunc='mean'),
        unique_items=pd.NamedAgg(column='ITEM_NUMBER', aggfunc='nunique'),
    )
    .reset_index()
    .sort_values(['avg_volume_per_trx', 'BRAND_CODE'])
)
volume_summary

Unnamed: 0,BRAND_CODE,PACKAGE_LITRE,avg_qty,total_trx,avg_volume_per_trx,unique_items
1,Beut,30.0,1.0,1,30.0,1
15,SCOTTS,25.0,1.341671,8389,33.541787,2
13,OSMOCOTE,25.0,1.45933,28528,36.48326,4
3,DALTONS MEDIA,25.0,1.52721,1801,38.180262,2
16,SCOTTS,30.0,1.453972,2379,43.619168,1
12,NATURES WAY,30.0,1.538915,9639,46.167439,2
4,DALTONS MEDIA,30.0,1.547823,34973,46.434678,1
8,GARDEN BASICS,30.0,1.630816,15667,48.924491,1
6,DALTONS PREMIUM,30.0,1.641211,24754,49.236326,2
9,GARDEN TIME MEDIA,30.0,1.696674,62249,50.900234,4
