In [27]:
#load libraries 
import pandas as pd
import altair as alt
import numpy as np


<strong><h2>Which months and sales channels deliver the highest promotional lift in merchandise revenue?</strong></h2>

In [29]:
stadium = pd.read_csv("clean_stadium.csv")
stadium.head()

Unnamed: 0,Month,Source,Revenue
0,1,Upper Bowl,25020
1,2,Upper Bowl,686664
2,3,Upper Bowl,434112
3,4,Upper Bowl,390879
4,5,Upper Bowl,286524


In [31]:
fanbase = pd.read_csv("clean_fanbase.csv")
fanbase.head()

Unnamed: 0,Membership_ID,Age_Group,Games_Attended,Seasonal_Pass,Customer_Region
0,10218708,18-25,3,False,Domestic
1,10043567,18-25,2,False,Domestic
2,14272475,41-60,2,False,Domestic
3,10034985,18-25,5,False,Domestic
4,10004699,18-25,7,False,Domestic


In [33]:
merch = pd.read_csv("clean_merch.csv")

In [35]:
merch["Selling_Date"] = pd.to_datetime(merch["Selling_Date"], errors="coerce")
merch["Month"] = merch["Selling_Date"].dt.month

promo_summary = (
    merch.groupby(["Month", "Channel", "Promotion"], as_index=False)
         .agg(Total_Sales=("Unit_Price", "sum"))
)

# pivot promo and non-promo side by side 
pivoted = promo_summary.pivot_table(
    index=["Month", "Channel"],
    columns="Promotion",
    values="Total_Sales",
    fill_value=0
).reset_index()

pivoted.columns.name = None
pivoted = pivoted.rename(columns={False: "Non_Promo_Sales", True: "Promo_Sales"})

# calculate portion promo share % (portion of total rev that came from promos) 
pivoted["Promo_Share_%"] = (
    pivoted["Promo_Sales"] /
    (pivoted["Promo_Sales"] + pivoted["Non_Promo_Sales"])
) * 100


In [37]:
base = alt.Chart(pivoted).encode(
    x=alt.X("Month:O", title="Month"),
    y=alt.Y("Channel:N", title="Sales Channel")
)

rect = base.mark_rect().encode(
    color=alt.Color(
        "Promo_Share_%:Q",
        scale=alt.Scale(scheme="blueorange", domain=[0,50]),
        title="Promo Share (%)"
    )
)

text = base.mark_text(
    baseline="middle",
    color="white"
).encode(
    text=alt.Text("Promo_Share_%:Q", format=".1f")
)

#add layers together
(rect + text).properties(
    width=500,
    height=300,
    title="Promo Share % by Channel and Month"
)


Across all months, about half of all online sales came from promotions every month, meaning the online score is fairly dependent on promotions to generate sales. This means customers respond to marketing, ads, events, but reducing promotions may lead to a decrease in online sales. 

There weren't any sales that came from promotions from the in-person store puchases. This means there may be a missed opportunity for in-store promotions that can be explored.

<strong><h2>Do promotions attract casual fans or loyal fans?</strong></h2>

In [39]:
merged_promo = merch.merge(
    fanbase[['Membership_ID', 'Games_Attended', 'Seasonal_Pass']], 
    left_on='Member_ID', 
    right_on='Membership_ID', 
    how='left'
)

# define loyalty segments based on attendance and season pass
def classify_loyalty(row):
    if pd.isna(row['Games_Attended']):
        return 'Unknown'
    elif row['Seasonal_Pass'] and row['Games_Attended'] >= 21:
        return 'Loyal (21+ games)'
    elif row['Seasonal_Pass'] and row['Games_Attended'] >= 11:
        return 'Loyal (11-20 games)'
    elif row['Games_Attended'] >= 21:
        return 'Engaged (21+ games)'
    elif row['Games_Attended'] >= 11:
        return 'Engaged (11-20 games)'
    elif row['Games_Attended'] >= 6:
        return 'Moderate (6-10 games)'
    else:
        return 'Casual (0-5 games)'

merged_promo['Loyalty_Segment'] = merged_promo.apply(classify_loyalty, axis=1)

# calculate promotion effectiveness by loyalty segment
promo_by_loyalty = (
    merged_promo.groupby(['Loyalty_Segment', 'Promotion'], as_index=False)
    .agg(
        Total_Sales=('Unit_Price', 'sum'),
        Num_Purchases=('Unit_Price', 'count')
    )
)

# compare promo and non-promo
promo_pivot = promo_by_loyalty.pivot_table(
    index='Loyalty_Segment',
    columns='Promotion',
    values=['Total_Sales', 'Num_Purchases'],
    fill_value=0
).reset_index()


promo_pivot.columns = ['_'.join(map(str, col)).strip('_') if col[1] != '' else col[0] 
                        for col in promo_pivot.columns.values]

# Calculate promotion share
promo_pivot['Promo_Sales_Share_%'] = (
    promo_pivot['Total_Sales_True'] / 
    (promo_pivot['Total_Sales_True'] + promo_pivot['Total_Sales_False'])
) * 100

promo_pivot['Promo_Purchase_Share_%'] = (
    promo_pivot['Num_Purchases_True'] / 
    (promo_pivot['Num_Purchases_True'] + promo_pivot['Num_Purchases_False'])
) * 100

# create different loyalty levels
loyalty_order = ['Casual (0-5 games)', 'Moderate (6-10 games)', 
                 'Engaged (11-20 games)', 'Engaged (21+ games)',
                 'Loyal (11-20 games)', 'Loyal (21+ games)', 'Unknown']
promo_pivot['Loyalty_Segment'] = pd.Categorical(
    promo_pivot['Loyalty_Segment'], 
    categories=loyalty_order, 
    ordered=True
)
promo_pivot = promo_pivot.sort_values('Loyalty_Segment')

# create viz
chart = alt.Chart(promo_pivot[promo_pivot['Loyalty_Segment'] != 'Unknown']).mark_bar().encode(
    x=alt.X('Loyalty_Segment:N', 
            title='Fan Loyalty Segment',
            sort=loyalty_order),
    y=alt.Y('Promo_Sales_Share_%:Q', 
            title='Promotion Share of Sales Revenue (%)',
            scale=alt.Scale(domain=[0, 60])),
    color=alt.Color('Loyalty_Segment:N',
                    scale=alt.Scale(scheme='tableau10'),
                    legend=None),
    tooltip=[
        alt.Tooltip('Loyalty_Segment:N', title='Loyalty Segment'),
        alt.Tooltip('Promo_Sales_Share_%:Q', title='Promo Sales Share (%)', format='.1f'),
        alt.Tooltip('Total_Sales_True:Q', title='Promo Sales ($)', format=',.0f'),
        alt.Tooltip('Total_Sales_False:Q', title='Non-Promo Sales ($)', format=',.0f')
    ]
).properties(
    width=500,
    height=350,
    title='Promotion Effectiveness by Fan Loyalty Level'
)


text = chart.mark_text(
    align='center',
    baseline='bottom',
    dy=-5,
    color='black',
    fontSize=11,
    fontWeight='bold'
).encode(
    text=alt.Text('Promo_Sales_Share_%:Q', format='.1f')
)

#add layers tgt
final_chart = (chart + text)
final_chart

Promotions attract everyone equally - casual fans, moderate fans, and loyal fans all respond to promotions at basically the same rate. There is no targeting opportunity.

<strong><h2>When is the stadium underutilized, and what's the revenue opportunity from hosting non-matchday events?</strong></h2>

In [41]:
# filter to revenue-generating sources 
revenue_sources = ['Upper Bowl', 'Lower Bowl', 'Premium Seating', 
                   'Concessions', 'Hospitality', 'Merchandise Kiosks']

stadium_revenue = stadium[stadium['Source'].isin(revenue_sources)].copy()

# calc rev per month
monthly_revenue = stadium_revenue.groupby('Month', as_index=False)['Revenue'].sum()
monthly_revenue.columns = ['Month', 'Total_Revenue']

# recode months
month_names = {
    1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 
    5: 'May', 6: 'Jun', 7: 'Jul', 8: 'Aug',
    9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'
}
monthly_revenue['Month_Name'] = monthly_revenue['Month'].map(month_names)

# calculate utilization based on the peak revenue month 
max_revenue = monthly_revenue['Total_Revenue'].max()
monthly_revenue['Utilization_%'] = (monthly_revenue['Total_Revenue'] / max_revenue) * 100
monthly_revenue['Revenue_Gap'] = max_revenue - monthly_revenue['Total_Revenue']

# create different utilization categories 
monthly_revenue['Utilization_Level'] = pd.cut(
    monthly_revenue['Utilization_%'],
    bins=[0, 40, 70, 100],
    labels=['Low (<40%)', 'Medium (40-70%)', 'High (70%+)']
)

# create viz
base = alt.Chart(monthly_revenue).encode(
    x=alt.X('Month:O', 
            title='Month',
            axis=alt.Axis(labelExpr="['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'][datum.value-1]"))
)

bars = base.mark_bar().encode(
    y=alt.Y('Total_Revenue:Q', 
            title='Total Stadium Revenue ($)',
            scale=alt.Scale(domain=[0, max_revenue * 1.1]),
            axis=alt.Axis(format='$,.0s')),
    color=alt.Color('Utilization_Level:N',
                    scale=alt.Scale(
                        domain=['Low (<40%)', 'Medium (40-70%)', 'High (70%+)'],
                        range=['#e74c3c', '#f39c12', '#27ae60']
                    ),
                    title='Stadium Utilization'),
    tooltip=[
        alt.Tooltip('Month_Name:N', title='Month'),
        alt.Tooltip('Total_Revenue:Q', title='Revenue', format='$,.0f'),
        alt.Tooltip('Utilization_%:Q', title='% of Peak Month', format='.1f'),
        alt.Tooltip('Revenue_Gap:Q', title='Gap from Peak', format='$,.0f')
    ]
)

text = base.mark_text(
    align='center',
    baseline='bottom',
    dy=-5,
    fontSize=11,
    fontWeight='bold'
).encode(
    y=alt.Y('Total_Revenue:Q'),
    text=alt.Text('Utilization_%:Q', format='.0f'),
    color=alt.value('black')
)

#add layers
chart = (bars + text).properties(
    width=650,
    height=400,
    title={
        "text": "Stadium Revenue & Utilization by Month (2024)",
        "subtitle": "Percentage shows utilization relative to peak month"
    }
)

chart

Our analysis identified four critically underutilized months: January, June, November, and December—with December showing nearly zero activity. While the stadium performs strongly during the soccer season from February through October, it sits virtually empty for one-third of the year. Peak months generate approximately 4 million dollars in revenue, while these low months average only 1 million, creating a 3 million dollar monthly gap. Across the four underperforming months, this represents over 12 million dollars in annual missed opportunity.

<strong><h2>What are the merchandise sales like during these dead months?<strong><h2>

In [43]:
merch['Month'] = pd.to_datetime(merch['Selling_Date']).dt.month

# calculate monthly merchandise revenue
monthly_merch = merch.groupby('Month', as_index=False)['Unit_Price'].sum()
monthly_merch.columns = ['Month', 'Merch_Revenue']

# recode months
month_names = {
    1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 
    5: 'May', 6: 'Jun', 7: 'Jul', 8: 'Aug',
    9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'
}
monthly_merch['Month_Name'] = monthly_merch['Month'].map(month_names)

# normalize both metrics to compare patterns
max_merch = monthly_merch['Merch_Revenue'].max()
monthly_merch['Merch_Utilization_%'] = (monthly_merch['Merch_Revenue'] / max_merch) * 100

comparison = monthly_revenue[['Month', 'Month_Name', 'Total_Revenue', 'Utilization_%']].merge(
    monthly_merch[['Month', 'Merch_Revenue', 'Merch_Utilization_%']], 
    on='Month'
)

# identify off-season months
dead_months = [1, 6, 11, 12]
comparison['Stadium_Status'] = comparison['Month'].apply(
    lambda x: 'Dead Stadium Months' if x in dead_months else 'Active Stadium Months'
)

# create viz
base = alt.Chart(comparison).encode(
    x=alt.X('Month:O', 
            title='Month',
            axis=alt.Axis(labelExpr="['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'][datum.value-1]"))
)

stadium_bars = base.mark_bar(opacity=0.6, color='#3498db').encode(
    y=alt.Y('Utilization_%:Q', 
            title='Stadium Utilization (%)',
            axis=alt.Axis(titleColor='#3498db')),
    tooltip=[
        alt.Tooltip('Month_Name:N', title='Month'),
        alt.Tooltip('Total_Revenue:Q', title='Stadium Revenue', format='$,.0f'),
        alt.Tooltip('Utilization_%:Q', title='Stadium Utilization %', format='.1f')
    ]
)

merch_line = base.mark_line(
    color='#e74c3c', 
    strokeWidth=3,
    point=alt.OverlayMarkDef(color='#e74c3c', size=80)
).encode(
    y=alt.Y('Merch_Utilization_%:Q',
            title='Merchandise Utilization (%)',
            axis=alt.Axis(titleColor='#e74c3c')),
    tooltip=[
        alt.Tooltip('Month_Name:N', title='Month'),
        alt.Tooltip('Merch_Revenue:Q', title='Merch Revenue', format='$,.0f'),
        alt.Tooltip('Merch_Utilization_%:Q', title='Merch Utilization %', format='.1f')
    ]
)

# combine layers
chart = alt.layer(stadium_bars, merch_line).resolve_scale(
    y='independent'
).properties(
    width=700,
    height=400,
    title={
        "text": "Stadium vs Merchandise Performance by Month",
        "subtitle": "Blue bars = Stadium utilization | Red line = Merchandise sales"
    }
)

chart

In general, the utilization of the stadium and merchandise move together. During off-seasons, the fans don't attend games, buy merchandise, or purchase from the stadium. Compared to peak months, merch drops 60-80%, indicating fans completely disengage from the brand. Therefore, BSL should target new audiences, while using engagement strategies to retain engagement from current fans.