In [1]:
# Load libraries
import pandas as pd
import altair as alt
import numpy as np


In [2]:
# Step 1: Calculate total attendees from fanbase engagement data
fanbase = pd.read_csv("Byte_Datasets/clean_fanbase.csv")

# Sum all games attended across all members
total_attendees = fanbase["Games_Attended"].sum()

print(f"Total Attendees (from Fanbase Engagement): {total_attendees:,}")
print(f"Number of members in database: {len(fanbase):,}")


Total Attendees (from Fanbase Engagement): 399,799
Number of members in database: 70,000


In [3]:
# Step 2: Extract monthly food revenue from stadium operations
stadium = pd.read_csv("Byte_Datasets/clean_stadium.csv")

# Filter for Food revenue only
food_revenue = stadium[stadium["Source"] == "Food"].copy()
food_revenue = food_revenue.sort_values("Month")

print("\nMonthly Food Revenue:")
print(food_revenue)



Monthly Food Revenue:
    Month Source  Revenue
48      1   Food   443982
49      2   Food  2777643
50      3   Food  2596206
51      4   Food  2227200
52      5   Food  1723476
53      6   Food  1208020
54      7   Food  1469214
55      8   Food  2357364
56      9   Food  2001792
57     10   Food  2844126
58     11   Food   299296
59     12   Food        0


In [4]:
# Step 3: Calculate proportional distribution of attendees
# Total food revenue across all months
total_food_revenue = food_revenue["Revenue"].sum()

# Calculate proportion of total food revenue for each month
food_revenue["Revenue_Proportion"] = food_revenue["Revenue"] / total_food_revenue

# Distribute total attendees proportionally based on food revenue
food_revenue["Estimated_Attendees"] = (food_revenue["Revenue_Proportion"] * total_attendees).round().astype(int)

# Display the distribution
print(f"\nTotal Food Revenue: ${total_food_revenue:,.2f}")
print(f"\nMonthly Attendee Distribution:\n")
print(food_revenue[["Month", "Revenue", "Revenue_Proportion", "Estimated_Attendees"]])



Total Food Revenue: $19,948,319.00

Monthly Attendee Distribution:

    Month  Revenue  Revenue_Proportion  Estimated_Attendees
48      1   443982            0.022257                 8898
49      2  2777643            0.139242                55669
50      3  2596206            0.130147                52032
51      4  2227200            0.111649                44637
52      5  1723476            0.086397                34541
53      6  1208020            0.060557                24211
54      7  1469214            0.073651                29446
55      8  2357364            0.118174                47246
56      9  2001792            0.100349                40119
57     10  2844126            0.142575                57001
58     11   299296            0.015004                 5998
59     12        0            0.000000                    0


In [5]:
# Summary statistics
print("\nSummary Statistics:")
print(f"Average attendees per month: {food_revenue['Estimated_Attendees'].mean():,.0f}")
print(f"Peak attendance month: {food_revenue.loc[food_revenue['Estimated_Attendees'].idxmax(), 'Month']} ({food_revenue['Estimated_Attendees'].max():,} attendees)")
print(f"Lowest attendance month: {food_revenue.loc[food_revenue['Estimated_Attendees'].idxmin(), 'Month']} ({food_revenue['Estimated_Attendees'].min():,} attendees)")
print(f"\nVerification - Total distributed: {food_revenue['Estimated_Attendees'].sum():,} (should match {total_attendees:,})")



Summary Statistics:
Average attendees per month: 33,316
Peak attendance month: 10 (57,001 attendees)
Lowest attendance month: 12 (0 attendees)

Verification - Total distributed: 399,798 (should match 399,799)


In [6]:
# Step 4: Create visualization
alt.data_transformers.disable_max_rows()

# Create month labels for better readability
month_names = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun', 
               7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}
food_revenue["Month_Label"] = food_revenue["Month"].map(month_names)

# Create bar chart
chart = alt.Chart(food_revenue).mark_bar(color='#3498db').encode(
    x=alt.X('Month:O', title='Month', axis=alt.Axis(labelAngle=0)),
    y=alt.Y('Estimated_Attendees:Q', title='Estimated Attendees'),
    tooltip=[
        alt.Tooltip('Month_Label:N', title='Month'),
        alt.Tooltip('Estimated_Attendees:Q', title='Attendees', format=','),
        alt.Tooltip('Revenue:Q', title='Food Revenue', format='$,.0f'),
        alt.Tooltip('Revenue_Proportion:Q', title='Revenue %', format='.1%')
    ]
).properties(
    title='Monthly Attendee Distribution (Based on Food Revenue Proportions)',
    width=700,
    height=400
)

# Add text labels on top of bars
text = chart.mark_text(
    align='center',
    baseline='bottom',
    dy=-5,
    fontSize=11
).encode(
    text=alt.Text('Estimated_Attendees:Q', format=',')
)

final_chart = chart + text
final_chart


In [7]:
# Additional visualization: Side-by-side comparison of food revenue and attendees
base = food_revenue.copy()

# Create normalized values for comparison
revenue_chart = alt.Chart(base).mark_bar(color='#2ecc71').encode(
    x=alt.X('Month:O', title='Month'),
    y=alt.Y('Revenue:Q', title='Food Revenue ($)', axis=alt.Axis(format='$,.0f')),
    tooltip=[
        alt.Tooltip('Month_Label:N', title='Month'),
        alt.Tooltip('Revenue:Q', title='Food Revenue', format='$,.0f')
    ]
).properties(
    title='Monthly Food Revenue',
    width=350,
    height=300
)

attendee_chart = alt.Chart(base).mark_bar(color='#3498db').encode(
    x=alt.X('Month:O', title='Month'),
    y=alt.Y('Estimated_Attendees:Q', title='Estimated Attendees', axis=alt.Axis(format=',')),
    tooltip=[
        alt.Tooltip('Month_Label:N', title='Month'),
        alt.Tooltip('Estimated_Attendees:Q', title='Attendees', format=',')
    ]
).properties(
    title='Monthly Estimated Attendees',
    width=350,
    height=300
)

comparison = alt.hconcat(revenue_chart, attendee_chart)
comparison
