In [1]:
# Load libraries
import pandas as pd
import altair as alt
import numpy as np


In [2]:
# Step 1: Calculate attendees per month (from previous analysis)
fanbase = pd.read_csv("Byte_Datasets/clean_fanbase.csv")
total_attendees = fanbase["Games_Attended"].sum()

# Load stadium data
stadium = pd.read_csv("Byte_Datasets/clean_stadium.csv")

# Get food revenue to calculate monthly attendee distribution
food_revenue = stadium[stadium["Source"] == "Food"].copy()
food_revenue = food_revenue.sort_values("Month")
total_food_revenue = food_revenue["Revenue"].sum()
food_revenue["Revenue_Proportion"] = food_revenue["Revenue"] / total_food_revenue
food_revenue["Estimated_Attendees"] = (food_revenue["Revenue_Proportion"] * total_attendees).round().astype(int)

print(f"Total Attendees: {total_attendees:,}")
print("\nMonthly Attendees:")
print(food_revenue[["Month", "Estimated_Attendees"]])


Total Attendees: 399,799

Monthly Attendees:
    Month  Estimated_Attendees
48      1                 8898
49      2                55669
50      3                52032
51      4                44637
52      5                34541
53      6                24211
54      7                29446
55      8                47246
56      9                40119
57     10                57001
58     11                 5998
59     12                    0


In [3]:
# Step 2: Calculate total operational costs per month
# Operational cost sources: Staff, Insurance, Maintenance, Utilities, Advertising

operational_sources = ["Staff", "Insurance", "Maintenance", "Utilities", "Advertising"]
operational_costs = stadium[stadium["Source"].isin(operational_sources)].copy()

# Group by month and sum all operational costs
monthly_operational_costs = operational_costs.groupby("Month")["Revenue"].sum().reset_index()
monthly_operational_costs.columns = ["Month", "Total_Operational_Cost"]

print("\nMonthly Operational Costs (Staff + Insurance + Maintenance + Utilities + Advertising):")
print(monthly_operational_costs)
print(f"\nTotal Annual Operational Costs: ${monthly_operational_costs['Total_Operational_Cost'].sum():,.2f}")



Monthly Operational Costs (Staff + Insurance + Maintenance + Utilities + Advertising):
    Month  Total_Operational_Cost
0       1                -4256493
1       2                -3835770
2       3                -3682023
3       4                -3792475
4       5                -3949920
5       6                -3851174
6       7                -4145845
7       8                -4096397
8       9                -3815151
9      10                -3981068
10     11                -4455473
11     12                -3945202

Total Annual Operational Costs: $-47,806,991.00


In [4]:
# Step 3: Calculate marginal cost per attendee
# Marginal Cost per Attendee = Total Operational Cost / Number of Attendees

# Merge operational costs with attendee data
cost_analysis = monthly_operational_costs.merge(
    food_revenue[["Month", "Estimated_Attendees"]], 
    on="Month"
)

# Calculate marginal cost per attendee
# Handle division by zero for months with no attendees
cost_analysis["Marginal_Cost_Per_Attendee"] = np.where(
    cost_analysis["Estimated_Attendees"] > 0,
    cost_analysis["Total_Operational_Cost"] / cost_analysis["Estimated_Attendees"],
    0
)

print("\nMarginal Cost Per Attendee by Month:")
print(cost_analysis)

# Overall statistics
valid_months = cost_analysis[cost_analysis["Estimated_Attendees"] > 0]
print(f"\nAverage Marginal Cost Per Attendee: ${valid_months['Marginal_Cost_Per_Attendee'].mean():.2f}")
print(f"Highest Marginal Cost: ${valid_months['Marginal_Cost_Per_Attendee'].max():.2f} (Month {valid_months.loc[valid_months['Marginal_Cost_Per_Attendee'].idxmax(), 'Month']})")
print(f"Lowest Marginal Cost: ${valid_months['Marginal_Cost_Per_Attendee'].min():.2f} (Month {valid_months.loc[valid_months['Marginal_Cost_Per_Attendee'].idxmin(), 'Month']})")



Marginal Cost Per Attendee by Month:
    Month  Total_Operational_Cost  Estimated_Attendees  \
0       1                -4256493                 8898   
1       2                -3835770                55669   
2       3                -3682023                52032   
3       4                -3792475                44637   
4       5                -3949920                34541   
5       6                -3851174                24211   
6       7                -4145845                29446   
7       8                -4096397                47246   
8       9                -3815151                40119   
9      10                -3981068                57001   
10     11                -4455473                 5998   
11     12                -3945202                    0   

    Marginal_Cost_Per_Attendee  
0                  -478.365138  
1                   -68.903160  
2                   -70.764587  
3                   -84.962587  
4                  -114.354535  
5        

In [5]:
# Step 4: Create visualization
alt.data_transformers.disable_max_rows()

# Add month labels
month_names = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun', 
               7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}
cost_analysis["Month_Label"] = cost_analysis["Month"].map(month_names)

# Filter out months with no attendees for visualization
cost_analysis_filtered = cost_analysis[cost_analysis["Estimated_Attendees"] > 0].copy()

# Create bar chart
bar_chart = alt.Chart(cost_analysis_filtered).mark_bar(color='#e67e22').encode(
    x=alt.X('Month:O', title='Month', axis=alt.Axis(labelAngle=0)),
    y=alt.Y('Marginal_Cost_Per_Attendee:Q', title='Marginal Cost Per Attendee ($)'),
    tooltip=[
        alt.Tooltip('Month_Label:N', title='Month'),
        alt.Tooltip('Marginal_Cost_Per_Attendee:Q', title='Cost per Attendee', format='$,.2f'),
        alt.Tooltip('Total_Operational_Cost:Q', title='Total Op Costs', format='$,.0f'),
        alt.Tooltip('Estimated_Attendees:Q', title='Attendees', format=',')
    ]
).properties(
    title='Marginal Cost Per Attendee by Month',
    width=700,
    height=400
)

# Add text labels on top of bars
text = alt.Chart(cost_analysis_filtered).mark_text(
    align='center',
    baseline='bottom',
    dy=-5,
    fontSize=11
).encode(
    x=alt.X('Month:O'),
    y=alt.Y('Marginal_Cost_Per_Attendee:Q'),
    text=alt.Text('Marginal_Cost_Per_Attendee:Q', format='$,.2f')
)

chart = bar_chart + text
chart


In [6]:
# Additional visualization: Line chart to show trend
line_chart = alt.Chart(cost_analysis_filtered).mark_line(
    color='#e67e22',
    strokeWidth=3,
    point=alt.OverlayMarkDef(size=100, filled=True, color='#e67e22')
).encode(
    x=alt.X('Month:O', title='Month', axis=alt.Axis(labelAngle=0)),
    y=alt.Y('Marginal_Cost_Per_Attendee:Q', title='Marginal Cost Per Attendee ($)'),
    tooltip=[
        alt.Tooltip('Month_Label:N', title='Month'),
        alt.Tooltip('Marginal_Cost_Per_Attendee:Q', title='Cost per Attendee', format='$,.2f'),
        alt.Tooltip('Total_Operational_Cost:Q', title='Total Op Costs', format='$,.0f'),
        alt.Tooltip('Estimated_Attendees:Q', title='Attendees', format=',')
    ]
).properties(
    title='Marginal Cost Per Attendee Trend',
    width=700,
    height=400
)

line_chart


In [7]:
# Breakdown visualization: Show cost components
# Get individual cost sources by month
cost_breakdown = operational_costs.pivot_table(
    index='Month', 
    columns='Source', 
    values='Revenue', 
    aggfunc='sum',
    fill_value=0
).reset_index()

# Merge with attendees
cost_breakdown = cost_breakdown.merge(
    food_revenue[["Month", "Estimated_Attendees"]], 
    on="Month"
)

# Calculate per-attendee costs for each category
for source in operational_sources:
    if source in cost_breakdown.columns:
        cost_breakdown[f"{source}_Per_Attendee"] = np.where(
            cost_breakdown["Estimated_Attendees"] > 0,
            cost_breakdown[source] / cost_breakdown["Estimated_Attendees"],
            0
        )

print("\n\nCost Breakdown Per Attendee by Month:")
print(cost_breakdown[["Month"] + [f"{s}_Per_Attendee" for s in operational_sources if s in cost_breakdown.columns]])




Cost Breakdown Per Attendee by Month:
    Month  Staff_Per_Attendee  Insurance_Per_Attendee  \
0       1         -310.743987              -17.981569   
1       2          -61.075284               -2.874131   
2       3          -63.768450               -3.075031   
3       4          -75.318682               -3.584470   
4       5          -97.319128               -4.632176   
5       6         -132.088720               -6.608566   
6       7         -118.352238               -5.433675   
7       8          -74.662405               -3.386530   
8       9          -81.657070               -3.988135   
9      10          -61.104191               -2.806968   
10     11         -560.520173              -26.675559   
11     12            0.000000                0.000000   

    Maintenance_Per_Attendee  Utilities_Per_Attendee  Advertising_Per_Attendee  
0                -168.577208              -30.241964                 49.179591  
1                  -9.700192               -3.965762    

In [8]:
# Export results
cost_analysis.to_csv("Byte_Datasets/marginal_cost_per_attendee.csv", index=False)
cost_breakdown.to_csv("Byte_Datasets/cost_breakdown_by_category.csv", index=False)

print("\n✓ Data exported to:")
print("  - Byte_Datasets/marginal_cost_per_attendee.csv")
print("  - Byte_Datasets/cost_breakdown_by_category.csv")



✓ Data exported to:
  - Byte_Datasets/marginal_cost_per_attendee.csv
  - Byte_Datasets/cost_breakdown_by_category.csv
