In [2]:
# Load libraries
import pandas as pd
import altair as alt
import numpy as np
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression


In [3]:
# Step 1: Load existing data and estimate the price elasticity of demand
price_data = pd.read_csv("Byte_Datasets/average_ticket_price_by_month.csv")
marginal_costs = pd.read_csv("Byte_Datasets/marginal_cost_per_attendee.csv")

# Log-log demand regression: log(Q) = const + E * log(P); the slope E is the elasticity.
# Both columns are logged, so both must be strictly positive: np.log returns -inf
# for 0 and NaN for negative values, either of which would corrupt the fit.
# (NaN entries compare False against > 0, so rows with missing values are dropped too.)
has_positive_inputs = (price_data["Estimated_Attendees"] > 0) & (price_data["Average_Ticket_Price"] > 0)
price_data_valid = price_data[has_positive_inputs].copy()

# A slope cannot be estimated from fewer than two observations.
if len(price_data_valid) < 2:
    raise ValueError(
        "Need at least two months with positive price and attendance to estimate "
        f"elasticity; found {len(price_data_valid)}."
    )

price_data_valid["Log_Price"] = np.log(price_data_valid["Average_Ticket_Price"])
price_data_valid["Log_Attendees"] = np.log(price_data_valid["Estimated_Attendees"])

X = price_data_valid["Log_Price"]
y = price_data_valid["Log_Attendees"]
X_with_const = sm.add_constant(X)  # adds the intercept column to the regressors
model = sm.OLS(y, X_with_const)
results = model.fit()
elasticity = results.params['Log_Price']
# Downstream pricing formulas use the magnitude of the elasticity.
abs_elasticity = abs(elasticity)

print("="*80)
print("ELASTICITY")
print("="*80)
print(f"Absolute Elasticity |E|: {abs_elasticity:.4f}")
print("="*80)


ELASTICITY
Absolute Elasticity |E|: 3.3891


In [4]:
# Step 2: Estimate marginal cost as a linear function of attendance
# Model: MC_t = a - b*Q_t
# A positive b means MC declines with quantity (economies of scale)

# Merge monthly prices with marginal costs. merge() defaults to an inner join,
# so a month missing from either file is dropped from the analysis.
analysis_data = price_data.merge(marginal_costs[["Month", "Marginal_Cost_Per_Attendee"]], on="Month")
analysis_data_valid = analysis_data[analysis_data["Estimated_Attendees"] > 0].copy()

# Work with the magnitude of marginal cost (any sign in the input is discarded)
analysis_data_valid["MC_Positive"] = analysis_data_valid["Marginal_Cost_Per_Attendee"].abs()

# Fit MC = intercept + slope*Q by ordinary least squares
X_scale = analysis_data_valid[["Estimated_Attendees"]]
y_scale = analysis_data_valid["MC_Positive"]

scale_model = LinearRegression()
scale_model.fit(X_scale, y_scale)

a = scale_model.intercept_
b = -scale_model.coef_[0]  # Sign flipped so the fitted line reads MC = a - b*Q

# Word the interpretation from the fitted sign rather than assuming a decline,
# and keep enough decimals that a small per-attendee effect is not rounded away.
mc_direction = "decreases" if b >= 0 else "increases"

print("\n" + "="*80)
print("MARGINAL COST SCALE RELATIONSHIP")
print("="*80)
print("MC(Q) = a - b*Q")
print(f"a (base cost): ${a:.2f}")
print(f"b (scale coefficient): ${b:.6f}")
print(f"\nInterpretation: For each additional attendee, marginal cost {mc_direction} by ${abs(b):.4f}")
print("="*80)



MARGINAL COST SCALE RELATIONSHIP
MC(Q) = a - b*Q
a (base cost): $575.04
b (scale coefficient): $0.010540

Interpretation: For each additional attendee, marginal cost decreases by $0.01


In [5]:
# Step 3: Calculate scale-adjusted marginal costs and optimal prices

# The Lerner pricing rule P* = MC / (1 - 1/|E|) only yields a finite, positive
# price when demand is elastic (|E| > 1): |E| == 1 divides by zero and |E| < 1
# produces negative prices. `not ... > 1` also rejects a NaN elasticity.
if not abs_elasticity > 1:
    raise ValueError(
        f"Lerner pricing requires |E| > 1, got |E| = {abs_elasticity:.4f}; "
        "optimal prices are undefined for inelastic demand."
    )
lerner_denominator = 1 - (1 / abs_elasticity)

# Scale-adjusted MC: MC_t = a - b*Q_t
mc_fitted = a - b * analysis_data_valid["Estimated_Attendees"]

# The linear fit can go negative at high attendance; floor it at zero so MC is
# never negative. Remember which rows were floored so they can be flagged below.
mc_was_floored = mc_fitted < 0
analysis_data_valid["MC_Scale_Adjusted"] = mc_fitted.clip(lower=0)

# Calculate scale-adjusted optimal price using Lerner Index
# P*_t = MC_scale / (1 - 1/|E|)
analysis_data_valid["Optimal_Price_Scale_Adjusted"] = analysis_data_valid["MC_Scale_Adjusted"] / lerner_denominator

# Calculate original optimal price (without scale adjustment) for comparison
analysis_data_valid["Optimal_Price_Original"] = analysis_data_valid["MC_Positive"] / lerner_denominator

print("\n" + "="*80)
print("COMPARISON: ORIGINAL VS SCALE-ADJUSTED OPTIMAL PRICES")
print("="*80)
print(analysis_data_valid[["Month", "Estimated_Attendees", "MC_Positive", "MC_Scale_Adjusted", 
                           "Average_Ticket_Price", "Optimal_Price_Original", "Optimal_Price_Scale_Adjusted"]])
if mc_was_floored.any():
    # A $0 marginal cost gives a $0 "optimal" price, which reflects the linear
    # extrapolation rather than a real pricing recommendation.
    floored_months = analysis_data_valid.loc[mc_was_floored, "Month"].tolist()
    print(f"\nNOTE: fitted MC was negative and floored at $0 for month(s) {floored_months}; "
          "their scale-adjusted optimal price of $0 is an artifact of the linear fit.")
print("="*80)



COMPARISON: ORIGINAL VS SCALE-ADJUSTED OPTIMAL PRICES
    Month  Estimated_Attendees  MC_Positive  MC_Scale_Adjusted  \
0       1                 8898   478.365138         481.257723   
1       2                55669    68.903160           0.000000   
2       3                52032    70.764587          26.637045   
3       4                44637    84.962587         104.578342   
4       5                34541   114.354535         210.987448   
5       6                24211   159.067118         319.862850   
6       7                29446   140.794845         264.687368   
7       8                47246    86.703573          77.080189   
8       9                40119    95.095865         152.196839   
9      10                57001    69.842073           0.000000   
10     11                 5998   742.826442         511.822938   

    Average_Ticket_Price  Optimal_Price_Original  Optimal_Price_Scale_Adjusted  
0             136.563835              678.595639                    682

In [6]:
# Summary statistics: column means for prices and marginal costs, plus the
# average gap between the scale-adjusted optimal price and the current price.
rule = "=" * 80

avg_current_price = analysis_data_valid["Average_Ticket_Price"].mean()
avg_optimal_original = analysis_data_valid["Optimal_Price_Original"].mean()
avg_optimal_scaled = analysis_data_valid["Optimal_Price_Scale_Adjusted"].mean()
avg_mc_original = analysis_data_valid["MC_Positive"].mean()
avg_mc_scaled = analysis_data_valid["MC_Scale_Adjusted"].mean()

# Per-month difference between the scale-adjusted optimum and what is charged today
scale_diff = analysis_data_valid["Optimal_Price_Scale_Adjusted"] - analysis_data_valid["Average_Ticket_Price"]
avg_adjustment = scale_diff.mean()

print("\n" + rule)
print("SUMMARY STATISTICS")
print(rule)
print(f"Average Current Price: ${avg_current_price:.2f}")
print(f"Average Optimal Price (Original): ${avg_optimal_original:.2f}")
print(f"Average Optimal Price (Scale-Adjusted): ${avg_optimal_scaled:.2f}")
print(f"\nAverage MC (Original): ${avg_mc_original:.2f}")
print(f"Average MC (Scale-Adjusted): ${avg_mc_scaled:.2f}")

print(f"\nAverage price adjustment needed (scale-adjusted): ${avg_adjustment:.2f}")
# Expressed relative to the mean current price (ratio of means, not mean of ratios)
print(f"Percentage adjustment needed: {(avg_adjustment / avg_current_price * 100):.1f}%")
print(rule)



SUMMARY STATISTICS
Average Current Price: $91.33
Average Optimal Price (Original): $272.32
Average Optimal Price (Scale-Adjusted): $277.15

Average MC (Original): $191.97
Average MC (Scale-Adjusted): $195.37

Average price adjustment needed (scale-adjusted): $185.82
Percentage adjustment needed: 203.5%


In [7]:
# Step 4: Visualization - grouped bars comparing the three price series per month
alt.data_transformers.disable_max_rows()

# Month number (1-12) -> three-letter label; stored on the frame for tooltips
month_names = dict(zip(
    range(1, 13),
    ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
))
analysis_data_valid["Month_Label"] = analysis_data_valid["Month"].map(month_names)

# Source column -> legend label, in the order the series should appear
price_type_labels = {
    'Average_Ticket_Price': 'Current Price',
    'Optimal_Price_Original': 'Optimal (Original)',
    'Optimal_Price_Scale_Adjusted': 'Optimal (Scale-Adjusted)',
}

# Long format: one row per (month, price type) so Altair can group the bars
pricing_comparison = analysis_data_valid.melt(
    id_vars=['Month', 'Month_Label'],
    value_vars=list(price_type_labels),
    var_name='Price_Type',
    value_name='Price',
)
pricing_comparison['Price_Type'] = pricing_comparison['Price_Type'].replace(price_type_labels)

# Fixed domain/range keeps each price type on the same colour
price_type_scale = alt.Scale(
    domain=list(price_type_labels.values()),
    range=['#3498db', '#e74c3c', '#2ecc71'],
)
bar_tooltip = [
    alt.Tooltip('Month_Label:N', title='Month'),
    alt.Tooltip('Price_Type:N', title='Type'),
    alt.Tooltip('Price:Q', title='Price', format='$,.2f'),
]

bars = (
    alt.Chart(pricing_comparison)
    .mark_bar()
    .encode(
        x=alt.X('Month:O', title='Month', axis=alt.Axis(labelAngle=0)),
        y=alt.Y('Price:Q', title='Ticket Price ($)'),
        color=alt.Color('Price_Type:N', title='Price Type', scale=price_type_scale),
        xOffset='Price_Type:N',  # places the three bars side by side within each month
        tooltip=bar_tooltip,
    )
    .properties(
        title='Pricing Comparison: Current vs Original Optimal vs Scale-Adjusted Optimal',
        width=700,
        height=400,
    )
)

bars


In [8]:
# Line chart showing all three prices over time
trend_id_columns = ['Month', 'Month_Label']
trend_price_columns = ['Average_Ticket_Price', 'Optimal_Price_Original', 'Optimal_Price_Scale_Adjusted']
trend_labels = ['Current Price', 'Optimal (Original)', 'Optimal (Scale-Adjusted)']

line_data = analysis_data_valid[trend_id_columns + trend_price_columns].copy()

# Long format: one row per (month, metric)
line_long = line_data.melt(
    id_vars=trend_id_columns,
    value_vars=trend_price_columns,
    var_name='Metric',
    value_name='Value',
)

# Swap raw column names for readable legend labels
line_long['Metric'] = line_long['Metric'].replace(dict(zip(trend_price_columns, trend_labels)))

# One colour scale shared by the line and point layers so their colours agree
metric_scale = alt.Scale(domain=trend_labels, range=['#3498db', '#e74c3c', '#2ecc71'])

# Line layer: carries the axis titles, legend and chart properties
line_chart = (
    alt.Chart(line_long)
    .mark_line(strokeWidth=3)
    .encode(
        x=alt.X('Month:O', title='Month', axis=alt.Axis(labelAngle=0)),
        y=alt.Y('Value:Q', title='Price ($)'),
        color=alt.Color('Metric:N', scale=metric_scale, legend=alt.Legend(title='Price Type')),
    )
    .properties(
        title='Price Trends: Scale-Adjusted Optimal Smooths Out Extreme Values',
        width=700,
        height=400,
    )
)

# Point layer: marks each monthly value and carries the tooltips
points = (
    alt.Chart(line_long)
    .mark_point(size=100, filled=True)
    .encode(
        x=alt.X('Month:O'),
        y=alt.Y('Value:Q'),
        color=alt.Color('Metric:N', scale=metric_scale),
        tooltip=[
            alt.Tooltip('Month_Label:N', title='Month'),
            alt.Tooltip('Metric:N', title='Type'),
            alt.Tooltip('Value:Q', title='Price', format='$,.2f'),
        ],
    )
)

# Overlay points on lines (same result as line_chart + points)
combined = alt.layer(line_chart, points)
combined


In [9]:
# Show the marginal cost relationship: observed vs scale-adjusted MC against attendance
mc_value_labels = {
    'MC_Positive': 'MC (Original)',
    'MC_Scale_Adjusted': 'MC (Scale-Adjusted)',
}
mc_id_columns = ['Month', 'Month_Label', 'Estimated_Attendees']

mc_comparison = analysis_data_valid[mc_id_columns + list(mc_value_labels)].copy()

# Long format: one row per (month, MC type)
mc_long = mc_comparison.melt(
    id_vars=mc_id_columns,
    value_vars=list(mc_value_labels),
    var_name='MC_Type',
    value_name='Marginal_Cost',
)
mc_long['MC_Type'] = mc_long['MC_Type'].replace(mc_value_labels)

# Scatter plot only; no separate trend-line layer is drawn. The scale-adjusted
# points themselves come from the fitted MC = a - b*Q values (floored at zero).
mc_type_color = alt.Color(
    'MC_Type:N',
    scale=alt.Scale(
        domain=list(mc_value_labels.values()),
        range=['#e74c3c', '#2ecc71'],
    ),
)
mc_tooltip = [
    alt.Tooltip('Month_Label:N', title='Month'),
    alt.Tooltip('Estimated_Attendees:Q', title='Attendees', format=','),
    alt.Tooltip('Marginal_Cost:Q', title='MC', format='$,.2f'),
    alt.Tooltip('MC_Type:N', title='Type'),
]

scatter = (
    alt.Chart(mc_long)
    .mark_circle(size=100)
    .encode(
        x=alt.X('Estimated_Attendees:Q', title='Number of Attendees'),
        y=alt.Y('Marginal_Cost:Q', title='Marginal Cost ($)'),
        color=mc_type_color,
        tooltip=mc_tooltip,
    )
    .properties(
        title='Marginal Cost vs Attendance: Scale-Adjusted Model Shows Economies of Scale',
        width=700,
        height=400,
    )
)

scatter


In [10]:
# Export results: write the full analysis frame (inputs plus derived MC and
# optimal-price columns) to CSV without the index column.
export_path = "Byte_Datasets/optimal_pricing_scale_adjusted.csv"
analysis_data_valid.to_csv(export_path, index=False)
print(f"\n✓ Results exported to: {export_path}")



✓ Results exported to: Byte_Datasets/optimal_pricing_scale_adjusted.csv
