In [19]:
# Load libraries
import pandas as pd
import numpy as np
import altair as alt
import statsmodels.api as sm
from scipy import stats


In [20]:
# Read the monthly average-ticket-price table from disk
price_data = pd.read_csv("Byte_Datasets/average_ticket_price_by_month.csv")

# Keep only the months that recorded attendees (the original note says this
# removes December). The copy lets later cells add columns without touching
# the raw frame.
has_attendees = price_data["Estimated_Attendees"] > 0
price_data_valid = price_data.loc[has_attendees].copy()

summary_columns = ["Month", "Average_Ticket_Price", "Estimated_Attendees"]
print("Data Summary:")
print(price_data_valid[summary_columns])


Data Summary:
    Month  Average_Ticket_Price  Estimated_Attendees
0       1            136.563835                 8898
1       2             88.736173                55669
2       3             85.459006                52032
3       4             83.806573                44637
4       5             87.361049                34541
5       6             74.914130                24211
6       7             86.906880                29446
7       8             89.061381                47246
8       9             81.744410                40119
9      10             84.202102                57001
10     11            105.862954                 5998


In [21]:
# Natural-log transform of price and attendance, stored as new columns
# alongside the raw values
for log_col, raw_col in (
    ("Log_Price", "Average_Ticket_Price"),
    ("Log_Attendees", "Estimated_Attendees"),
):
    price_data_valid[log_col] = np.log(price_data_valid[raw_col])

print("\nLog-transformed data:")
print(price_data_valid[["Month", "Average_Ticket_Price", "Log_Price", "Estimated_Attendees", "Log_Attendees"]])



Log-transformed data:
    Month  Average_Ticket_Price  Log_Price  Estimated_Attendees  Log_Attendees
0       1            136.563835   4.916792                 8898       9.093582
1       2             88.736173   4.485668                55669      10.927179
2       3             85.459006   4.448037                52032      10.859614
3       4             83.806573   4.428511                44637      10.706318
4       5             87.361049   4.470050                34541      10.449902
5       6             74.914130   4.316343                24211      10.094562
6       7             86.906880   4.464837                29446      10.290313
7       8             89.061381   4.489326                47246      10.763123
8       9             81.744410   4.403597                40119      10.599605
9      10             84.202102   4.433220                57001      10.950824
10     11            105.862954   4.662145                 5998       8.699181


In [22]:
# Log-log OLS fit
#   response  (y): Log(Number of Attendees)
#   regressor (X): Log(Average Ticket Price)
y = price_data_valid["Log_Attendees"]
X = price_data_valid["Log_Price"]

# Prepend the constant column that supplies the intercept term
X_with_const = sm.add_constant(X)

# Build the model, then fit it
model = sm.OLS(y, X_with_const)
results = model.fit()

# Print the full statsmodels summary framed by an 80-character banner
banner = "=" * 80
print(f"\n{banner}")
print("OLS REGRESSION RESULTS")
print(banner)
print(results.summary())
print("\n")



OLS REGRESSION RESULTS
                            OLS Regression Results                            
Dep. Variable:          Log_Attendees   R-squared:                       0.521
Model:                            OLS   Adj. R-squared:                  0.468
Method:                 Least Squares   F-statistic:                     9.793
Date:                Thu, 23 Oct 2025   Prob (F-statistic):             0.0121
Time:                        01:46:48   Log-Likelihood:                -7.9243
No. Observations:                  11   AIC:                             19.85
Df Residuals:                       9   BIC:                             20.64
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         25.5687      4

  res = hypotest_fun_out(*samples, **kwds)


In [23]:
# Pull the headline numbers out of the fitted model
slope_term = 'Log_Price'
intercept = results.params['const']
elasticity = results.params[slope_term]
r_squared = results.rsquared
adj_r_squared = results.rsquared_adj
p_value = results.pvalues[slope_term]
std_error = results.bse[slope_term]

# Verdict line for the 5% significance check on the slope
if p_value < 0.05:
    significance_line = "   ✓ Statistically significant at 5% level"
else:
    significance_line = "   ✗ NOT statistically significant at 5% level"

# Assemble the report line by line, then emit it in one go
rule = "=" * 80
report_lines = [
    rule,
    "KEY FINDINGS - PRICE ELASTICITY OF DEMAND",
    rule,
    f"\n📊 ELASTICITY COEFFICIENT: {elasticity:.4f}",
    f"   Interpretation: A 1% increase in ticket price is associated with a {elasticity:.4f}% change in attendance",
    f"\n📈 R-squared: {r_squared:.4f}",
    f"   ({r_squared*100:.2f}% of variance in log attendance is explained by log price)",
    f"\n📈 Adjusted R-squared: {adj_r_squared:.4f}",
    f"\n🎯 P-value: {p_value:.6f}",
    significance_line,
    f"\n📏 Standard Error: {std_error:.4f}",
    f"\n🔢 Intercept: {intercept:.4f}",
    f"\n📋 Number of observations: {len(price_data_valid)}",
    rule,
]
print("\n".join(report_lines))


KEY FINDINGS - PRICE ELASTICITY OF DEMAND

📊 ELASTICITY COEFFICIENT: -3.3891
   Interpretation: A 1% increase in ticket price is associated with a -3.3891% change in attendance

📈 R-squared: 0.5211
   (52.11% of variance in log attendance is explained by log price)

📈 Adjusted R-squared: 0.4679

🎯 P-value: 0.012134
   ✓ Statistically significant at 5% level

📏 Standard Error: 1.0830

🔢 Intercept: 25.5687

📋 Number of observations: 11


In [24]:
# Scatter of the observed points with the fitted regression line on top
alt.data_transformers.disable_max_rows()

# Store fitted values and residuals on the frame so the charts can refer to
# them by column name
price_data_valid["Predicted_Log_Attendees"] = results.fittedvalues
price_data_valid["Residuals"] = results.resid

# Axis extents: widen each observed range by 15% at both ends
pad_fraction = 0.15

log_prices = price_data_valid["Log_Price"]
log_price_min, log_price_max = log_prices.min(), log_prices.max()
log_price_range = log_price_max - log_price_min
log_price_padding = log_price_range * pad_fraction

log_attendees = price_data_valid["Log_Attendees"]
log_attendees_min, log_attendees_max = log_attendees.min(), log_attendees.max()
log_attendees_range = log_attendees_max - log_attendees_min
log_attendees_padding = log_attendees_range * pad_fraction

# Each scale is built once and handed to both layers so their domains agree
x_scale = alt.Scale(
    domain=[log_price_min - log_price_padding, log_price_max + log_price_padding]
)
y_scale = alt.Scale(
    domain=[log_attendees_min - log_attendees_padding, log_attendees_max + log_attendees_padding]
)

# Hover details for the observed points
point_tooltips = [
    alt.Tooltip('Month:O', title='Month'),
    alt.Tooltip('Average_Ticket_Price:Q', title='Avg Price', format='$,.2f'),
    alt.Tooltip('Estimated_Attendees:Q', title='Attendees', format=','),
    alt.Tooltip('Log_Price:Q', title='Log(Price)', format='.4f'),
    alt.Tooltip('Log_Attendees:Q', title='Log(Attendees)', format='.4f'),
]

# Layer 1: observed (log price, log attendees) pairs
scatter = (
    alt.Chart(price_data_valid)
    .mark_circle(size=100, color='#3498db')
    .encode(
        x=alt.X('Log_Price:Q', title='Log(Average Ticket Price)', scale=x_scale),
        y=alt.Y('Log_Attendees:Q', title='Log(Number of Attendees)', scale=y_scale),
        tooltip=point_tooltips,
    )
)

# Layer 2: fitted values drawn as a line
regression_line = (
    alt.Chart(price_data_valid)
    .mark_line(color='#e74c3c', strokeWidth=2)
    .encode(
        x=alt.X('Log_Price:Q', scale=x_scale),
        y=alt.Y('Predicted_Log_Attendees:Q', scale=y_scale),
    )
)

# Stack the layers and label the figure with the estimated slope
chart = (scatter + regression_line).properties(
    title=f'Price Elasticity of Demand: Log-Log Regression (Elasticity = {elasticity:.4f})',
    width=600,
    height=400,
)

chart


In [25]:
# Residuals against fitted values, as a visual check on model assumptions
residual_tooltips = [
    alt.Tooltip('Month:O', title='Month'),
    alt.Tooltip('Residuals:Q', title='Residual', format='.4f'),
]

residual_plot = (
    alt.Chart(price_data_valid)
    .mark_circle(size=100, color='#9b59b6')
    .encode(
        x=alt.X('Predicted_Log_Attendees:Q', title='Fitted Values (Log Attendees)'),
        y=alt.Y('Residuals:Q', title='Residuals'),
        tooltip=residual_tooltips,
    )
    .properties(title='Residual Plot', width=600, height=400)
)

# Dashed red reference rule at residual = 0
zero_frame = pd.DataFrame({'y': [0]})
zero_line = (
    alt.Chart(zero_frame)
    .mark_rule(color='red', strokeDash=[5, 5])
    .encode(y='y:Q')
)

residual_chart = residual_plot + zero_line
residual_chart


In [26]:
# Business interpretation
# Turns the estimated elasticity into a plain-language reading: first by sign
# (direction of the price/attendance relationship), then, for negative values,
# by magnitude relative to 1 (unit elastic / elastic / inelastic).

# Half-width of the band around 1 that is reported as "unit elastic".
# A fitted slope is a float and will essentially never equal 1 exactly, so an
# exact comparison would leave the unit-elastic branch unreachable and would
# label e.g. 1.003 as "elastic" while printing "1.00% decrease". Adjust the
# band if a wider or narrower notion of "approximately 1" is wanted.
UNIT_ELASTICITY_TOLERANCE = 0.01

print("\n" + "="*80)
print("BUSINESS INTERPRETATION")
print("="*80)

if elasticity < 0:
    print(f"\n✓ NEGATIVE ELASTICITY: The coefficient is {elasticity:.4f}, which is NEGATIVE.")
    print("  This indicates an INVERSE relationship between price and quantity demanded.")
    print("  This follows the law of demand: when prices are higher, attendance is lower.")

    abs_elasticity = abs(elasticity)
    # Check the tolerance band first so values that are approximately 1 do
    # not fall through to the elastic / inelastic branches.
    if abs(abs_elasticity - 1) <= UNIT_ELASTICITY_TOLERANCE:
        print(f"\n📌 UNIT ELASTIC DEMAND (|{elasticity:.4f}| ≈ 1):")
        print("  Demand has unit elasticity - price and attendance move proportionally.")
    elif abs_elasticity > 1:
        print(f"\n📌 ELASTIC DEMAND (|{elasticity:.4f}| > 1):")
        print("  Demand is ELASTIC - attendees are very sensitive to price changes.")
        print(f"  A 1% increase in price leads to a {abs_elasticity:.2f}% decrease in attendance.")
        print("  RECOMMENDATION: Lower prices to increase total revenue.")
    else:
        print(f"\n📌 INELASTIC DEMAND (|{elasticity:.4f}| < 1):")
        print("  Demand is INELASTIC - attendees are not very sensitive to price changes.")
        print(f"  A 1% increase in price leads to only a {abs_elasticity:.2f}% decrease in attendance.")
        print("  RECOMMENDATION: Higher prices could increase total revenue.")
elif elasticity > 0:
    print(f"\n⚠️ POSITIVE ELASTICITY: The coefficient is {elasticity:.4f}, which is POSITIVE.")
    print("  This suggests prices and attendance move in the SAME direction.")
    print("  This could indicate this is a Veblen/Giffen good, or there may be other factors at play.")
    print("  This is unusual for typical demand relationships.")
else:
    print("\n⚠️ ZERO ELASTICITY: No relationship between price and attendance.")

print("\n" + "="*80)



BUSINESS INTERPRETATION

✓ NEGATIVE ELASTICITY: The coefficient is -3.3891, which is NEGATIVE.
  This indicates an INVERSE relationship between price and quantity demanded.
  This follows the law of demand: when prices are higher, attendance is lower.

📌 ELASTIC DEMAND (|-3.3891| > 1):
  Demand is ELASTIC - attendees are very sensitive to price changes.
  A 1% increase in price leads to a 3.39% decrease in attendance.
  RECOMMENDATION: Lower prices to increase total revenue.



In [27]:
# Gather the headline statistics, keyed by their display label
metric_values = {
    'Elasticity (Slope)': elasticity,
    'Intercept': intercept,
    'R-squared': r_squared,
    'Adjusted R-squared': adj_r_squared,
    'P-value': p_value,
    'Standard Error': std_error,
    'Observations': len(price_data_valid),
}

# Two-column table: one row per metric
results_summary = pd.DataFrame({
    'Metric': list(metric_values.keys()),
    'Value': list(metric_values.values()),
})

print("\nRegression Summary Table:")
print(results_summary.to_string(index=False))

# Write the summary table and the working data frame to CSV
results_path = "Byte_Datasets/elasticity_regression_results.csv"
data_path = "Byte_Datasets/elasticity_regression_data.csv"
results_summary.to_csv(results_path, index=False)
price_data_valid.to_csv(data_path, index=False)

print("\n✓ Results exported to:")
print(f"  - {results_path}")
print(f"  - {data_path}")



Regression Summary Table:
            Metric     Value
Elasticity (Slope) -3.389072
         Intercept 25.568733
         R-squared  0.521095
Adjusted R-squared  0.467884
           P-value  0.012134
    Standard Error  1.082993
      Observations 11.000000

✓ Results exported to:
  - Byte_Datasets/elasticity_regression_results.csv
  - Byte_Datasets/elasticity_regression_data.csv
