In [1]:
# Make sure the local `src` package directory exists before modules are written into it
import os

os.makedirs(name="src", exist_ok=True)
# Append mode creates __init__.py when it is missing and leaves existing content untouched
with open("src/__init__.py", "a"):
    pass

In [5]:
# write elasticity.py
# Source text of src/elasticity.py; it is held in a string so it can be written to disk.
# Docstrings inside the string use ''' so they cannot terminate the outer literal.
elasticity_code = """
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.utils import resample


def estimate_loglog_ols(df_ts, price_col='avg_price', qty_col='units', controls=None,
                        date_col='week', n_boot=500, random_state=None):
    '''Estimate own-price elasticity from a log-log OLS regression.

    Fits log(quantity) ~ log(price) + promo_flag + C(month) [+ controls] with
    HC3 robust standard errors, then bootstraps rows to obtain a percentile
    confidence interval for the log-price coefficient.

    Parameters
    ----------
    df_ts : DataFrame with one row per period for a single product.
    price_col, qty_col : names of the price and quantity columns.
    controls : optional column name or list of column names added to the
        regression as extra regressors.
    date_col : name of the column parsed as dates to derive the month dummies.
    n_boot : number of bootstrap resamples.
    random_state : optional seed for the resampling; None keeps using the
        global NumPy random state.

    Returns
    -------
    dict with keys 'model' (fitted results), 'elasticity' (log-price
    coefficient), 'ci' (2.5 / 97.5 bootstrap percentiles, NaN when no draw
    succeeded), 'n_obs' (rows used by the fit) and 'n_boot_ok' (bootstrap
    draws that produced a finite coefficient).

    Raises
    ------
    ValueError if no row has both a positive price and a positive quantity.
    '''
    df = df_ts.copy().dropna(subset=[price_col, qty_col])
    # log() needs strictly positive inputs; a zero or negative price would put
    # -inf / NaN into the regressors.
    df = df[(df[qty_col] > 0) & (df[price_col] > 0)].copy()
    if df.empty:
        raise ValueError('no rows with positive price and quantity to estimate on')

    df['log_q'] = np.log(df[qty_col])
    df['log_p'] = np.log(df[price_col])

    # Weeks priced more than 5% below the median price are treated as promotions.
    df['promo_flag'] = (df[price_col] < df[price_col].median() * 0.95).astype(int)
    df['month'] = pd.to_datetime(df[date_col]).dt.month

    formula = 'log_q ~ log_p + promo_flag + C(month)'
    if controls:
        if isinstance(controls, str):
            controls = [controls]
        # Q() quotes the column name so names that are not valid identifiers still work.
        formula += ''.join(' + Q("{}")'.format(name) for name in controls)
    model = smf.ols(formula=formula, data=df).fit(cov_type='HC3')

    beta = model.params.get('log_p', np.nan)

    rng = None if random_state is None else np.random.RandomState(random_state)
    boots = []
    for _ in range(n_boot):
        sample = df.sample(frac=1, replace=True, random_state=rng)
        try:
            coef = smf.ols(formula=formula, data=sample).fit().params.get('log_p')
        except Exception:
            # Best effort: a degenerate resample is skipped, but unlike a bare
            # except this still lets KeyboardInterrupt / SystemExit through.
            continue
        # A missing or non-finite coefficient would break the percentile step.
        if coef is not None and np.isfinite(coef):
            boots.append(coef)

    if boots:
        ci_lower, ci_upper = np.percentile(boots, [2.5, 97.5])
    else:
        ci_lower = ci_upper = np.nan

    return {
        'model': model,
        'elasticity': beta,
        'ci': (ci_lower, ci_upper),
        'n_obs': int(model.nobs),
        'n_boot_ok': len(boots)
    }
"""

In [6]:
# Persist the module source so that `src.elasticity` can be imported
with open("src/elasticity.py", mode="w", encoding="utf-8") as module_file:
    module_file.write(elasticity_code)

In [8]:
# write simulator.py
# Source text of src/simulator.py; it is held in a string so it can be written to disk.
# Docstrings inside the string use ''' so they cannot terminate the outer literal.
sim_code = """
import numpy as np


def revenue_for_price_grid(elasticity, C, price_grid):
    '''Constant-elasticity demand and revenue over a set of prices.

    demand = C * p ** elasticity and revenue = p * demand. price_grid may be
    any array-like (list, tuple, ndarray, Series). Returns (demand, revenue)
    as float arrays.
    '''
    prices = np.asarray(price_grid, dtype=float)
    demand = C * (prices ** elasticity)
    revenue = prices * demand
    return demand, revenue


def find_revenue_max_price(elasticity_est, elasticity_boots, C, p0,
                           floor_price=None, ceil_price=None,
                           down_pct=0.3, up_pct=0.2, step=0.01):
    '''Grid-search the revenue-maximising price and its bootstrap interval.

    Parameters
    ----------
    elasticity_est : point estimate of the price elasticity.
    elasticity_boots : iterable of bootstrap elasticity draws; may be empty
        and may contain NaN (such draws are ignored).
    C : demand scale constant.
    p0 : reference price used to derive default bounds.
    floor_price, ceil_price : search bounds; default to p0 * (1 - down_pct)
        and p0 * (1 + up_pct).
    step : spacing of the price grid, must be positive.

    Returns
    -------
    dict with 'best_price', 'best_revenue', 'price_ci' (2.5 / 97.5 percentiles
    of the per-draw best price, (nan, nan) when no usable draw exists), 'grid'
    and 'rev_grid'.

    Raises
    ------
    ValueError if step is not positive, the bounds are inverted, or the point
    estimate yields no finite revenue on the grid.
    '''
    if floor_price is None:
        floor_price = p0 * (1 - down_pct)
    if ceil_price is None:
        ceil_price = p0 * (1 + up_pct)
    if not step > 0:
        raise ValueError('step must be positive')
    if floor_price > ceil_price:
        raise ValueError('floor_price must not exceed ceil_price')
    # The small epsilon keeps ceil_price itself on the grid despite float rounding.
    grid = np.arange(floor_price, ceil_price + 1e-9, step)

    # point estimate
    _, rev = revenue_for_price_grid(elasticity_est, C, grid)
    best_idx = np.nanargmax(rev)
    best_price = grid[best_idx]
    best_revenue = rev[best_idx]

    # uncertainty: revenue-maximising price for each usable bootstrap elasticity
    draws = np.array(list(elasticity_boots), dtype=float)
    draws = draws[~np.isnan(draws)]
    best_prices_boot = []
    for b in draws:
        _, revb = revenue_for_price_grid(b, C, grid)
        if np.all(np.isnan(revb)):
            # nanargmax raises on an all-NaN slice; such a draw carries no information.
            continue
        best_prices_boot.append(grid[np.nanargmax(revb)])

    if best_prices_boot:
        lower, upper = np.percentile(best_prices_boot, [2.5, 97.5])
    else:
        lower = upper = np.nan
    return {'best_price':best_price, 'best_revenue':best_revenue, 'price_ci':(lower,upper), 'grid':grid, 'rev_grid':rev}
"""

In [9]:
# Persist the module source so that `src.simulator` can be imported
with open("src/simulator.py", mode="w", encoding="utf-8") as module_file:
    module_file.write(sim_code)

In [10]:
import os

# exist_ok=True turns this into a no-op when the package directory is already present
os.makedirs(name="src", exist_ok=True)

In [11]:
# Append mode creates src/__init__.py when it is missing but never truncates an
# existing package initializer (mode "w" would wipe it each time this cell is re-run).
with open("src/__init__.py", "a"):
    pass

In [12]:
import sys
from pathlib import Path

# Absolute path of the current working directory, where the `src` package is created.
ROOT = Path().resolve()
# Re-running this cell must not pile up duplicate sys.path entries.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

In [None]:
# streamlit_app/app.py
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
from pathlib import Path
from src.elasticity import estimate_loglog_ols
from src.simulator import find_revenue_max_price

DATA = Path('../data/online_retail.csv')  # adjust path if running from streamlit_app/
# Pre-aggregated weekly per-SKU table, stored in the same directory as the raw export.
AGG_DATA = DATA.with_name('agg_weekly_per_sku.csv')

@st.cache_data
def load_agg():
    """Load the pre-computed weekly per-SKU aggregate.

    Only the aggregate file is read. The raw export at ``DATA`` is not parsed,
    because its contents were never used here and reading it made every cold
    start depend on (and wait for) the full raw file.

    Returns:
        DataFrame read from ``AGG_DATA`` with the ``week`` column parsed as dates.
    """
    return pd.read_csv(AGG_DATA, parse_dates=['week'])

# Streamlit re-executes this script on every widget interaction, including a click on
# the export button. Seeding NumPy's global RNG keeps the row-resampling bootstrap and
# the simulated elasticities identical across reruns, so exported values match the
# values shown on screen.
np.random.seed(42)

agg = load_agg()
skus = sorted(agg['StockCode'].unique())
sku = st.sidebar.selectbox("Select SKU", skus)
sku_df = agg[agg['StockCode']==sku].sort_values('week')

st.title(f"SKU: {sku} — Price Elasticity & Revenue Simulator")

# Logs are only defined for strictly positive values; anything else would put
# -inf / NaN into the scatter trendline and into the reference price below.
valid_df = sku_df[(sku_df['units'] > 0) & (sku_df['avg_price'] > 0)]
if valid_df.empty:
    st.warning("No weeks with positive units and price for this SKU; nothing to estimate.")
    st.stop()

# Elasticity estimation
res = estimate_loglog_ols(sku_df)
ci_lower, ci_upper = res['ci']
st.metric("Elasticity (beta on log price)", f"{res['elasticity']:.3f}")
st.write("95% CI:", res['ci'])

# Show scatter and fitted line (built on a derived frame so sku_df is not mutated)
scatter_df = valid_df.assign(
    log_q=np.log(valid_df['units']),
    log_p=np.log(valid_df['avg_price']),
)
fig = px.scatter(scatter_df, x='log_p', y='log_q', trendline='ols', title='Log(price) vs log(quantity)')
st.plotly_chart(fig, use_container_width=True)

# Without a finite elasticity and interval the simulation below has no valid input
# (a NaN scale yields NaN draws and an all-NaN revenue grid).
if not np.all(np.isfinite([res['elasticity'], ci_lower, ci_upper])):
    st.warning("Elasticity or its confidence interval could not be estimated; skipping the revenue simulation.")
    st.stop()

# Revenue grid and recommended price
p0 = valid_df['avg_price'].iloc[-1]  # most recent valid price is the reference point
# compute C:
a = res['model'].params.get('Intercept', 0)
C = np.exp(a)  # approximate: intercept only, i.e. the regression's baseline levels
# quick approx: treat the 95% CI as roughly +/- 2 standard deviations around the estimate
elasticity_boots = np.random.normal(res['elasticity'], scale=(ci_upper - ci_lower)/4, size=500)
sim = find_revenue_max_price(res['elasticity'], elasticity_boots, C, p0, step=max(0.01, p0*0.01))
grid = sim['grid']; rev = sim['rev_grid']
rev_df = pd.DataFrame({'price':grid,'revenue':rev})
fig2 = px.line(rev_df, x='price', y='revenue', title='Price vs Expected Revenue')
fig2.add_vline(x=sim['best_price'], line_dash='dash', annotation_text=f"Recommended: {sim['best_price']:.2f}")
st.plotly_chart(fig2, use_container_width=True)

st.markdown("**Recommended price (point estimate):** {:.2f}".format(sim['best_price']))
st.markdown("**Recommended price (95% CI):** {:.2f} — {:.2f}".format(sim['price_ci'][0], sim['price_ci'][1]))

# Export
if st.button("Export recommendation CSV"):
    out = pd.DataFrame([{
        'StockCode':sku, 'recommended_price':sim['best_price'],
        'ci_lower':sim['price_ci'][0], 'ci_upper':sim['price_ci'][1]
    }])
    out.to_csv('sku_recommendation.csv', index=False)
    st.success("Exported sku_recommendation.csv")