<a href="https://colab.research.google.com/github/Kanishka939/PriceItRight/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. Install dependencies
!pip install streamlit xgboost matplotlib seaborn pyngrok --quiet

# 2. Import libraries
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import xgboost as xgb
from pyngrok import ngrok
import os
import time
from google.colab import files

# 3. Upload your dataset CSV file
# `uploaded` is a mapping keyed by uploaded file name. Only the first entry is
# used as the dataset; any further files uploaded in the same dialog are ignored.
uploaded = files.upload()
if not uploaded:
    # Fail with an actionable message instead of an IndexError on an empty mapping.
    raise RuntimeError("No file was uploaded. Re-run this cell and choose a CSV file.")
filename = next(iter(uploaded))
print(f"Uploaded file: {filename}")

# 4. Streamlit app code as a string with your columns & logic
#
# The template is deliberately a plain string, NOT an f-string. The app source
# contains its own f-strings and dict literals; inside an outer f-string every
# `{...}` in them would be evaluated here in the notebook (raising NameError,
# or freezing stale kernel values into app.py) instead of at app runtime.
# The only value injected from the notebook is the dataset path, substituted
# for the __DATA_FILE__ token below. repr() emits a correctly quoted and
# escaped Python string literal, so file names containing quotes or
# backslashes stay valid.
app_template = '''
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import xgboost as xgb

# --- Page Config ---
st.set_page_config(page_title="🚀 Dynamic Pricing Engine", layout="wide")

st.title("🚀 Dynamic Pricing Engine")
st.write("Dynamically predicts optimal prices based on demand, seasonality, and competitor pricing.")

@st.cache_data(show_spinner=False)
def load_data():
    """Read the uploaded CSV once and normalise the Date / Seasonality columns."""
    df = pd.read_csv(__DATA_FILE__)
    # Convert 'Date' to datetime
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])
    # Ensure seasonality is categorical
    if 'Seasonality' in df.columns:
        df['Seasonality'] = df['Seasonality'].astype(str)
    return df

df = load_data()

# Sidebar: Select Product ID to filter
selected_product = st.sidebar.selectbox("Select Product ID", options=sorted(df['Product ID'].unique()))

# Filter df to selected product
df_product = df[df['Product ID'] == selected_product].copy()

# Feature Engineering (vectorised; rows with a non-positive denominator get 0)
units_sold = df_product['Units Sold']
units_ordered = df_product['Units Ordered']
unit_price = df_product['Price']
has_orders = units_ordered > 0

# Conversion rate = Units Sold / Units Ordered (0 where nothing was ordered)
sell_through = units_sold / units_ordered
df_product['conversion_rate'] = sell_through.where(has_orders, 0)

# Stock velocity = Units Sold / (Inventory Level + 1) to avoid div zero
df_product['stock_velocity'] = units_sold / (df_product['Inventory Level'] + 1)

# Price elasticity = (Units Sold / Units Ordered) / Price (0 for zero orders or zero price)
df_product['price_elasticity'] = (sell_through / unit_price).where(has_orders & (unit_price > 0), 0)

# One-hot encode seasonality
df_product = pd.concat([df_product, pd.get_dummies(df_product['Seasonality'], prefix='season')], axis=1)

# Prepare features - you can add/remove as relevant
features = ['Price', 'Competitor Pricing', 'Inventory Level', 'Discount']
# Add all season dummies that exist for this product
season_cols = [col for col in df_product.columns if col.startswith('season_')]
features += season_cols

X = df_product[features]
y = df_product['Units Sold']

# Split train/test (the held-out part is not evaluated anywhere in this app)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train models
lr = LinearRegression()
lr.fit(X_train, y_train)

xgb_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, seed=42)
xgb_model.fit(X_train, y_train)

# Sidebar inputs for simulation
st.sidebar.header(f"Simulate Pricing Scenario for Product ID: {selected_product}")

price_min = float(df_product['Price'].min())
price_max = float(df_product['Price'].max())
selected_price = st.sidebar.slider("Set New Price", price_min, price_max, float(df_product['Price'].median()))

comp_price_min = float(df_product['Competitor Pricing'].min())
comp_price_max = float(df_product['Competitor Pricing'].max())
selected_comp_price = st.sidebar.slider("Competitor Price", comp_price_min, comp_price_max, float(df_product['Competitor Pricing'].median()))

selected_inventory = st.sidebar.slider("Inventory Level", 0, int(df_product['Inventory Level'].max()), 10)
selected_discount = st.sidebar.slider("Discount (%)", 0, 100, 0)

selected_season = st.sidebar.selectbox("Seasonality", sorted(df_product['Seasonality'].unique()))

# User inputs for dynamic uplift
revenue_lift_pct = st.sidebar.slider("Expected Revenue Lift (%)", 0, 50, 8)
conversion_lift_pct = st.sidebar.slider("Expected Conversion Lift (%)", 0, 50, 12)

# Create input row for simulation: 1 for the chosen season's dummy, 0 for the rest
chosen_season_col = f"season_{selected_season}"
season_ohe = {sc: int(sc == chosen_season_col) for sc in season_cols}

# Column order matches `features`, which is what the models were fitted on
sim_df = pd.DataFrame({
    'Price': [selected_price],
    'Competitor Pricing': [selected_comp_price],
    'Inventory Level': [selected_inventory],
    'Discount': [selected_discount],
    **season_ohe
})

# Predict units sold with both models
pred_lr = lr.predict(sim_df)[0]
pred_xgb = xgb_model.predict(sim_df)[0]

# Calculate margin assuming cost = 70% of price
cost_per_unit = selected_price * 0.7
# Guard against a zero price (possible when the historical minimum price is 0)
margin = (selected_price - cost_per_unit) / selected_price if selected_price > 0 else 0.0

# Conversion rate simulated (approximate)
conversion_rate_sim = pred_xgb / (pred_xgb + 1)  # rough approx to keep <1

# Apply dynamic lifts from user input sliders
revenue_sim = selected_price * pred_xgb
revenue_sim_lift = revenue_sim * (1 + revenue_lift_pct / 100)
conversion_sim_lift = conversion_rate_sim * (1 + conversion_lift_pct / 100)

# Results
col1, col2 = st.columns([1, 2])

with col1:
    st.subheader(f"Pricing Simulation Results for Product ID: {selected_product}")
    st.write(f"Predicted Units Sold (Linear Regression): *{pred_lr:.1f}*")
    st.write(f"Predicted Units Sold (XGBoost): *{pred_xgb:.1f}*")
    st.write(f"Simulated Margin: *{margin:.2%}*")
    st.write(f"Simulated Conversion Rate: *{conversion_rate_sim:.2%}*")
    st.write(f"Projected Revenue (with uplift): *${revenue_sim_lift:,.2f}*")
    st.write(f"Projected Conversion Rate (with uplift): *{conversion_sim_lift:.2%}*")

with col2:
    # Sweep the historical price range with every other input held at its
    # sidebar value; all 50 candidate prices are scored in a single predict call.
    prices_range = np.linspace(price_min, price_max, 50)
    sweep_df = pd.DataFrame({
        'Price': prices_range,
        'Competitor Pricing': selected_comp_price,
        'Inventory Level': selected_inventory,
        'Discount': selected_discount,
        **season_ohe
    })
    revenue_vals = prices_range * xgb_model.predict(sweep_df)

    fig_rev, ax_rev = plt.subplots(figsize=(8, 4))
    ax_rev.plot(prices_range, revenue_vals, color='#ffd700', linewidth=3)
    ax_rev.axvline(selected_price, color='black', linestyle='--', label='Selected Price')
    ax_rev.set_title("Projected Revenue vs Price", color='black')
    ax_rev.set_xlabel("Price")
    ax_rev.set_ylabel("Projected Revenue")
    ax_rev.grid(alpha=0.3)
    ax_rev.legend()
    st.pyplot(fig_rev)
    # The script reruns on every widget change; close figures so they do not pile up
    plt.close(fig_rev)

# Historical metrics overview
st.subheader(f"Historical Metrics Overview for Product ID: {selected_product}")
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

df_product.groupby('Date')['conversion_rate'].mean().plot(ax=axes[0], color='lightgreen', title="Avg Conversion Rate Over Time")
axes[0].set_ylabel('Conversion Rate')
axes[0].grid(True, linestyle='--', alpha=0.4)

axes[1].hist(df_product['price_elasticity'].dropna(), bins=30, color='coral')
axes[1].set_title("Price Elasticity Distribution")
axes[1].grid(True, linestyle='--', alpha=0.4)

axes[2].hist(df_product['stock_velocity'].dropna(), bins=30, color='skyblue')
axes[2].set_title("Stock Velocity Distribution")
axes[2].grid(True, linestyle='--', alpha=0.4)

st.pyplot(fig)
plt.close(fig)

# Seasonality impact
st.subheader(f"Seasonality Impact on Units Sold for Product ID: {selected_product}")
season_means = df_product.groupby('Seasonality')['Units Sold'].mean().reindex(sorted(df_product['Seasonality'].unique()))

fig2, ax2 = plt.subplots(figsize=(8, 4))
sns.barplot(x=season_means.index, y=season_means.values, palette="viridis", ax=ax2)
ax2.set_ylabel("Average Units Sold")
ax2.set_title("Average Units Sold by Seasonality")
st.pyplot(fig2)
plt.close(fig2)

# Feature explanations
st.markdown("""
### Feature Descriptions:
- *Price Elasticity:* Sensitivity of demand relative to price changes.
- *Conversion Rate:* Ratio of units sold to units ordered.
- *Stock Velocity:* Rate of sales relative to current stock.
- *Seasonality:* Categorical variable representing seasonal impact on demand.
""")

# Project info
st.markdown("### About this Project")
st.write("""
- Built a machine learning pricing model to dynamically adjust product prices based on demand, seasonality, and competitor pricing.
- Features engineered from real-world sales, inventory, pricing, discount, and seasonality data.
- Trained regression and XGBoost models to predict optimal price points.
- Streamlit web app allows simulating pricing scenarios to monitor margin, conversion, and revenue impacts.
- Dynamic lift inputs let users test different uplift scenarios.
""")
'''

# Inject the uploaded dataset path as a Python string literal.
app_code = app_template.replace("__DATA_FILE__", repr(filename))

# 5. Write the app.py file
# The generated source contains non-ASCII characters (emoji in the page title),
# so the encoding is pinned to UTF-8 instead of relying on the platform default.
with open('app.py', 'w', encoding='utf-8') as f:
    f.write(app_code)

# 6. Ngrok setup and launch Streamlit app
from getpass import getpass

# The ngrok auth token is a credential and must never be hard-coded in a
# notebook that gets shared or committed. It is read from the NGROK_AUTH_TOKEN
# environment variable when set, otherwise prompted for without echoing.
# NOTE(review): the token previously embedded in this cell has been published
# and should be revoked in the ngrok dashboard.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN") or getpass("Enter your ngrok auth token: ")
if not NGROK_AUTH_TOKEN:
    raise RuntimeError("An ngrok auth token is required to open the public tunnel.")

# Stop any ngrok process left over from an earlier run of this cell before
# configuring the token and opening a fresh tunnel.
ngrok.kill()
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

print("Starting Streamlit app...")
# The tunnel forwards to local port 8501. `streamlit run` below is started
# without an explicit port, so this presumably relies on Streamlit's default
# port being 8501 and free — verify if the public URL shows nothing.
public_url = ngrok.connect(8501)
print(f"🚀 Your Streamlit app is live at: {public_url}")

# Launch Streamlit detached in the background so this cell can continue.
os.system('nohup streamlit run app.py &')

print("Streamlit app should now be running. Open the link above to interact.")

# 7. Keep notebook alive to keep tunnel open
# Blocks until the cell is interrupted manually.
while True:
    time.sleep(10)


Saving retail_store_inventory.csv to retail_store_inventory (1).csv
Uploaded file: retail_store_inventory (1).csv
Starting Streamlit app...
🚀 Your Streamlit app is live at: NgrokTunnel: "https://8fa5f3771be6.ngrok-free.app" -> "http://localhost:8501"
Streamlit app should now be running. Open the link above to interact.
