In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
import warnings
warnings.filterwarnings("ignore")

# --- 0. Shared constants and helpers ---
COGS = 50.00  # Placeholder: assume a constant $50 cost of goods sold per unit
RATIO_EPS = 1e-6  # Keeps the price ratio finite when a competitor price is 0

# Engineered ratio column -> competitor price column it is derived from.
COMPETITOR_RATIO_COLUMNS = {
    'price_ratio_comp1': 'comp_1',
    'price_ratio_comp2': 'comp_2',
    'price_ratio_comp3': 'comp_3',
}


def price_ratio(unit_price, competitor_price):
    """Return unit_price / competitor_price, guarded against division by zero.

    Works element-wise on scalars, NumPy arrays and pandas Series, so the
    same formula is used for data preparation and for the simulation.
    """
    return unit_price / (competitor_price + RATIO_EPS)


def simulate_profit_curve(model, sample_row, prices, cogs):
    """Predict demand and profit for one product at each candidate price.

    Args:
        model: Fitted regressor whose ``predict`` returns log1p(quantity)
            for a feature frame with the same columns as ``sample_row``.
        sample_row: pandas Series with the product's features. Must contain
            'unit_price', 'freight_price', 'comp_1', 'comp_2' and 'comp_3'.
        prices: 1-D sequence of candidate unit prices.
        cogs: Cost of goods sold per unit.

    Returns:
        DataFrame with one row per candidate price and the columns
        'Unit_Price', 'Predicted_Qty' and 'Profit'. Profit is 0 wherever the
        per-unit margin (price - cogs - freight) is not positive.
    """
    prices = np.asarray(prices, dtype=float)
    if prices.size == 0:
        return pd.DataFrame(columns=['Unit_Price', 'Predicted_Qty', 'Profit'])

    # One feature row per candidate price; only the price-dependent columns
    # change, so the model can score the whole grid in a single call.
    grid = pd.DataFrame([sample_row] * len(prices)).reset_index(drop=True)
    grid['unit_price'] = prices
    for ratio_col, comp_col in COMPETITOR_RATIO_COLUMNS.items():
        grid[ratio_col] = price_ratio(prices, float(sample_row[comp_col]))

    # The model predicts log1p(qty): invert it and clip at zero demand.
    predicted_qty = np.maximum(0.0, np.expm1(model.predict(grid)))

    margin = prices - cogs - float(sample_row['freight_price'])
    profit = np.where(margin > 0, margin * predicted_qty, 0.0)

    return pd.DataFrame({
        'Unit_Price': prices,
        'Predicted_Qty': predicted_qty,
        'Profit': profit,
    })


def select_best_price(results, fallback_price):
    """Pick the most profitable price from a simulated profit curve.

    Args:
        results: DataFrame as returned by ``simulate_profit_curve``.
        fallback_price: Price to report when no candidate is profitable.

    Returns:
        Tuple ``(best_price, max_profit)``. Ties go to the lowest-indexed
        candidate. When no candidate yields a positive profit the result is
        ``(fallback_price, 0.0)`` instead of an arbitrary grid price.
    """
    if results.empty or not (results['Profit'] > 0).any():
        return fallback_price, 0.0
    best = results.loc[results['Profit'].idxmax()]
    return float(best['Unit_Price']), float(best['Profit'])


# Guarded so the helpers above can be imported without touching the CSV.
# Names assigned below are still module-level globals when run as a script
# or as a notebook cell.
if __name__ == "__main__":
    # --- 1. Re-run Final Data Prep (To ensure features are correct) ---
    df = pd.read_csv("retail_price.csv")
    df.drop(columns=["product_id", "month_year", "total_price"], inplace=True)
    for ratio_col, comp_col in COMPETITOR_RATIO_COLUMNS.items():
        df[ratio_col] = price_ratio(df['unit_price'], df[comp_col])
    df = pd.get_dummies(df, columns=["product_category_name"], drop_first=True)

    y_orig = df['qty']
    X = df.drop(columns=['qty'])
    # Log-transformed target for the FULL series (despite the "train" name).
    y_train_trans = np.log1p(y_orig)
    X_train, X_test, y_train_orig, y_test_orig = train_test_split(
        X, y_orig, test_size=0.2, random_state=42
    )

    # Re-train the GBR model (as a proxy for your tuned LGBM).
    # NOTE: deliberately fit on the full dataset for simulation purposes, so
    # the sampled "test" row below has been seen by the model.
    gbr_model = GradientBoostingRegressor(
        n_estimators=150, learning_rate=0.1, max_depth=5, random_state=42
    )
    gbr_model.fit(X, y_train_trans)

    # --- 2. Define Optimization Parameters ---
    # Optimize the price for the first item in the test set (position 0).
    sample_row = X_test.iloc[0].copy()
    original_price = sample_row['unit_price']
    original_qty = y_test_orig.iloc[0]

    # Candidate prices: a fixed grid from $1 to $299 in $1 steps
    # (np.arange excludes the stop value, so $300 itself is not tested).
    price_range = np.arange(1.0, 300.0, 1.0)

    # --- 3. Run Optimization Simulation ---
    print(f"--- Optimizing Price for Sample Product (Original Price: ${original_price:.2f}) ---")

    results_df = simulate_profit_curve(gbr_model, sample_row, price_range, COGS)
    best_price, max_profit = select_best_price(results_df, original_price)
    # Kept as a list of per-price dicts for any later code that expects it.
    optimization_results = results_df.to_dict('records')

    # --- 4. Final Output ---
    results_df.to_csv('price_optimization_simulation.csv', index=False)

    # NOTE: this estimate uses the observed quantity, while max_profit uses
    # the model's predicted quantity, so the two are not strictly comparable.
    original_profit = (original_price - COGS - sample_row['freight_price']) * original_qty

    print(f"\n✅ Original Price: ${original_price:.2f} (Original Profit Estimate: ${original_profit:.2f})")
    print(f"💰 Optimal Price Found: ${best_price:.2f}")
    print(f"🔥 Maximum Profit at Optimal Price: ${max_profit:.2f}")

    print("\n📈 Price Optimization Simulation Results saved to 'price_optimization_simulation.csv' (File is available for download).")

--- Optimizing Price for Sample Product (Original Price: $174.43) ---

✅ Original Price: $174.43 (Original Profit Estimate: $1861.93)
💰 Optimal Price Found: $271.00
🔥 Maximum Profit at Optimal Price: $2281.43

📈 Price Optimization Simulation Results saved to 'price_optimization_simulation.csv' (File is available for download).
