
# Milestone 5: Advanced Model Development

**Objective:** Train and evaluate XGBoost and LightGBM models for dynamic pricing.

**Deliverables**
- XGBoost and LightGBM models trained and evaluated  
- Backtesting with historical data  
- Simulated revenue lift validated  

**Evaluation Metrics**
- R² Score  
- RMSE


In [None]:

# Model training code (structure preserved)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

# Sample dataset (replace with actual data).
# NOTE: this is synthetic placeholder data. 'feature2' is just 'feature1' + 200,
# so the two columns carry the same information; 'price' is a linear trend in
# the row index plus a small repeating (i % 5) offset.
data = pd.DataFrame({
    'feature1': range(200),
    'feature2': range(200, 400),
    'price': [i * 0.5 + 10 + (i % 5) * 0.1 for i in range(200)],
})

# Split features/target and hold out 20% of the rows for evaluation.
# random_state is fixed so the split is reproducible between runs.
X = data[['feature1', 'feature2']]
y = data['price']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- XGBoost ---------------------------------------------------------------
xgb_model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.08,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
)
xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)

# --- LightGBM --------------------------------------------------------------
lgbm_model = LGBMRegressor(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=6,
    num_leaves=40,
    random_state=42,
)
lgbm_model.fit(X_train, y_train)
y_pred_lgb = lgbm_model.predict(X_test)

# --- Evaluation on the held-out split --------------------------------------
# RMSE is computed as sqrt(MSE) rather than via
# mean_squared_error(..., squared=False): the `squared` keyword was deprecated
# in scikit-learn 1.4 and removed in 1.6, so the old call raises TypeError on
# current releases. sqrt(MSE) gives the same value on every version.
xgb_rmse = np.sqrt(mean_squared_error(y_test, y_pred_xgb))
lgb_rmse = np.sqrt(mean_squared_error(y_test, y_pred_lgb))
xgb_r2 = r2_score(y_test, y_pred_xgb)
lgb_r2 = r2_score(y_test, y_pred_lgb)

print(f"XGBoost R2: {xgb_r2:.4f}, RMSE: {xgb_rmse:.4f}")
print(f"LightGBM R2: {lgb_r2:.4f}, RMSE: {lgb_rmse:.4f}")


XGBoost R2: 0.8847, RMSE: 0.2395
LightGBM R2: 0.8921, RMSE: 0.2278
