# 🤖 Stage 4: Advanced Modeling & Forecasting (Updated with Gradient Boosting)

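This stage includes:
- One-hot encoding of the categorical features
- Log-transformed target (`log1p(price)`); metrics are reported back on the original price scale
- Models: Linear, Ridge, Lasso, Random Forest, XGBoost, Gradient Boosting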

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from xgboost import XGBRegressor

# Read the cleaned used-car listings.
df = pd.read_csv('/content/used_cars_cleaned.csv')

# Predictor columns fed to every model below.
features = [
    'odometer',
    'vehicle_age',
    'is_clean_title',
    'manufacturer',
    'condition',
    'transmission',
]

# Discard rows that lack any predictor or the price itself.
df = df.dropna(subset=[*features, 'price'])

# Model the target on a log scale; log1p(x) = log(1 + x), so a price of 0
# maps to 0 rather than -inf.
df['log_price'] = np.log1p(df['price'])


## 🔢 One-Hot Encoding & Split

In [None]:
# Expand the categorical predictors into dummy columns; drop_first=True keeps
# k-1 columns for a feature with k levels.
df_encoded = pd.get_dummies(df[features], drop_first=True)

# Design matrix and log-scale target.
X, y = df_encoded, df['log_price']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## 📊 Model Evaluation Function

In [None]:
def evaluate_model(model, X_test, y_test, y_pred_log):
    """Print RMSE, MAE and R² for predictions made on the log1p scale.

    Both the predictions and the true targets are mapped back with
    ``np.expm1`` (the inverse of ``np.log1p``) before scoring, so the
    reported errors are in the units of the original price.

    Args:
        model: Fitted estimator. Not used by the computation; kept so
            existing calls keep working.
        X_test: Test features. Not used by the computation; kept so
            existing calls keep working.
        y_test: True target values on the log1p scale.
        y_pred_log: Predicted target values on the log1p scale.

    Returns:
        None. The three metrics are printed.
    """
    y_pred = np.expm1(y_pred_log)  # convert log price back
    y_true = np.expm1(y_test)

    # Take the square root explicitly instead of passing ``squared=False``:
    # that keyword was deprecated in scikit-learn 1.4 and removed in later
    # releases, where it raises TypeError. This form works on every version.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    print(f"RMSE: ${rmse:,.2f}")
    print(f"MAE: ${mae:,.2f}")
    print(f"R² Score: {r2:.4f}")

## 🔍 Linear Regression

In [None]:
# Fit a plain linear model on the training split (fit() returns the estimator).
linear_regression = LinearRegression().fit(X_train, y_train)

# Predict log-prices for the held-out rows and print the metrics.
y_pred = linear_regression.predict(X_test)
evaluate_model(
    model=linear_regression,
    X_test=X_test,
    y_test=y_test,
    y_pred_log=y_pred,
)

## 🔍 Ridge Regression

In [None]:
# Fit a ridge (L2-penalised) linear model with alpha=1.0 on the training split.
ridge_regression = Ridge(alpha=1.0).fit(X_train, y_train)

# Predict log-prices for the held-out rows and print the metrics.
y_pred = ridge_regression.predict(X_test)
evaluate_model(
    model=ridge_regression,
    X_test=X_test,
    y_test=y_test,
    y_pred_log=y_pred,
)

## 🔍 Lasso Regression

In [None]:
# Fit a lasso (L1-penalised) linear model with alpha=0.1 on the training split.
lasso_regression = Lasso(alpha=0.1).fit(X_train, y_train)

# Predict log-prices for the held-out rows and print the metrics.
y_pred = lasso_regression.predict(X_test)
evaluate_model(
    model=lasso_regression,
    X_test=X_test,
    y_test=y_test,
    y_pred_log=y_pred,
)

## 🔍 Random Forest

In [None]:
# Fit a 100-tree random forest on the training split; the seed keeps runs repeatable.
random_forest = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict log-prices for the held-out rows and print the metrics.
y_pred = random_forest.predict(X_test)
evaluate_model(
    model=random_forest,
    X_test=X_test,
    y_test=y_test,
    y_pred_log=y_pred,
)

## 🔍 XGBoost

In [None]:
# Fit an XGBoost regressor (100 boosting rounds, learning rate 0.1) on the
# training split; the seed keeps runs repeatable.
xgboost = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
).fit(X_train, y_train)

# Predict log-prices for the held-out rows and print the metrics.
y_pred = xgboost.predict(X_test)
evaluate_model(
    model=xgboost,
    X_test=X_test,
    y_test=y_test,
    y_pred_log=y_pred,
)

## 🔍 Gradient Boosting

In [None]:
# Fit scikit-learn's gradient boosting regressor (100 stages, learning rate 0.1)
# on the training split; the seed keeps runs repeatable.
gradient_boosting = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
).fit(X_train, y_train)

# Predict log-prices for the held-out rows and print the metrics.
y_pred = gradient_boosting.predict(X_test)
evaluate_model(
    model=gradient_boosting,
    X_test=X_test,
    y_test=y_test,
    y_pred_log=y_pred,
)