# 💵 Stage 5: Pricing Strategy Development (Updated)

This notebook uses Random Forest price predictions to flag underpriced and overpriced listings, apply pricing strategy logic, and categorize vehicles by pricing bands.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold, cross_val_predict, train_test_split

# Load and prepare data
# Keep only rows where every model input and the target price are present.
DATA_PATH = '/content/used_cars_cleaned.csv'
df = pd.read_csv(DATA_PATH)
features = ['odometer', 'vehicle_age', 'is_clean_title', 'manufacturer', 'condition', 'transmission']
df = df.dropna(subset=features + ['price'])
# The model is trained on log(1 + price); predictions are mapped back with expm1 below.
df['log_price'] = np.log1p(df['price'])

# One-hot encode
df_encoded = pd.get_dummies(df[features], drop_first=True)
X = df_encoded
y = df['log_price']

# Split and train model
# NOTE(review): rf and the hold-out split are kept as-is in case later cells use them.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Predict on full dataset using out-of-fold predictions.
# Scoring rows with a model that was fit on those same rows gives optimistically
# accurate predictions, which shrinks price_diff and hides mispriced listings.
# cross_val_predict clones rf for each fold, so every row is predicted by a model
# that never saw it, and the fitted rf above is left untouched. This costs five
# additional fits.
cv = KFold(n_splits=5, shuffle=True, random_state=42)
df['predicted_log_price'] = cross_val_predict(rf, X, y, cv=cv)
df['predicted_price'] = np.expm1(df['predicted_log_price'])


## 🏷️ Pricing Flag: Overpriced / Underpriced / Fair

In [None]:
# Signed gap between the listed price and the model estimate, first in currency
# units and then as a percentage of the estimate.
# NOTE(review): a predicted_price of exactly 0 would yield inf/NaN here; confirm
# that cannot occur upstream.
df['price_diff'] = df['price'].sub(df['predicted_price'])
df['price_pct_diff'] = df['price_diff'].mul(100).div(df['predicted_price'])

def categorize_price(diff, threshold=15):
    """Classify a percentage price deviation as over-, under-, or fairly priced.

    Args:
        diff: Deviation of the listed price from the reference price, in percent
            (positive means the listing is above the reference).
        threshold: Width of the "fair" band in percent. Deviations strictly
            greater than ``threshold`` are 'Overpriced', strictly less than
            ``-threshold`` are 'Underpriced'. Defaults to 15.

    Returns:
        One of 'Overpriced', 'Underpriced', or 'Fair'. A deviation exactly equal
        to ``threshold`` or ``-threshold`` is 'Fair'. NaN compares False against
        both bounds and therefore also returns 'Fair'.
    """
    if diff > threshold:
        return 'Overpriced'
    if diff < -threshold:
        return 'Underpriced'
    return 'Fair'

# Label every listing from its percent deviation, then show the size of each class.
df['pricing_flag'] = df['price_pct_diff'].map(categorize_price)
df['pricing_flag'].value_counts()

## 🪙 Pricing Bands: Budget / Mid-Range / Premium

In [None]:
def price_band(price, budget_ceiling=10000, premium_floor=30000):
    """Assign a price to the Budget, Mid-Range, or Premium band.

    Args:
        price: Price to classify.
        budget_ceiling: Prices strictly below this value are 'Budget'.
            Defaults to 10000.
        premium_floor: Prices at or above this value are 'Premium'.
            Defaults to 30000.

    Returns:
        'Budget' if ``price < budget_ceiling``, otherwise 'Mid-Range' if
        ``price < premium_floor``, otherwise 'Premium'. A NaN price fails both
        comparisons and therefore lands in 'Premium'.
    """
    if price < budget_ceiling:
        return 'Budget'
    if price < premium_floor:
        return 'Mid-Range'
    return 'Premium'

# Band each listing by its listed price, then show how many listings fall in each band.
df['pricing_band'] = df['price'].map(price_band)
df['pricing_band'].value_counts()

## 💡 Strategy Logic: 5% Discount for Aged Vehicles (>10 yrs)

In [None]:
# Recommended price is the model estimate, reduced by 5% for vehicles older than
# 10 years. Rows where the age comparison is False (including a missing age) keep
# the undiscounted estimate.
df['recommended_price'] = df['predicted_price'].mask(
    df['vehicle_age'] > 10,
    df['predicted_price'] * 0.95,
)
df[['vehicle_age', 'predicted_price', 'recommended_price']].head()

## 📊 Pricing Flags by Manufacturer

In [None]:
# Restrict the chart to the ten manufacturers with the most listings.
top_makes = df['manufacturer'].value_counts().nlargest(10).index
in_top_makes = df['manufacturer'].isin(top_makes)

# One bar group per manufacturer, split by pricing flag.
fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=df[in_top_makes], x='manufacturer', hue='pricing_flag', ax=ax)
ax.set(
    title='Pricing Flag Distribution by Top 10 Manufacturers',
    xlabel='Manufacturer',
    ylabel='Vehicle Count',
)
# Tilt the manufacturer names so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.legend(title='Pricing Flag')
fig.tight_layout()
plt.show()