In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error

### Load Your Clean Private Seller Dataset

In [None]:
# Load the private-seller dataset from the working directory into `df`,
# then print a short preview and the column summary.
print("Loading Data")
try:
    df = pd.read_csv('private_market_data.csv')
except FileNotFoundError:
    print("Error: Make sure 'private_market_data.csv' is in the same directory.")
    # Every later cell needs `df`. Swallowing the error here would surface as a
    # NameError on a fresh kernel, or silently reuse a stale `df` left over from
    # an earlier run, so stop the cell instead of continuing.
    raise
else:
    print("'private_market_data.csv' loaded successfully!")
    print(f"Dataset contains {df.shape[0]} rows and {df.shape[1]} columns.")

# Drop the 'model' column when the file has one (the message below calls it redundant).
if 'model' in df.columns:
    df = df.drop(columns='model')
    print("Redundant 'model' column removed.")

print("\nData Preview:")
print(df.head())
print("\nData Info:")
df.info()

### Feature Engineering (One-Hot Encoding)

In [None]:
print("\nPreparing Data for the Model")

# Expand the four categorical columns into indicator columns and keep the
# result under a new name so `df` itself is left as loaded.
model_ready_df = pd.get_dummies(
    data=df,
    columns=['company', 'name', 'fuel_type', 'transmission'],
)
print("One-Hot Encoding complete.")
print(f"The dataset now has {model_ready_df.shape[1]} columns after encoding.")

### Split Data into Training and Testing Sets

In [None]:
print("\nSplitting Data for Training and Testing ")

# Target is the 'price' column; every remaining column is used as a feature.
y = model_ready_df['price']
X = model_ready_df.drop(columns=['price'])

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(f"Data split complete: {X_train.shape[0]} rows for training, {X_test.shape[0]} rows for testing.")


### Train the XGBoost Model

In [None]:
print("\nTraining the Model")
print("Starting private seller model training")

# Build the regressor and fit it on the training split in one expression;
# `fit` hands back the fitted estimator, which is what `model` is bound to.
model = xgb.XGBRegressor(
    n_estimators=1000,
    learning_rate=0.05,
    max_depth=5,
    n_jobs=-1,        # use all available cores
    device='cpu',
    random_state=42,  # fixed seed for repeatable training
).fit(X_train, y_train)
print("Model training complete!")

### Evaluate Model Performance

In [None]:
print("\n--- Step 5: Evaluating Model Performance ---")

# Predict prices for the held-out test rows and score them against the
# true values.
predictions = model.predict(X_test)

r2 = r2_score(y_true=y_test, y_pred=predictions)
mae = mean_absolute_error(y_true=y_test, y_pred=predictions)

print(f"R-squared (R2): {r2:.3f}")
print(f"Mean Absolute Error (MAE): ₹{mae:,.2f}")

### Save the Trained Model

In [None]:
print("\nSaving the Model")

# Serialize the fitted estimator to a file in the working directory.
joblib.dump(value=model, filename='private_seller_model.joblib')

print("\nPrivate seller model has been saved as 'private_seller_model.joblib'")