In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [12]:
df = pd.read_csv(r"C:\AI workforce\Car-price Boosting\Car Selling price.csv")
display(df.head())

Unnamed: 0.1,Unnamed: 0,year,selling_price,km_driven,fuel,seller_type,transmission,owner,selling_price_log,km_driven_log
0,0,2007,60000,70000,Petrol,Individual,Manual,First Owner,11.002117,11.156265
1,1,2007,135000,50000,Petrol,Individual,Manual,First Owner,11.813037,10.819798
2,2,2012,600000,100000,Diesel,Individual,Manual,First Owner,13.304687,11.512935
3,3,2017,250000,46000,Petrol,Individual,Manual,First Owner,12.42922,10.736418
4,4,2014,450000,141000,Diesel,Individual,Manual,Second Owner,13.017005,11.856522


In [13]:
X = df.drop(columns=["selling_price", "name"], errors="ignore")
y = df["selling_price"]

In [14]:
X = pd.get_dummies(X, drop_first=True)

In [15]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [16]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

In [17]:
param_grid = {
    "learning_rate": [0.05, 0.1],
    "n_estimators": [100, 200],
    "max_depth": [3, 4]
}

In [18]:
grid = GridSearchCV(GradientBoostingRegressor(), param_grid, cv=5, scoring="neg_root_mean_squared_error")
grid.fit(X_train, y_train)

best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)

print("Best Parameters:", grid.best_params_)
print("RMSE:", mean_squared_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))

Best Parameters: {'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100}
RMSE: 772114690.6851822
R² Score: 0.9976031209438471


In [29]:
import pickle

with open("trained_model.sav", "wb") as f:
    pickle.dump(grid, f)

with open("trained_model.sav", "rb") as f:
    loaded_model = pickle.load(f)