In [65]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib


In [66]:
# Step 2: Load Dataset
df = pd.read_csv("Car details v3.csv")
df.head()


Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,mileage,engine,max_power,torque,seats
0,Maruti Swift Dzire VDI,2014,450000,145500,Diesel,Individual,Manual,First Owner,23.4 kmpl,1248 CC,74 bhp,190Nm@ 2000rpm,5.0
1,Skoda Rapid 1.5 TDI Ambition,2014,370000,120000,Diesel,Individual,Manual,Second Owner,21.14 kmpl,1498 CC,103.52 bhp,250Nm@ 1500-2500rpm,5.0
2,Honda City 2017-2020 EXi,2006,158000,140000,Petrol,Individual,Manual,Third Owner,17.7 kmpl,1497 CC,78 bhp,"12.7@ 2,700(kgm@ rpm)",5.0
3,Hyundai i20 Sportz Diesel,2010,225000,127000,Diesel,Individual,Manual,First Owner,23.0 kmpl,1396 CC,90 bhp,22.4 kgm at 1750-2750rpm,5.0
4,Maruti Swift VXI BSIII,2007,130000,120000,Petrol,Individual,Manual,First Owner,16.1 kmpl,1298 CC,88.2 bhp,"11.5@ 4,500(kgm@ rpm)",5.0


In [67]:
# Step 3: Feature Engineering

# Create 'car_age' feature from 'year'
df['car_age'] = 2025 - df['year']

# Encode categorical features
df['fuel_type_petrol'] = (df['fuel'] == 'Petrol').astype(int)
df['fuel_type_diesel'] = (df['fuel'] == 'Diesel').astype(int)
df['seller_type_individual'] = (df['seller_type'] == 'Individual').astype(int)
df['transmission_manual'] = (df['transmission'] == 'Manual').astype(int)

# Encode 'owner' text to numeric
owner_map = {
    'First Owner': 0,
    'Second Owner': 1,
    'Third Owner': 2,
    'Fourth & Above Owner': 3,
    'Test Drive Car': 4
}
df['owner'] = df['owner'].map(owner_map)
df = df.dropna(subset=['owner'])
df['owner'] = df['owner'].astype(int)


In [68]:
# Step 4: Define Features and Target

X = df[[
    'selling_price',        # used as proxy for present_price
    'km_driven',
    'car_age',
    'fuel_type_diesel',
    'fuel_type_petrol',
    'seller_type_individual',
    'transmission_manual',
    'owner'
]]

y = df['selling_price']


In [69]:
# Step 5: Split and Train the Model

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)


In [71]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("MAE:", mae)
print("RMSE:", rmse)
print("R² Score:", r2)


MAE: 569.6497847478473
RMSE: 10860.765617908295
R² Score: 0.9998200472466832


In [72]:
# Step 7: Save the Model
joblib.dump(model, 'car_price_model.pkl')
print("✅ Model saved as car_price_model.pkl")


✅ Model saved as car_price_model.pkl
