In [1]:
# train_model.py
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.preprocessing import LabelEncoder
import pickle

# Load dataset
data = pd.read_csv("cardekho.csv")

# Drop rows with missing values.
# NOTE: with no `subset`, this drops a row if ANY column is missing, including
# columns that are not used as features below.
data = data.dropna()

# Encode categorical columns.
# Each column gets its own LabelEncoder, kept in `encoders`. Refitting a single
# shared encoder would leave it holding only the last column's classes, so the
# string -> integer mapping of the other columns could not be reproduced when
# encoding new inputs for prediction.
categorical_cols = ['fuel', 'seller_type', 'transmission', 'owner']
encoders = {}
for col in categorical_cols:
    encoders[col] = LabelEncoder()
    data[col] = encoders[col].fit_transform(data[col])

# Keep `le` bound to the encoder of the last column, matching the state this
# cell previously left behind, in case a later cell still references it.
le = encoders[categorical_cols[-1]]

# Feature selection
feature_cols = ['year', 'km_driven', 'fuel', 'seller_type', 'transmission', 'owner', 'engine', 'seats']
X = data[feature_cols]
y = data['selling_price']

# Train-test split (fixed seed so the reported metrics are reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Random Forest Model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate model on the held-out 20%
y_pred = model.predict(X_test)
print("R2 Score:", r2_score(y_test, y_pred))
print("MAE:", mean_absolute_error(y_test, y_pred))

# Save model with pickle
with open("car_price_model.pkl", "wb") as f:
    pickle.dump(model, f)

# Save the fitted per-column encoders next to the model so that prediction code
# can apply exactly the same category -> integer mapping used during training.
# Stored in a separate file so the contents of car_price_model.pkl are unchanged.
with open("label_encoders.pkl", "wb") as f:
    pickle.dump(encoders, f)

print("✅ Model trained and saved successfully as car_price_model.pkl")
# Displays the integer codes now stored in the encoded 'fuel' column.
data['fuel'].unique()


R2 Score: 0.9596530255572867
MAE: 86921.62588251987
✅ Model trained and saved successfully as car_price_model.pkl


array([1, 3, 2, 0])

In [2]:
# Re-read the raw CSV; this rebinds `data`, replacing the cleaned and
# label-encoded frame produced by the training cell with the untouched file contents.
raw_csv_path = 'cardekho.csv'
data = pd.read_csv(raw_csv_path)

In [3]:
# Show the distinct values present in the 'fuel' column of the current `data` frame.
fuel_column = data['fuel']
fuel_column.unique()

array(['Diesel', 'Petrol', 'LPG', 'CNG'], dtype=object)

In [4]:
# Sanity check: for each distinct 'seats' value, report whether it is above 50.
distinct_seats = data['seats'].unique()
distinct_seats > 50

array([False, False, False, False, False, False, False, False, False,
       False])

In [5]:
# Preview the first five rows of the current `data` frame.
data.head(n=5)

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,mileage(km/ltr/kg),engine,max_power,seats
0,Maruti Swift Dzire VDI,2014,450000,145500,Diesel,Individual,Manual,First Owner,23.4,1248.0,74.0,5.0
1,Skoda Rapid 1.5 TDI Ambition,2014,370000,120000,Diesel,Individual,Manual,Second Owner,21.14,1498.0,103.52,5.0
2,Honda City 2017-2020 EXi,2006,158000,140000,Petrol,Individual,Manual,Third Owner,17.7,1497.0,78.0,5.0
3,Hyundai i20 Sportz Diesel,2010,225000,127000,Diesel,Individual,Manual,First Owner,23.0,1396.0,90.0,5.0
4,Maruti Swift VXI BSIII,2007,130000,120000,Petrol,Individual,Manual,First Owner,16.1,1298.0,88.2,5.0
