# Car Price Prediction Project

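This notebook demonstrates how to build a machine-learning model — a random forest regressor wrapped in a scikit-learn preprocessing pipeline — that predicts a car's selling price from its listing attributes.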

In [3]:
# Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

## Load Data

In [4]:
# Read the raw car listings; 'car data.csv' must sit in the same directory
# as this notebook.
csv_path = 'car data.csv'
df = pd.read_csv(csv_path)

# Preview the first rows as the cell output
df.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Driven_kms,Fuel_Type,Selling_type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


## Data Preprocessing

In [5]:
# Column groups consumed by the preprocessing step
numerical_cols = ['Year', 'Present_Price', 'Driven_kms', 'Owner']
categorical_cols = ['Fuel_Type', 'Selling_type', 'Transmission']

# Features: every column except the target and the car name
X = df.drop(columns=['Selling_Price', 'Car_Name'])

# Target: the selling price the model learns to predict
y = df['Selling_Price']

In [6]:
# Preprocessing pipeline: standardize the numeric columns and one-hot encode
# the categorical ones. Columns not listed here are dropped (ColumnTransformer
# default).
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numerical_cols),
    # drop='first' keeps the original encoding of known categories.
    # handle_unknown='ignore' makes a category that was not seen during fit
    # encode as all zeros (scikit-learn emits a warning) instead of raising at
    # transform/predict time. Combining it with `drop` requires
    # scikit-learn >= 1.0.
    ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_cols)
])

## Train-Test Split

In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

## Build and Train the Model

In [8]:
# Random forest regressor with a fixed seed
forest = RandomForestRegressor(random_state=42)

# Chain preprocessing and the regressor so both are fitted and applied together
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', forest),
])

# Fit on the training split only
model.fit(X_train, y_train)

## Model Evaluation

In [9]:
# Predict on the held-out rows
y_pred = model.predict(X_test)

# R^2 and root-mean-squared error of the predictions against the true prices
test_r2 = r2_score(y_test, y_pred)
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("Test R^2 Score:", test_r2)
print("Test RMSE:", test_rmse)

Test R^2 Score: 0.9635778536357699
Test RMSE: 0.9159723964091963


## Save Model

In [10]:
# Persist the fitted pipeline (preprocessing + regressor) to disk
joblib.dump(value=model, filename='car_price_model.pkl')
print("Model saved as car_price_model.pkl")

Model saved as car_price_model.pkl


## Predict on New Data Example

In [11]:
# Define a function for prediction
def predict_price(new_data_dict, estimator=None):
    """Predict the selling price for a single car.

    Parameters
    ----------
    new_data_dict : dict
        Maps feature name to value for one car. It is turned into a
        one-row DataFrame whose columns are the dict keys.
    estimator : object with a ``predict`` method, optional
        Fitted model to use. When omitted, the notebook-level ``model``
        pipeline is used, so existing one-argument calls keep working.
        Pass a pipeline reloaded with ``joblib.load`` to predict without
        retraining.

    Returns
    -------
    The first (and only) element of ``estimator.predict(...)`` for the
    one-row frame.
    """
    if estimator is None:
        # Fall back to the pipeline trained earlier in this notebook
        estimator = model
    new_df = pd.DataFrame([new_data_dict])
    return estimator.predict(new_df)[0]

# Example usage: one car described by the same feature columns used in training
new_car = {
    'Year': 2018, 'Present_Price': 9.83, 'Driven_kms': 2071,
    'Fuel_Type': 'Diesel', 'Selling_type': 'Dealer', 'Transmission': 'Manual',
    'Owner': 0,
}

predicted_price = predict_price(new_car)
print("Predicted Selling Price for new car:", predicted_price)

Predicted Selling Price for new car: 7.9244
