<a href="https://colab.research.google.com/github/Somiddhya09/CarPredictionModel/blob/main/CarPricePrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Car Price Prediction**

## Importing libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

## Importing dataset

In [None]:
# Single place to change the dataset location. The default points into a Google
# Drive folder under /content/drive; this notebook does not show a mount step,
# so presumably Drive has to be mounted before this cell runs — TODO confirm.
CSV_PATH = "/content/drive/MyDrive/car_price.csv"

df = pd.read_csv(CSV_PATH)
df.head()

Unnamed: 0,car_ID,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,1,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,2,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,3,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,4,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950.0
4,5,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450.0


## Clean dataset

In [None]:
# Derive the manufacturer from the first space-separated word of CarName
# (e.g. "audi 100 ls" -> "audi"), then drop the row identifier and the raw name.
# The vectorised .str accessor leaves a missing CarName as NaN instead of
# raising, and the guard plus errors="ignore" make this cell safe to re-run
# after the two columns have already been removed.
if "CarName" in df.columns:
    df = df.assign(manufacturer=df["CarName"].str.split(" ", n=1).str[0])
df = df.drop(columns=["car_ID", "CarName"], errors="ignore")

## Separate Features (X) and Target (y)

In [None]:
# The target is the price column; every other column is used as a feature.
y = df["price"]
X = df.drop("price", axis=1)

## Identify categorical and numerical columns

In [None]:
# Partition the feature columns by dtype: object-typed columns are treated as
# categorical, everything else as numerical.
numerical_cols = X.select_dtypes(exclude="object").columns
categorical_cols = X.select_dtypes(include="object").columns

## Preprocessing (One-Hot Encode categorical columns)

In [None]:
# One-hot encode the categorical features and pass the numerical ones through
# unchanged. The numerical columns are listed explicitly rather than via
# remainder='passthrough': the output order is the same (encoded categoricals
# first, numericals after), but the transformer no longer depends on how
# ColumnTransformer stores remainder columns, which the FutureWarning emitted
# when fitting says will change in scikit-learn 1.7.
# handle_unknown='ignore' lets prediction proceed when a category was not seen
# during fit instead of raising.
preprocess = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ('num', 'passthrough', numerical_cols),
])

## Create Random Forest Model Pipeline

In [None]:
# Single estimator that applies the preprocessing step and then fits a random
# forest, so raw feature frames can be passed straight to fit/predict.
# random_state is fixed so repeated runs build the same forest.
model = Pipeline([
    ("preprocess", preprocess),
    ("rf", RandomForestRegressor(n_estimators=300, max_depth=None, random_state=42)),
])

## Train/Test Split

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Train the model

In [None]:
# Fit the preprocessing step and the forest on the training split only.
model.fit(X=X_train, y=y_train)


The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



## Evaluate the model

In [None]:
# Score the fitted pipeline on the held-out rows.
y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
# RMSE is the square root of the mean squared error, so it is in price units.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("R2 Score:", r2)
print("MAE:", mae)
print("RMSE:", rmse)


R2 Score: 0.9582851728351611
MAE: 1247.260804878049
RMSE: 1814.700026641416


## Predict prices for example cars from the dataset

In [None]:
# Demo prediction on the first two rows of the feature table. These rows come
# from the full dataset X, so they may have been part of the training split.
sample = X.head(2)

predicted_price = model.predict(sample)
print("Predicted Price:", predicted_price)


Predicted Price: [14499.43111111 14499.43111111]
