# **Car Price Prediction**


## Importing Libraries


In [None]:
import numpy as np
import pandas as pd

## Importing Dataset

In [None]:
# Load the car listings from car_data.csv (path is relative to the notebook's
# working directory) and preview the first rows.
dataset = pd.read_csv('car_data.csv')
dataset.head()

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner
0,Maruti 800 AC,2007,60000,70000,Petrol,Individual,Manual,First Owner
1,Maruti Wagon R LXI Minor,2007,135000,50000,Petrol,Individual,Manual,First Owner
2,Hyundai Verna 1.6 SX,2012,600000,100000,Diesel,Individual,Manual,First Owner
3,Datsun RediGO T Option,2017,250000,46000,Petrol,Individual,Manual,First Owner
4,Honda Amaze VX i-DTEC,2014,450000,141000,Diesel,Individual,Manual,Second Owner


In [None]:
# Count missing values in the target column before modelling.
dataset["selling_price"].isna().sum()

0

In [None]:
# Frequency of each fuel category; rare categories give the model very little
# signal to learn from.
dataset["fuel"].value_counts()

Diesel      2153
Petrol      2123
CNG           40
LPG           23
Electric       1
Name: fuel, dtype: int64

In [None]:
# Frequency of each seller category. Note: seller_type is only inspected here;
# it is not part of the feature matrix X built later in this notebook.
dataset["seller_type"].value_counts()

Individual          3244
Dealer               994
Trustmark Dealer     102
Name: seller_type, dtype: int64

## Encoding the Data

In [None]:
# Select features and target by column name rather than by position, so the
# selection keeps working if the CSV's column order changes.
# Column order of X matters downstream: the encoders are applied to
# X[:, 2] (fuel) and X[:, 3] (transmission).
FEATURE_COLUMNS = ["year", "km_driven", "fuel", "transmission"]
TARGET_COLUMN = "selling_price"

X = dataset[FEATURE_COLUMNS].values
y = dataset[TARGET_COLUMN].values

In [None]:
# Inspect the feature matrix before encoding (object dtype: ints mixed with strings).
X

array([[2007, 70000, 'Petrol', 'Manual'],
       [2007, 50000, 'Petrol', 'Manual'],
       [2012, 100000, 'Diesel', 'Manual'],
       ...,
       [2009, 83000, 'Petrol', 'Manual'],
       [2016, 90000, 'Diesel', 'Manual'],
       [2016, 40000, 'Petrol', 'Manual']], dtype=object)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit each encoder on the original string column in `dataset`, not on the
# column of X that is about to be overwritten. This keeps the cell idempotent:
# re-running it no longer refits the encoders on already-encoded integers,
# which would break lb.transform("Petrol") later and the pickled encoders.
# X is a separate object array built from `dataset`, so row order matches.

# Column 2 of X holds fuel.
lb = LabelEncoder()
X[:, 2] = lb.fit_transform(dataset["fuel"])

# Column 3 of X holds transmission.
lb1 = LabelEncoder()
X[:, 3] = lb1.fit_transform(dataset["transmission"])

In [None]:
# Confirm the fuel and transmission columns now hold integer codes.
X


array([[2007, 70000, 4, 1],
       [2007, 50000, 4, 1],
       [2012, 100000, 1, 1],
       ...,
       [2009, 83000, 4, 1],
       [2016, 90000, 1, 1],
       [2016, 40000, 4, 1]], dtype=object)

## Splitting the Data into Train and Test Set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 5% of the rows for evaluation; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, test_size=0.05, random_state=0)
X_train, X_test, y_train, y_test = split_parts

# Show the training features.
print(X_train)

[[2016 36000 1 1]
 [2014 70000 4 1]
 [2016 23000 4 1]
 ...
 [2016 22000 4 1]
 [2015 70000 1 1]
 [2013 62000 4 1]]


## Training the Model

In [None]:
from sklearn.ensemble import RandomForestRegressor

# A 300-tree forest; random_state is fixed so repeated runs build the same model.
forest_params = {"n_estimators": 300, "random_state": 0}
regressor = RandomForestRegressor(**forest_params)
regressor.fit(X_train, y_train)

RandomForestRegressor(n_estimators=300, random_state=0)

In [None]:
# For a regressor, score() is the R^2 coefficient of determination on the
# held-out set, not a classification accuracy; it is shown here scaled by 100.
accuracy = regressor.score(X_test, y_test)
accuracy_pct = accuracy * 100
print(accuracy_pct, '%')

85.74236935963135 %


## Testing the Model on a Given Input

In [None]:
# Raw sample in the same column order as X: year, km_driven, fuel, transmission.
new_data = [2017, 7000, "Petrol", "Manual"]

# Swap each categorical field for the integer code its fitted encoder assigns.
for position, encoder in ((2, lb), (3, lb1)):
    new_data[position] = encoder.transform([new_data[position]])[0]


In [None]:
# Show the encoded sample, then predict its selling price.
print(new_data)
# Wrap the single sample in a list so predict() receives a one-row, 2-D input.
regressor.predict([new_data])

[2017, 7000, 4, 1]


array([624428.57142857])

## Saving the Model and Encoder using Pickle

In [None]:
import pickle

# Persist the trained model and both label encoders. Each file is opened in a
# `with` block so the handle is flushed and closed even if dump() raises;
# the original left the handles from the inline open() calls unclosed.
# NOTE: only unpickle these files from a trusted location, since pickle.load
# can execute arbitrary code.
for artifact, path in ((regressor, 'regressor.pkl'), (lb, 'lb'), (lb1, 'lb1')):
    with open(path, 'wb') as fh:
        pickle.dump(artifact, fh)