# **Car Price Prediction**


## Importing Libraries


In [3]:
import numpy as np
import pandas as pd

## Importing Dataset

In [5]:
dataset = pd.read_csv('car_data.csv')
dataset.head()

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner
0,Maruti 800 AC,2007,60000,70000,Petrol,Individual,Manual,First Owner
1,Maruti Wagon R LXI Minor,2007,135000,50000,Petrol,Individual,Manual,First Owner
2,Hyundai Verna 1.6 SX,2012,600000,100000,Diesel,Individual,Manual,First Owner
3,Datsun RediGO T Option,2017,250000,46000,Petrol,Individual,Manual,First Owner
4,Honda Amaze VX i-DTEC,2014,450000,141000,Diesel,Individual,Manual,Second Owner


In [6]:
dataset["selling_price"].isnull().sum()

0

In [7]:
dataset["fuel"].value_counts()

fuel
Diesel      2153
Petrol      2123
CNG           40
LPG           23
Electric       1
Name: count, dtype: int64

In [8]:
dataset["seller_type"].value_counts()

seller_type
Individual          3244
Dealer               994
Trustmark Dealer     102
Name: count, dtype: int64

## Encoding the Data

In [10]:
X = dataset.iloc[:, [1,3,4,6]].values
y = dataset.iloc[:, 2].values

In [11]:
X

array([[2007, 70000, 'Petrol', 'Manual'],
       [2007, 50000, 'Petrol', 'Manual'],
       [2012, 100000, 'Diesel', 'Manual'],
       ...,
       [2009, 83000, 'Petrol', 'Manual'],
       [2016, 90000, 'Diesel', 'Manual'],
       [2016, 40000, 'Petrol', 'Manual']], dtype=object)

In [12]:
from sklearn.preprocessing import LabelEncoder
lb = LabelEncoder()
X[:,2]=lb.fit_transform(X[:,2])
lb1 = LabelEncoder()
X[:,3]=lb1.fit_transform(X[:,3])

In [13]:
X


array([[2007, 70000, 4, 1],
       [2007, 50000, 4, 1],
       [2012, 100000, 1, 1],
       ...,
       [2009, 83000, 4, 1],
       [2016, 90000, 1, 1],
       [2016, 40000, 4, 1]], dtype=object)

## Splitting the Data into Train and Test Set

In [15]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.05, random_state = 0)

print(X_train[:,:])

[[2016 36000 1 1]
 [2014 70000 4 1]
 [2016 23000 4 1]
 ...
 [2016 22000 4 1]
 [2015 70000 1 1]
 [2013 62000 4 1]]


## Training the model with LinearRegression

In [17]:
from sklearn.linear_model import LinearRegression
lm = LinearRegression()
lm.fit(X_train,y_train)

In [18]:
accuracy_lm = lm.score(X_test,y_test)
print(accuracy_lm*100,'%')

38.75285809589857 %


In [19]:
from sklearn.metrics import r2_score
pred_lm= lm.predict(X_test)
r2_score(pred_lm,y_test)

-0.577679788835769

## Training the model with Support Vector Regressor

In [21]:
from sklearn.svm import SVR
svr = SVR(kernel='sigmoid', degree=8, gamma='scale', coef0=0.0, tol=0.001, C=1.0, epsilon=0.1, shrinking=True, cache_size=200, verbose=False, max_iter=-1,)
svr.fit(X_train,y_train)

In [22]:
accuracy_svr = svr.score(X_test,y_test)
print(accuracy_svr*100,'%')

-5.800741580171054 %


In [23]:
pred_svr= svr.predict(X_test)
r2_score(pred_svr,y_test)

-69811993.41135542

## Training the model with RandomForest

In [25]:
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=300,random_state=0)
regressor.fit(X_train,y_train)

In [26]:
accuracy = regressor.score(X_test,y_test)
print(accuracy*100,'%')

85.74236935963135 %


In [27]:
pred=regressor.predict(X_test)

In [28]:
r2_score(pred,y_test)

0.8410810149872415

## Performing a Test on given Input

In [30]:
new_data=[2017,7000,"Petrol","Manual"]
new_data[2]=lb.transform([new_data[2]])[0]
new_data[3]=lb1.transform([new_data[3]])[0]


In [31]:
print(new_data)
regressor.predict([new_data])

[2017, 7000, 4, 1]


array([624428.57142857])