# Car Price Predictor By ML
###
### Goal: In this project we will predict the price of a car from its name, company, year, kilometres driven and fuel type.

## (1) Importing the Required Libraries

In [127]:
import os

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

## (2) Load the DataSet

In [128]:
# Path of the cleaned car-listings CSV. The default is the original author's
# local Windows location; set the CAR_DATA_CSV environment variable to load the
# file from somewhere else without editing this cell.
CAR_DATA_CSV = os.environ.get(
    'CAR_DATA_CSV',
    r'A:\MTECH(Data Science)\DataSet\car_price_predictor-master\Cleaned_Car_data.csv',
)

# Load the CSV into a pandas DataFrame.
car_dataset = pd.read_csv(CAR_DATA_CSV)

In [129]:
# Preview the first 5 rows of the freshly loaded frame.
car_dataset.head(5)

Unnamed: 0.1,Unnamed: 0,name,company,year,Price,kms_driven,fuel_type
0,0,Hyundai Santro Xing,Hyundai,2007,80000,45000,Petrol
1,1,Mahindra Jeep CL550,Mahindra,2006,425000,40,Diesel
2,2,Hyundai Grand i10,Hyundai,2014,325000,28000,Petrol
3,3,Ford EcoSport Titanium,Ford,2014,575000,36000,Diesel
4,4,Ford Figo,Ford,2012,175000,41000,Diesel


In [130]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
car_dataset.shape

(816, 7)

In [131]:
# Summarise the frame: per-column non-null count and dtype, plus memory usage.
car_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 816 entries, 0 to 815
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  816 non-null    int64 
 1   name        816 non-null    object
 2   company     816 non-null    object
 3   year        816 non-null    int64 
 4   Price       816 non-null    int64 
 5   kms_driven  816 non-null    int64 
 6   fuel_type   816 non-null    object
dtypes: int64(4), object(3)
memory usage: 44.8+ KB


In [132]:
# The 'Unnamed: 0' column will not affect the car price prediction, so remove it.
# The frame is rebound instead of mutated in place and errors='ignore' is used,
# so re-running this cell after the column is already gone does not raise KeyError.
car_dataset = car_dataset.drop(columns=['Unnamed: 0'], errors='ignore')

In [133]:
# Count the missing values in each column.
car_dataset.isna().sum()

name          0
company       0
year          0
Price         0
kms_driven    0
fuel_type     0
dtype: int64

In [134]:
# How many listings fall under each fuel type.
print(car_dataset['fuel_type'].value_counts())

fuel_type
Petrol    428
Diesel    386
LPG         2
Name: count, dtype: int64


In [135]:
# Print the distinct values found in every column, one column at a time.
for column_name, column_values in car_dataset.items():
    print(f'Unique values of {column_name}')
    print(column_values.unique())
    print("======================")

Unique values of name
['Hyundai Santro Xing' 'Mahindra Jeep CL550' 'Hyundai Grand i10'
 'Ford EcoSport Titanium' 'Ford Figo' 'Hyundai Eon'
 'Ford EcoSport Ambiente' 'Maruti Suzuki Alto' 'Skoda Fabia Classic'
 'Maruti Suzuki Stingray' 'Hyundai Elite i20' 'Mahindra Scorpio SLE'
 'Audi A8' 'Audi Q7' 'Mahindra Scorpio S10' 'Hyundai i20 Sportz'
 'Maruti Suzuki Vitara' 'Mahindra Bolero DI' 'Maruti Suzuki Swift'
 'Maruti Suzuki Wagon' 'Toyota Innova 2.0' 'Renault Lodgy 85'
 'Skoda Yeti Ambition' 'Maruti Suzuki Baleno' 'Renault Duster 110'
 'Renault Duster 85' 'Honda City 1.5' 'Maruti Suzuki Dzire' 'Honda Amaze'
 'Honda Amaze 1.5' 'Honda City' 'Datsun Redi GO' 'Maruti Suzuki SX4'
 'Mitsubishi Pajero Sport' 'Honda City ZX' 'Tata Indigo eCS'
 'Volkswagen Polo Highline' 'Chevrolet Spark LS' 'Renault Duster 110PS'
 'Mini Cooper S' 'Skoda Fabia 1.2L' 'Renault Duster' 'Mahindra Scorpio S4'
 'Mahindra Scorpio VLX' 'Mahindra Quanto C8' 'Ford EcoSport' 'Honda Brio'
 'Volkswagen Vento Highline' 'Hyundai

In [136]:
# Preview the first 5 rows again before the categorical columns are encoded.
car_dataset.head(5)

Unnamed: 0,name,company,year,Price,kms_driven,fuel_type
0,Hyundai Santro Xing,Hyundai,2007,80000,45000,Petrol
1,Mahindra Jeep CL550,Mahindra,2006,425000,40,Diesel
2,Hyundai Grand i10,Hyundai,2014,325000,28000,Petrol
3,Ford EcoSport Titanium,Ford,2014,575000,36000,Diesel
4,Ford Figo,Ford,2012,175000,41000,Diesel


### (3) Convert all the categorical variables into numerical format

In [137]:
# LabelEncoder converts a categorical variable into numerical format by
# assigning an integer label to each distinct category.
from sklearn.preprocessing import LabelEncoder

# Text columns to encode. They are addressed by name rather than by position so
# the cell still targets the right columns if the column order ever differs.
CATEGORICAL_COLUMNS = ['name', 'company', 'fuel_type']

# One fitted encoder per column. Re-fitting a single shared encoder would throw
# away the mapping of every column but the last, which makes it impossible to
# look up a code (label_encoders[col].transform([...])) or decode one
# (label_encoders[col].inverse_transform([...])) later on.
label_encoders = {}
for col in CATEGORICAL_COLUMNS:
    encoder = LabelEncoder()
    # Assigning by column name replaces the column with the integer codes;
    # writing through .iloc may instead keep the original object dtype.
    car_dataset[col] = encoder.fit_transform(car_dataset[col])
    label_encoders[col] = encoder

# Kept for backward compatibility: LE previously ended up fitted on fuel_type.
LE = label_encoders['fuel_type']

In [138]:
# Preview the first 5 rows after the categorical columns were converted to numeric codes.
car_dataset.head(5)

Unnamed: 0,name,company,year,Price,kms_driven,fuel_type
0,91,9,2007,80000,45000,2
1,118,13,2006,425000,40,0
2,88,9,2014,325000,28000,2
3,40,6,2014,575000,36000,0
4,45,6,2012,175000,41000,0


## (3) Separating the Independent Variable(X) & Dependent Variable(Y)

In [140]:
# Dependent variable (y): the price. Independent variables (x): every other column.
target_column = 'Price'
y = car_dataset[target_column]
x = car_dataset.drop(columns=[target_column])

In [155]:
# First 5 rows of the feature matrix.
x.head(5)

Unnamed: 0,name,company,year,kms_driven,fuel_type
0,91,9,2007,45000,2
1,118,13,2006,40,0
2,88,9,2014,28000,2
3,40,6,2014,36000,0
4,45,6,2012,41000,0


In [158]:
# First 5 values of the target column.
y.head(5)

0     80000
1    425000
2    325000
3    575000
4    175000
Name: Price, dtype: int64

### (4) Splitting Training and Test data

In [143]:
# Hold out 20% of the rows for testing. random_state pins the shuffle so the
# split, the fitted model and every number below are the same on each
# Restart & Run All instead of changing from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

### (5). Model Creation

In [144]:
# Create a linear regression estimator with scikit-learn's default settings.
model = LinearRegression()

### (6). Train Model

In [145]:
# Fit the model on the training split only (x_train features, y_train prices).
model.fit(x_train, y_train)

### (7). Test Model

In [146]:
# Predict prices for the held-out test rows.
predict = model.predict(x_test)

# Predictions alone do not test anything: report the coefficient of
# determination (R^2) of the model on the test split so the quality of the fit
# is actually measured. 1.0 is a perfect fit; values near 0 mean the model does
# no better than always predicting the mean price.
model.score(x_test, y_test)

In [154]:
# Show the first 3 encoded rows — presumably as a reference for the integer
# codes that the prediction cell asks the user to type in.
car_dataset.head(3)

Unnamed: 0,name,company,year,Price,kms_driven,fuel_type
0,91,9,2007,80000,45000,2
1,118,13,2006,425000,40,0
2,88,9,2014,325000,28000,2


### (8). Car Price Prediction

In [151]:
# input() always returns text. Each answer is converted to int right away so
# that the frame handed to the model holds real numbers (not an object column
# of strings) and a mistyped value raises ValueError here, at the prompt that
# caused it, instead of failing later inside model.predict().
a = int(input('Enter Name code : '))        # integer code of the car name
b = int(input('Enter company code : '))     # integer code of the company
c = int(input('Enter year : '))             # year of the car
d = int(input('Enter kms_driven : '))       # kilometres driven
e = int(input('Enter fuel_type code : '))   # integer code of the fuel type

# Single-row frame whose columns match, in name and order, the features the
# model was trained on.
input_data_model = pd.DataFrame(
    [[a, b, c, d, e]],
    columns=['name', 'company', 'year', 'kms_driven', 'fuel_type'])

Enter Name code :  91
Enter company code :  9
Enter year :  2007
Enter kms_driven :  45000
Enter fuel_type code :  2


In [152]:
# Display the single-row frame built from the values entered above.
input_data_model

Unnamed: 0,name,company,year,kms_driven,fuel_type
0,91,9,2007,45000,2


In [153]:
# Run the fitted model on the entered row and print the resulting array.
predicted_price = model.predict(input_data_model)
print('Predicted price will be : ', predicted_price)

Predicted price will be :  [198053.71393685]


# **********************************************************************

## Name - Aatish Kumar Baitha
  - M.Tech (Data Science, 2nd Year Student)
- My LinkedIn Profile
  - https://www.linkedin.com/in/aatish-kumar-baitha-ba9523191
- My Blog
  - https://computersciencedatascience.blogspot.com/
- My GitHub Profile
  - https://github.com/Aatishkb

# Thank you!