# Predicting the Price of a Car Using Linear Regression

In [45]:
# Import Libraries
import os

import pandas as pd

In [46]:
# Reading the dataset.
# The file location can be overridden through the CARPRICES_CSV environment variable so the
# notebook can run on other machines; the original local path is kept as the default.
CARPRICES_CSV = os.environ.get(
    "CARPRICES_CSV",
    r"C:\Users\prave\OneDrive\Desktop\NPTEL\Datasets\carprices.csv",
)
data = pd.read_csv(CARPRICES_CSV)

In [47]:
# Peek at the first five rows to check that the file was parsed as expected
data.head(n=5)

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4


In [48]:
# Dimensions of the dataframe as (number of rows, number of columns)
data.shape

(13, 4)

In [49]:
# Column labels of the dataframe
data.columns

Index(['Car Model', 'Mileage', 'Sell Price($)', 'Age(yrs)'], dtype='object')

In [50]:
# One-hot encode the dataset: get_dummies expands each non-numeric (object/category) column
# into one indicator column per distinct value and leaves the numeric columns untouched.
one_hot_encoded_data = pd.get_dummies(data=data)
one_hot_encoded_data

Unnamed: 0,Mileage,Sell Price($),Age(yrs),Car Model_Audi A5,Car Model_BMW X5,Car Model_Mercedez Benz C class
0,69000,18000,6,0,1,0
1,35000,34000,3,0,1,0
2,57000,26100,5,0,1,0
3,22500,40000,2,0,1,0
4,46000,31500,4,0,1,0
5,59000,29400,5,1,0,0
6,52000,32000,5,1,0,0
7,72000,19300,6,1,0,0
8,91000,12000,8,1,0,0
9,67000,22000,6,0,0,1


In [52]:
# Preview of the frame without the "Sell Price($)" column.
# drop() returns a new frame; the result is only displayed here, so
# one_hot_encoded_data itself still contains the column afterwards.
one_hot_encoded_data.drop(columns=["Sell Price($)"])

Unnamed: 0,Mileage,Age(yrs),Car Model_Audi A5,Car Model_BMW X5,Car Model_Mercedez Benz C class
0,69000,6,0,1,0
1,35000,3,0,1,0
2,57000,5,0,1,0
3,22500,2,0,1,0
4,46000,4,0,1,0
5,59000,5,1,0,0
6,52000,5,1,0,0
7,72000,6,1,0,0
8,91000,8,1,0,0
9,67000,6,0,0,1


In [58]:
# Re-arranging the columns: append a copy of the target as the last column, "SellingPrice".
# assign() builds a new frame instead of mutating in place, so this cell can be re-run safely
# (DataFrame.insert raises ValueError once "SellingPrice" already exists), and it does not
# depend on a hard-coded column position.
mid = one_hot_encoded_data["Sell Price($)"]
one_hot_encoded_data = one_hot_encoded_data.assign(SellingPrice=mid)

In [60]:
# Remove the original "Sell Price($)" column now that its values live in "SellingPrice"
cleaned_data_v1 = one_hot_encoded_data.drop(columns=["Sell Price($)"])
cleaned_data_v1

Unnamed: 0,Mileage,Age(yrs),Car Model_Audi A5,Car Model_BMW X5,Car Model_Mercedez Benz C class,SellingPrice
0,69000,6,0,1,0,18000
1,35000,3,0,1,0,34000
2,57000,5,0,1,0,26100
3,22500,2,0,1,0,40000
4,46000,4,0,1,0,31500
5,59000,5,1,0,0,29400
6,52000,5,1,0,0,32000
7,72000,6,1,0,0,19300
8,91000,8,1,0,0,12000
9,67000,6,0,0,1,22000


In [61]:
# Avoid the dummy variable trap: keep only two of the three one-hot columns, so that
# "Mercedez Benz C class" becomes the baseline (both remaining dummy columns equal to 0).
Final_Data = cleaned_data_v1.drop(columns=["Car Model_Mercedez Benz C class"])
Final_Data

Unnamed: 0,Mileage,Age(yrs),Car Model_Audi A5,Car Model_BMW X5,SellingPrice
0,69000,6,0,1,18000
1,35000,3,0,1,34000
2,57000,5,0,1,26100
3,22500,2,0,1,40000
4,46000,4,0,1,31500
5,59000,5,1,0,29400
6,52000,5,1,0,32000
7,72000,6,1,0,19300
8,91000,8,1,0,12000
9,67000,6,0,0,22000


In [62]:
# Importing Scikit learn
from sklearn import linear_model

In [63]:
# Create an unfitted scikit-learn LinearRegression estimator with its default settings;
# it is trained later by calling fit() on the features and target.
regression_object = linear_model.LinearRegression()

In [65]:
# Independent variables (feature matrix): every column except the target
X = Final_Data.drop(columns=["SellingPrice"])
X

Unnamed: 0,Mileage,Age(yrs),Car Model_Audi A5,Car Model_BMW X5
0,69000,6,0,1
1,35000,3,0,1
2,57000,5,0,1
3,22500,2,0,1
4,46000,4,0,1
5,59000,5,1,0
6,52000,5,1,0
7,72000,6,1,0
8,91000,8,1,0
9,67000,6,0,0


In [68]:
# Dependent variable (target vector): the selling price to be predicted
y = Final_Data["SellingPrice"]
y

0     18000
1     34000
2     26100
3     40000
4     31500
5     29400
6     32000
7     19300
8     12000
9     22000
10    20000
11    21000
12    33000
Name: SellingPrice, dtype: int64

In [69]:
# Train the model: fit the linear regression on the feature matrix X and the target y.
# fit() returns the estimator itself, which is why its repr is displayed below the cell.
regression_object.fit(X,y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [72]:
# Price prediction of a Mercedez Benz C class that is 4 yr old with mileage 45000.
# Both dummy columns are 0 because Mercedez Benz C class is the dropped (baseline) category.
# The query is built with named columns aligned to X, so it matches the features the model
# was fitted on instead of relying on the positional order of a bare list.
mercedez_query = pd.DataFrame(
    [{"Mileage": 45000, "Age(yrs)": 4, "Car Model_Audi A5": 0, "Car Model_BMW X5": 0}],
    columns=X.columns,
)
regression_object.predict(mercedez_query)[0]

36991.31721061283

In [73]:
# Price prediction of a BMW X5 that is 7 yr old with mileage 86000.
# The query is built with named columns aligned to X, so it matches the features the model
# was fitted on instead of relying on the positional order of a bare list.
bmw_query = pd.DataFrame(
    [{"Mileage": 86000, "Age(yrs)": 7, "Car Model_Audi A5": 0, "Car Model_BMW X5": 1}],
    columns=X.columns,
)
regression_object.predict(bmw_query)[0]

11080.743132190648