In [28]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [75]:
dataset = pd.read_csv("car_price.csv")
dataset.drop("CarName",axis=1,inplace=True)
dataset.head()

Unnamed: 0,car_ID,symboling,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,1,3,gas,std,two,convertible,rwd,front,88.6,168.8,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,2,3,gas,std,two,convertible,rwd,front,88.6,168.8,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,3,1,gas,std,two,hatchback,rwd,front,94.5,171.2,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,4,2,gas,std,four,sedan,fwd,front,99.8,176.6,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950.0
4,5,2,gas,std,four,sedan,4wd,front,99.4,176.6,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450.0


In [65]:
x = dataset.iloc[: , :-1] # Feature Matrix
y = dataset.iloc[: ,-1]   # Dependent Variable Vector

In [66]:
x.head()

Unnamed: 0,car_ID,symboling,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,...,cylindernumber,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg
0,1,3,gas,std,two,convertible,rwd,front,88.6,168.8,...,four,130,mpfi,3.47,2.68,9.0,111,5000,21,27
1,2,3,gas,std,two,convertible,rwd,front,88.6,168.8,...,four,130,mpfi,3.47,2.68,9.0,111,5000,21,27
2,3,1,gas,std,two,hatchback,rwd,front,94.5,171.2,...,six,152,mpfi,2.68,3.47,9.0,154,5000,19,26
3,4,2,gas,std,four,sedan,fwd,front,99.8,176.6,...,four,109,mpfi,3.19,3.4,10.0,102,5500,24,30
4,5,2,gas,std,four,sedan,4wd,front,99.4,176.6,...,five,136,mpfi,3.19,3.4,8.0,115,5500,18,22


In [67]:
y.head()

0    13495.0
1    16500.0
2    16500.0
3    13950.0
4    17450.0
Name: price, dtype: float64

In [68]:
# Looking for missing data
dataset.isnull().sum()

car_ID              0
symboling           0
fueltype            0
aspiration          0
doornumber          0
carbody             0
drivewheel          0
enginelocation      0
wheelbase           0
carlength           0
carwidth            0
carheight           0
curbweight          0
enginetype          0
cylindernumber      0
enginesize          0
fuelsystem          0
boreratio           0
stroke              0
compressionratio    0
horsepower          0
peakrpm             0
citympg             0
highwaympg          0
price               0
dtype: int64

In [69]:
# Encoding Categorical Columns
x = pd.get_dummies(data= x, columns= ["fueltype","aspiration","doornumber","carbody","drivewheel","enginelocation","cylindernumber","fuelsystem","enginetype"])
x

Unnamed: 0,car_ID,symboling,wheelbase,carlength,carwidth,carheight,curbweight,enginesize,boreratio,stroke,...,fuelsystem_mpfi,fuelsystem_spdi,fuelsystem_spfi,enginetype_dohc,enginetype_dohcv,enginetype_l,enginetype_ohc,enginetype_ohcf,enginetype_ohcv,enginetype_rotor
0,1,3,88.6,168.8,64.1,48.8,2548,130,3.47,2.68,...,1,0,0,1,0,0,0,0,0,0
1,2,3,88.6,168.8,64.1,48.8,2548,130,3.47,2.68,...,1,0,0,1,0,0,0,0,0,0
2,3,1,94.5,171.2,65.5,52.4,2823,152,2.68,3.47,...,1,0,0,0,0,0,0,0,1,0
3,4,2,99.8,176.6,66.2,54.3,2337,109,3.19,3.40,...,1,0,0,0,0,0,1,0,0,0
4,5,2,99.4,176.6,66.4,54.3,2824,136,3.19,3.40,...,1,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,201,-1,109.1,188.8,68.9,55.5,2952,141,3.78,3.15,...,1,0,0,0,0,0,1,0,0,0
201,202,-1,109.1,188.8,68.8,55.5,3049,141,3.78,3.15,...,1,0,0,0,0,0,1,0,0,0
202,203,-1,109.1,188.8,68.9,55.5,3012,173,3.58,2.87,...,1,0,0,0,0,0,0,0,1,0
203,204,-1,109.1,188.8,68.9,55.5,3217,145,3.01,3.40,...,0,0,0,0,0,0,1,0,0,0


In [70]:
# Train-Test Split
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

In [71]:
# Training Model
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(x_train, y_train)

In [72]:
# Predicting 
y_pred = regressor.predict(x_test)
np.set_printoption(precision=2)
y_pred

array([ 6173.48, 18288.33, 15168.1 ,   944.99,  9230.97, 12359.66,
        6818.74,  4908.17, 17041.35,  7272.47, 19923.66, 30978.95,
       12752.27, 15697.38,  7420.09, 11833.9 ,  9218.09, 20269.32,
        8861.03,  6382.59, 10089.64, 15892.3 ,  9306.73, 12786.6 ,
       20371.01,  7500.28,  6434.84, 15995.93,  6020.83,  5320.47,
        8536.  ,  9710.08, 16663.02,  8875.8 ,  6345.63, 28148.09,
       11575.85, 13280.9 ,  6243.7 , 36684.9 ,  6841.88])

In [76]:
# Evaluating Model
from sklearn.metrics import r2_score
r2 = r2_score(y_pred,y_test)
r2

0.8134711323548137