#                        Car Price Prediction Uing different Regression Model

### Importing Libraries

In [3]:
import pandas as pd 
import numpy as np


In [4]:
df=pd.read_csv('data.csv')

In [5]:
df.head()

Unnamed: 0,symboling,normalized-losses,make,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,length,...,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price,city-L/100km,horsepower-binned,diesel,gas
0,3,122,alfa-romero,std,two,convertible,rwd,front,88.6,0.811148,...,9.0,111.0,5000.0,21,27,13495.0,11.190476,Medium,0,1
1,3,122,alfa-romero,std,two,convertible,rwd,front,88.6,0.811148,...,9.0,111.0,5000.0,21,27,16500.0,11.190476,Medium,0,1
2,1,122,alfa-romero,std,two,hatchback,rwd,front,94.5,0.822681,...,9.0,154.0,5000.0,19,26,16500.0,12.368421,Medium,0,1
3,2,164,audi,std,four,sedan,fwd,front,99.8,0.84863,...,10.0,102.0,5500.0,24,30,13950.0,9.791667,Medium,0,1
4,2,164,audi,std,four,sedan,4wd,front,99.4,0.84863,...,8.0,115.0,5500.0,18,22,17450.0,13.055556,Medium,0,1


# Linear Regression
### 'highway-mpg' vs 'price'


In [6]:
from sklearn.linear_model import LinearRegression
x1=df[['highway-mpg']]
y1=df['price']

lm= LinearRegression()

lm.fit(x1,y1)
yhat=lm.predict(x1)


In [7]:
print("Actual price: ", y1[0:5])
print("\n")
print("Predicted price: ", yhat[0:5])

Actual price:  0    13495.0
1    16500.0
2    16500.0
3    13950.0
4    17450.0
Name: price, dtype: float64


Predicted price:  [16236.50464347 16236.50464347 17058.23802179 13771.3045085
 20345.17153508]


### 'Engine Size' vs 'price'

In [8]:
x2=df[['engine-size']]
y2=df['price']

lm2= LinearRegression()

lm2.fit(x2,y2)
yhat2=lm2.predict(x2)

In [9]:
print("Actual Price : ", df['price'].head())
print("\n")
print('Predicted: ',yhat2[0:5])

Actual Price :  0    13495.0
1    16500.0
2    16500.0
3    13950.0
4    17450.0
Name: price, dtype: float64


Predicted:  [13728.4631336  13728.4631336  17399.38347881 10224.40280408
 14729.62322775]


## Calculating R^2 Score and MSE for LM

In [10]:
print("R^2 Score:  ",lm2.score(x2,y2))

R^2 Score:   0.7609686443622008


In [11]:
from sklearn.metrics import mean_squared_error

mse=mean_squared_error(df['price'],yhat2)

print("Mean squared Error: ",mse)

Mean squared Error:  15021126.02517414


# Multiple Regression

In [12]:
mlr=LinearRegression()

y3=df['price']
x3=df[['horsepower', 'curb-weight', 'engine-size', 'highway-mpg']]

mlr.fit(x3,y3)

yhat3 = mlr.predict(x3)



In [13]:
print("Actual price: ", y3.head())
print("\n")
print("Predicted price: ",yhat3[0:5])

Actual price:  0    13495.0
1    16500.0
2    16500.0
3    13950.0
4    17450.0
Name: price, dtype: float64


Predicted price:  [13699.11161184 13699.11161184 19051.65470233 10620.36193015
 15521.31420211]


In [14]:
print("R^2 Score:  ",mlr.score(x3,y3))

R^2 Score:   0.8093562806577457


In [15]:
mse1=mean_squared_error(df['price'],yhat3)

print("MSE : ",mse1)

MSE :  11980366.87072649


## Polynomial Regression and Pipeline

In [16]:
from sklearn.preprocessing import PolynomialFeatures

In [32]:
pr = PolynomialFeatures(degree=2)
x = df['highway-mpg']
y = df['price']
f = np.polyfit(x, y, 3)
p = np.poly1d(f)
print(p)
z = df[['horsepower', 'curb-weight', 'engine-size', 'highway-mpg']]

3         2
-1.557 x + 204.8 x - 8965 x + 1.379e+05


In [33]:
Z_pr=pr.fit_transform(z)

In [42]:
z.shape

(201, 4)

In [43]:
Z_pr.shape

(201, 15)

In [34]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [35]:
Input=[('scale',StandardScaler()), ('polynomial', PolynomialFeatures(include_bias=False)), ('model',LinearRegression())]

In [36]:
pipe=Pipeline(Input)
pipe

Pipeline(steps=[('scale', StandardScaler()),
                ('polynomial', PolynomialFeatures(include_bias=False)),
                ('model', LinearRegression())])

In [37]:
pipe.fit(z,y)

Pipeline(steps=[('scale', StandardScaler()),
                ('polynomial', PolynomialFeatures(include_bias=False)),
                ('model', LinearRegression())])

In [38]:
ypipe=pipe.predict(z)
print("Actual Price: ",y[0:5])
print("\n")
print("Predicted price :",ypipe[0:5])

Actual Price:  0    13495.0
1    16500.0
2    16500.0
3    13950.0
4    17450.0
Name: price, dtype: float64


Predicted price : [13102.74784201 13102.74784201 18225.54572197 10390.29636555
 16136.29619164]


In [39]:
from sklearn.metrics import r2_score
r_squared = r2_score(y, p(x))
print('The R-square value is: ', r_squared)

The R-square value is:  0.6741946663906515


In [41]:
print("MSE :",mean_squared_error(df['price'], p(x)))

MSE : 20474146.42636124


# Conclusion

## Multiple Linear Model is the best Model to predcit price , Because its having More R^2 Score when comapre to others ie: 80.9 % and less Mean Square Error is (1.2 * 10^7) 

###  :) Pranesh Patil