***Predict a car's selling price from its model, age, and mileage***

# Using the dummy-variable approach (pandas `get_dummies`)

In [19]:
import pandas as pd
from sklearn.linear_model import LinearRegression

In [20]:
# Read the raw car-price table from the CSV file (path is relative to the working directory)
df = pd.read_csv("carprices.csv")

In [21]:
# Display the loaded frame to inspect its columns and rows
df

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


In [22]:
# One-hot encode the car model: one indicator column per distinct model name.
# dtype=int pins the indicators to 0/1 integers; pandas >= 2.0 would otherwise
# return booleans (True/False) instead of the 0/1 values shown below.
dummies = pd.get_dummies(df['Car Model'], dtype=int)
dummies

Unnamed: 0,Audi A5,BMW X5,Mercedez Benz C class
0,0,1,0
1,0,1,0
2,0,1,0
3,0,1,0
4,0,1,0
5,1,0,0
6,1,0,0
7,1,0,0
8,1,0,0
9,0,0,1


In [23]:
# Place the indicator columns to the right of the original columns (rows align on the index)
df_concat = pd.concat((df, dummies), axis=1)

In [24]:
# Build the modelling frame:
#   - 'Car Model' is dropped because the indicator columns now represent it
#   - one indicator ('Mercedez Benz C class') is dropped so that it becomes the
#     baseline category (both remaining indicators equal to 0)
df_final = df_concat.drop(columns=['Car Model', 'Mercedez Benz C class'])
df_final

Unnamed: 0,Mileage,Sell Price($),Age(yrs),Audi A5,BMW X5
0,69000,18000,6,0,1
1,35000,34000,3,0,1
2,57000,26100,5,0,1
3,22500,40000,2,0,1
4,46000,31500,4,0,1
5,59000,29400,5,1,0
6,52000,32000,5,1,0
7,72000,19300,6,1,0
8,91000,12000,8,1,0
9,67000,22000,6,0,0


In [38]:
# Feature matrix: every column of the modelling frame except the target
x = df_final.drop(columns='Sell Price($)')
x.head()

Unnamed: 0,Mileage,Age(yrs),Audi A5,BMW X5
0,69000,6,0,1
1,35000,3,0,1
2,57000,5,0,1
3,22500,2,0,1
4,46000,4,0,1


In [39]:
# Target vector: the selling price column
y = df_final.loc[:, 'Sell Price($)']
y.head()

0    18000
1    34000
2    26100
3    40000
4    31500
Name: Sell Price($), dtype: int64

In [40]:
# Fit a linear regression on the full data set (fit() returns the estimator itself)
model = LinearRegression().fit(x, y)
model

LinearRegression()

##### Predict the price of a Mercedez Benz C class that is 4 years old with a mileage of 45,000

In [67]:
# Query the model with a one-row frame whose columns are named and ordered
# exactly like the training frame. The model was fitted on a named DataFrame,
# so a bare list makes scikit-learn >= 1.0 warn about missing feature names and
# hides which number is which.
# 'Mercedez Benz C class' is the dropped (baseline) category, so both indicators are 0.
model.predict(
    pd.DataFrame({'Mileage': [45000], 'Age(yrs)': [4], 'Audi A5': [0], 'BMW X5': [0]})
)

array([36991.31721061])

##### Predict the price of a BMW X5 that is 7 years old with a mileage of 86,000

In [68]:
# Query the model with a one-row frame whose columns are named and ordered
# exactly like the training frame. The model was fitted on a named DataFrame,
# so a bare list makes scikit-learn >= 1.0 warn about missing feature names and
# hides which number is which.
model.predict(
    pd.DataFrame({'Mileage': [86000], 'Age(yrs)': [7], 'Audi A5': [0], 'BMW X5': [1]})
)

array([11080.74313219])

In [43]:
# R^2 (coefficient of determination) of the fit, evaluated on the same rows the
# model was trained on -- a goodness-of-fit figure, not a held-out accuracy
model.score(x,y)

0.9417050937281082

# Using sklearn OneHotEncoder

In [90]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

#### Label Encoding 

In [91]:
# Map each car-model name to an integer code.
# Work on a copy: a plain `df_le = df` only adds a second name for the same
# object, so the assignment below would also overwrite 'Car Model' in `df`
# and break any re-run of the get_dummies cells above.
le = LabelEncoder()
df_le = df.copy()
df_le['Car Model'] = le.fit_transform(df_le['Car Model'])
df_le

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,1,69000,18000,6
1,1,35000,34000,3
2,1,57000,26100,5
3,1,22500,40000,2
4,1,46000,31500,4
5,0,59000,29400,5
6,0,52000,32000,5
7,0,72000,19300,6
8,0,91000,12000,8
9,2,67000,22000,6


#### One Hot Encoding

In [92]:
# Feature matrix as a plain NumPy array; the car-model code sits in column 0
x = df_le.loc[:, ['Car Model', 'Mileage', 'Age(yrs)']].values
x

array([[    1, 69000,     6],
       [    1, 35000,     3],
       [    1, 57000,     5],
       [    1, 22500,     2],
       [    1, 46000,     4],
       [    0, 59000,     5],
       [    0, 52000,     5],
       [    0, 72000,     6],
       [    0, 91000,     8],
       [    2, 67000,     6],
       [    2, 83000,     7],
       [    2, 79000,     7],
       [    2, 59000,     5]], dtype=int64)

In [93]:
# Target values (selling price) as a NumPy array
y = df_le.loc[:, 'Sell Price($)'].values
y

array([18000, 34000, 26100, 40000, 31500, 29400, 32000, 19300, 12000,
       22000, 20000, 21000, 33000], dtype=int64)

In [94]:
# One-hot encode column 0 (the car-model code) only; the remaining columns are
# passed through unchanged and placed after the encoded ones
ct = ColumnTransformer(
    transformers=[('Car Model', OneHotEncoder(), [0])],
    remainder='passthrough',
)

In [95]:
# Replace x with its encoded form: the indicator columns for the car model
# followed by the passed-through mileage and age columns.
# NOTE: x is both the input and the output here, so re-running this cell on its
# own would encode the already-encoded array -- re-run the cell that builds x first.
x = ct.fit_transform(x)
x

array([[0.00e+00, 1.00e+00, 0.00e+00, 6.90e+04, 6.00e+00],
       [0.00e+00, 1.00e+00, 0.00e+00, 3.50e+04, 3.00e+00],
       [0.00e+00, 1.00e+00, 0.00e+00, 5.70e+04, 5.00e+00],
       [0.00e+00, 1.00e+00, 0.00e+00, 2.25e+04, 2.00e+00],
       [0.00e+00, 1.00e+00, 0.00e+00, 4.60e+04, 4.00e+00],
       [1.00e+00, 0.00e+00, 0.00e+00, 5.90e+04, 5.00e+00],
       [1.00e+00, 0.00e+00, 0.00e+00, 5.20e+04, 5.00e+00],
       [1.00e+00, 0.00e+00, 0.00e+00, 7.20e+04, 6.00e+00],
       [1.00e+00, 0.00e+00, 0.00e+00, 9.10e+04, 8.00e+00],
       [0.00e+00, 0.00e+00, 1.00e+00, 6.70e+04, 6.00e+00],
       [0.00e+00, 0.00e+00, 1.00e+00, 8.30e+04, 7.00e+00],
       [0.00e+00, 0.00e+00, 1.00e+00, 7.90e+04, 7.00e+00],
       [0.00e+00, 0.00e+00, 1.00e+00, 5.90e+04, 5.00e+00]])

In [96]:
# Drop the first indicator column so that its category becomes the baseline.
# Column 0 corresponds to code 0 ('Audi A5'), leaving
# [BMW X5, Mercedez Benz C class, Mileage, Age(yrs)].
# NOTE: x is re-sliced from itself; running this cell twice drops a second column.
x = x[:, 1:]       # drop the 'Audi A5' indicator
x                  # remaining indicators: 'BMW X5', 'Mercedez Benz C class'

array([[1.00e+00, 0.00e+00, 6.90e+04, 6.00e+00],
       [1.00e+00, 0.00e+00, 3.50e+04, 3.00e+00],
       [1.00e+00, 0.00e+00, 5.70e+04, 5.00e+00],
       [1.00e+00, 0.00e+00, 2.25e+04, 2.00e+00],
       [1.00e+00, 0.00e+00, 4.60e+04, 4.00e+00],
       [0.00e+00, 0.00e+00, 5.90e+04, 5.00e+00],
       [0.00e+00, 0.00e+00, 5.20e+04, 5.00e+00],
       [0.00e+00, 0.00e+00, 7.20e+04, 6.00e+00],
       [0.00e+00, 0.00e+00, 9.10e+04, 8.00e+00],
       [0.00e+00, 1.00e+00, 6.70e+04, 6.00e+00],
       [0.00e+00, 1.00e+00, 8.30e+04, 7.00e+00],
       [0.00e+00, 1.00e+00, 7.90e+04, 7.00e+00],
       [0.00e+00, 1.00e+00, 5.90e+04, 5.00e+00]])

In [97]:
# Fit a second linear regression on the encoded array (fit() returns the estimator itself)
model2 = LinearRegression().fit(x, y)
model2

LinearRegression()

##### Predict the price of a Mercedez Benz C class that is 4 years old with a mileage of 45,000

In [98]:
# Feature order: [BMW X5, Mercedez Benz C class, Mileage, Age(yrs)]
model2.predict([[0, 1, 45000, 4]])

array([36991.31721063])

##### Predict the price of a BMW X5 that is 7 years old with a mileage of 86,000

In [100]:
# Feature order: [BMW X5, Mercedez Benz C class, Mileage, Age(yrs)]
model2.predict([[1, 0, 86000, 7]])

array([11080.74313217])