In [28]:
import pandas as pd
from sklearn.linear_model import LinearRegression
#from sklearn.metrics import score

In [29]:
df = pd.read_csv('cars.csv')
df

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


# Using get-dummies pandas method:

In [30]:
dummies = pd.get_dummies(df['Car Model'])
dummies

Unnamed: 0,Audi A5,BMW X5,Mercedez Benz C class
0,0,1,0
1,0,1,0
2,0,1,0
3,0,1,0
4,0,1,0
5,1,0,0
6,1,0,0
7,1,0,0
8,1,0,0
9,0,0,1


In [31]:
merged = pd.concat([df, dummies], axis=1)
merged

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs),Audi A5,BMW X5,Mercedez Benz C class
0,BMW X5,69000,18000,6,0,1,0
1,BMW X5,35000,34000,3,0,1,0
2,BMW X5,57000,26100,5,0,1,0
3,BMW X5,22500,40000,2,0,1,0
4,BMW X5,46000,31500,4,0,1,0
5,Audi A5,59000,29400,5,1,0,0
6,Audi A5,52000,32000,5,1,0,0
7,Audi A5,72000,19300,6,1,0,0
8,Audi A5,91000,12000,8,1,0,0
9,Mercedez Benz C class,67000,22000,6,0,0,1


In [32]:
final = merged.drop(['Car Model'], axis=1)

In [33]:
x_train = final.drop(['Sell Price($)'], axis=1)
x_train

Unnamed: 0,Mileage,Age(yrs),Audi A5,BMW X5,Mercedez Benz C class
0,69000,6,0,1,0
1,35000,3,0,1,0
2,57000,5,0,1,0
3,22500,2,0,1,0
4,46000,4,0,1,0
5,59000,5,1,0,0
6,52000,5,1,0,0
7,72000,6,1,0,0
8,91000,8,1,0,0
9,67000,6,0,0,1


In [34]:
y = final['Sell Price($)']

In [35]:
model = LinearRegression()
model.fit(x_train, y)

LinearRegression()

In [36]:
print(f'Intercept: {model.intercept_}')
print(f'Coefficents: {model.coef_}')

Intercept: 55912.70994756205
Coefficents: [-3.70122094e-01 -1.33245363e+03  6.10375284e+02 -3.67429130e+03
  3.06391602e+03]


In [37]:
prediction1 = model.predict([[45000, 4, 0, 0, 1]])
print(prediction1)

[36991.31721061]


In [38]:
prediction2 = model.predict([[86000, 6, 0, 1, 0]])
print(prediction2)

[12413.1967598]


In [39]:
model.score(x_train, y)

0.9417050937281082

---
##### Using dummies variable encoding:

In [40]:
x_train2 = final.drop(['Sell Price($)', 'Mercedez Benz C class'], axis=1)
x_train2

Unnamed: 0,Mileage,Age(yrs),Audi A5,BMW X5
0,69000,6,0,1
1,35000,3,0,1
2,57000,5,0,1
3,22500,2,0,1
4,46000,4,0,1
5,59000,5,1,0
6,52000,5,1,0
7,72000,6,1,0
8,91000,8,1,0
9,67000,6,0,0


In [41]:
model2 = LinearRegression()
model2.fit(x_train2, y)

LinearRegression()

In [42]:
print(f'Intercept: {model2.intercept_}')
print(f'Coefficents: {model2.coef_}')

Intercept: 58976.625968537235
Coefficents: [-3.70122094e-01 -1.33245363e+03 -2.45354074e+03 -6.73820733e+03]


In [43]:
prediction11 = model2.predict([[45000, 4, 0, 0]])
print(prediction11)

[36991.31721061]


In [44]:
prediction21 = model2.predict([[86000, 6, 0, 1]])
print(prediction21)

[12413.1967598]


In [45]:
model2.score(x_train2, y)

0.9417050937281083

___
# Using LabelEncoder:

**Note: OrdinalEncoder and LabelEncoder has the same functionality but they are intended for something different:**
* LabelEncoder is used with target feature so it take input of shape (n,)
* OrdinlEncoder takes multiple features of size (n,d)

In [59]:
from sklearn.preprocessing import LabelEncoder

In [60]:
dfle = df.copy()
dfle

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


In [61]:
le = LabelEncoder()
dfle['Car Model'] = le.fit_transform(dfle['Car Model'])
dfle

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,1,69000,18000,6
1,1,35000,34000,3
2,1,57000,26100,5
3,1,22500,40000,2
4,1,46000,31500,4
5,0,59000,29400,5
6,0,52000,32000,5
7,0,72000,19300,6
8,0,91000,12000,8
9,2,67000,22000,6


In [62]:
x_train3 = dfle.drop(['Sell Price($)'], axis=1).values
print(x_train3)

[[    1 69000     6]
 [    1 35000     3]
 [    1 57000     5]
 [    1 22500     2]
 [    1 46000     4]
 [    0 59000     5]
 [    0 52000     5]
 [    0 72000     6]
 [    0 91000     8]
 [    2 67000     6]
 [    2 83000     7]
 [    2 79000     7]
 [    2 59000     5]]


In [63]:
y2 = dfle['Sell Price($)'].values
print(y2)

[18000 34000 26100 40000 31500 29400 32000 19300 12000 22000 20000 21000
 33000]


In [74]:
model3 = LinearRegression()
model3.fit(x_train3, y2)

LinearRegression()

In [78]:
print(f'Intercept: {model3.intercept_}')
print(f'Coefficents: {model3.coef_}')

Intercept: 48657.332794880545
Coefficents: [ 1.10167257e+03 -4.11783314e-01  2.51585847e+02]


In [76]:
prediction = model3.predict([[2, 45000, 4]])
print(prediction)

[33336.7721827]


In [77]:
prediction_2 = model3.predict([[1, 86000, 7]])
print(prediction_2)

[16106.74127344]


In [79]:
model3.score(x_train3, y2)

0.8719970367825952

___
# Using OrdinalEncoder:

In [82]:
from sklearn.preprocessing import OrdinalEncoder

In [83]:
oe = OrdinalEncoder()
dfoe = df.copy()
dfoe

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


In [87]:
dfoe[['Car Model']] = oe.fit_transform(dfoe[['Car Model']])
dfoe

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,1.0,69000,18000,6
1,1.0,35000,34000,3
2,1.0,57000,26100,5
3,1.0,22500,40000,2
4,1.0,46000,31500,4
5,0.0,59000,29400,5
6,0.0,52000,32000,5
7,0.0,72000,19300,6
8,0.0,91000,12000,8
9,2.0,67000,22000,6


In [88]:
x_train5 = dfoe.drop(['Sell Price($)'], axis=1)
x_train5

Unnamed: 0,Car Model,Mileage,Age(yrs)
0,1.0,69000,6
1,1.0,35000,3
2,1.0,57000,5
3,1.0,22500,2
4,1.0,46000,4
5,0.0,59000,5
6,0.0,52000,5
7,0.0,72000,6
8,0.0,91000,8
9,2.0,67000,6


In [89]:
model5 = LinearRegression()
model5.fit(x_train5, y2)

LinearRegression()

In [91]:
predict55 = model5.predict([[2.0, 45000, 4]])
print(predict55)

[33336.7721827]


In [92]:
predict56 = model5.predict([[1.0, 86000, 7]])
print(predict56)

[16106.74127344]


In [93]:
model5.score(x_train5, y2)

0.8719970367825952

___
# Using OneHotEncoder:

In [51]:
from sklearn.preprocessing import OneHotEncoder

In [52]:
ohe = OneHotEncoder()
dfohe = df.copy()
ttt = pd.DataFrame(ohe.fit_transform(dfohe[['Car Model']]).toarray(), columns=ohe.categories_)
ttt

Unnamed: 0,Audi A5,BMW X5,Mercedez Benz C class
0,0.0,1.0,0.0
1,0.0,1.0,0.0
2,0.0,1.0,0.0
3,0.0,1.0,0.0
4,0.0,1.0,0.0
5,1.0,0.0,0.0
6,1.0,0.0,0.0
7,1.0,0.0,0.0
8,1.0,0.0,0.0
9,0.0,0.0,1.0


In [65]:
me = pd.concat([dfohe, ttt], axis=1)
me

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs),"(Audi A5,)","(BMW X5,)","(Mercedez Benz C class,)"
0,BMW X5,69000,18000,6,0.0,1.0,0.0
1,BMW X5,35000,34000,3,0.0,1.0,0.0
2,BMW X5,57000,26100,5,0.0,1.0,0.0
3,BMW X5,22500,40000,2,0.0,1.0,0.0
4,BMW X5,46000,31500,4,0.0,1.0,0.0
5,Audi A5,59000,29400,5,1.0,0.0,0.0
6,Audi A5,52000,32000,5,1.0,0.0,0.0
7,Audi A5,72000,19300,6,1.0,0.0,0.0
8,Audi A5,91000,12000,8,1.0,0.0,0.0
9,Mercedez Benz C class,67000,22000,6,0.0,0.0,1.0


In [54]:
dfohe

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


In [55]:
# from sklearn.compose import ColumnTransformer
# ct = ColumnTransformer((OneHotEncoder(), ['Car Model']), remainder = 'passthrough')
# ct.fit_transform(dfohe)

In [58]:
# from sklearn.compose import make_column_transformer
# ct = make_column_transformer((OneHotEncoder(), ['Car Model']), remainder = 'passthrough')
# ct.fit_transform(dfohe)

In [66]:
x_train4 = me.drop(['Sell Price($)', 'Car Model'], axis=1)
x_train4

Unnamed: 0,Mileage,Age(yrs),"(Audi A5,)","(BMW X5,)","(Mercedez Benz C class,)"
0,69000,6,0.0,1.0,0.0
1,35000,3,0.0,1.0,0.0
2,57000,5,0.0,1.0,0.0
3,22500,2,0.0,1.0,0.0
4,46000,4,0.0,1.0,0.0
5,59000,5,1.0,0.0,0.0
6,52000,5,1.0,0.0,0.0
7,72000,6,1.0,0.0,0.0
8,91000,8,1.0,0.0,0.0
9,67000,6,0.0,0.0,1.0


In [69]:
model4 = LinearRegression()
model4.fit(x_train4, y2)

LinearRegression()

In [70]:
prediction3 = model4.predict([[45000, 4, 0, 0, 1]])
print(prediction3)

[36991.31721061]


In [72]:
prediction4 = model4.predict([[86000, 7, 0, 1, 0]])
print(prediction4)

[11080.74313219]


In [73]:
model4.score(x_train4, y2)

0.9417050937281082