In [359]:
#import libraries
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [360]:
# Read the data and print it
df = pd.read_csv("Car_prices.csv")
df.head()

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4


In [361]:
# Creating dummies and printing it
dummies = pd.get_dummies(df['Car Model'])
dummies

Unnamed: 0,Audi A5,BMW X5,Mercedez Benz C class
0,0,1,0
1,0,1,0
2,0,1,0
3,0,1,0
4,0,1,0
5,1,0,0
6,1,0,0
7,1,0,0
8,1,0,0
9,0,0,1


In [362]:
# Concatenate the dummies and df
df_dummies = pd.concat([df, dummies] , axis= 'columns')
df_dummies

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs),Audi A5,BMW X5,Mercedez Benz C class
0,BMW X5,69000,18000,6,0,1,0
1,BMW X5,35000,34000,3,0,1,0
2,BMW X5,57000,26100,5,0,1,0
3,BMW X5,22500,40000,2,0,1,0
4,BMW X5,46000,31500,4,0,1,0
5,Audi A5,59000,29400,5,1,0,0
6,Audi A5,52000,32000,5,1,0,0
7,Audi A5,72000,19300,6,1,0,0
8,Audi A5,91000,12000,8,1,0,0
9,Mercedez Benz C class,67000,22000,6,0,0,1


In [363]:
# Here we are going to drop Car Model and one dummy feature as we can predict with remaining feature
df_dummies.drop(['Car Model' , 'Mercedez Benz C class'], axis= 'columns', inplace=True)
df_dummies

Unnamed: 0,Mileage,Sell Price($),Age(yrs),Audi A5,BMW X5
0,69000,18000,6,0,1
1,35000,34000,3,0,1
2,57000,26100,5,0,1
3,22500,40000,2,0,1
4,46000,31500,4,0,1
5,59000,29400,5,1,0
6,52000,32000,5,1,0
7,72000,19300,6,1,0
8,91000,12000,8,1,0
9,67000,22000,6,0,0


In [364]:
# Training set
X = df_dummies.drop('Sell Price($)', axis = 'columns')
X

Unnamed: 0,Mileage,Age(yrs),Audi A5,BMW X5
0,69000,6,0,1
1,35000,3,0,1
2,57000,5,0,1
3,22500,2,0,1
4,46000,4,0,1
5,59000,5,1,0
6,52000,5,1,0
7,72000,6,1,0
8,91000,8,1,0
9,67000,6,0,0


In [365]:
# Target Variable
Y = df_dummies['Sell Price($)']
Y

0     18000
1     34000
2     26100
3     40000
4     31500
5     29400
6     32000
7     19300
8     12000
9     22000
10    20000
11    21000
12    33000
Name: Sell Price($), dtype: int64

In [366]:
# Building model using get_dummies
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X,Y)

LinearRegression()

In [367]:
# Predicting model
model.predict([['79000' , '7', 0 , 0]])

array([20409.80511857])

In [368]:
# Using label Encoder
from sklearn.preprocessing import LabelEncoder
Le = LabelEncoder()

In [369]:
# Creating df for LE
DataFrameLe = df

In [370]:
# Transform categorical variable to numeric using le
DataFrameLe['Car Model'] = Le.fit_transform(DataFrameLe['Car Model'])
DataFrameLe

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,1,69000,18000,6
1,1,35000,34000,3
2,1,57000,26100,5
3,1,22500,40000,2
4,1,46000,31500,4
5,0,59000,29400,5
6,0,52000,32000,5
7,0,72000,19300,6
8,0,91000,12000,8
9,2,67000,22000,6


In [371]:
# Splitting data into X training and Y training
X_Features = DataFrameLe[['Car Model' , 'Mileage' , 'Age(yrs)']].values # Here Values convert this into 2-D
X_Features

array([[    1, 69000,     6],
       [    1, 35000,     3],
       [    1, 57000,     5],
       [    1, 22500,     2],
       [    1, 46000,     4],
       [    0, 59000,     5],
       [    0, 52000,     5],
       [    0, 72000,     6],
       [    0, 91000,     8],
       [    2, 67000,     6],
       [    2, 83000,     7],
       [    2, 79000,     7],
       [    2, 59000,     5]], dtype=int64)

In [372]:
Y_Features = DataFrameLe['Sell Price($)'].values
Y_Features

array([18000, 34000, 26100, 40000, 31500, 29400, 32000, 19300, 12000,
       22000, 20000, 21000, 33000], dtype=int64)

In [373]:
from sklearn.preprocessing import OneHotEncoder

In [374]:
from sklearn.compose import ColumnTransformer
ct = ColumnTransformer([("Car Model", OneHotEncoder(), [0])], remainder = 'passthrough')
OneHotEn = ct.fit_transform(X_Features)

In [375]:
X_Le = OneHotEn[:,2:]

In [376]:
X_Le

array([[0.00e+00, 6.90e+04, 6.00e+00],
       [0.00e+00, 3.50e+04, 3.00e+00],
       [0.00e+00, 5.70e+04, 5.00e+00],
       [0.00e+00, 2.25e+04, 2.00e+00],
       [0.00e+00, 4.60e+04, 4.00e+00],
       [0.00e+00, 5.90e+04, 5.00e+00],
       [0.00e+00, 5.20e+04, 5.00e+00],
       [0.00e+00, 7.20e+04, 6.00e+00],
       [0.00e+00, 9.10e+04, 8.00e+00],
       [1.00e+00, 6.70e+04, 6.00e+00],
       [1.00e+00, 8.30e+04, 7.00e+00],
       [1.00e+00, 7.90e+04, 7.00e+00],
       [1.00e+00, 5.90e+04, 5.00e+00]])

In [377]:
# Fit the model
model.fit(X_Le , Y_Features)

LinearRegression()

In [378]:
# Predict
model.predict([[2,79000,7]])

array([24948.31302348])