In the pervious file, I have explored two different machine learning methods with Sklearn and compared the results. 
In the following sections, I aim to use deep learning to predict the car price and, in the end, compare the results with the machine learning methods.

In [1]:
#upload libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# let's upload the preprocessed data from the last project

In [2]:
data = pd.read_csv('data_prep.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,Brand,Price,Body,Mileage,EngineV,Engine Type,Registration,Year
0,0,BMW,4200.0,sedan,277,2.0,Petrol,yes,1991
1,1,Mercedes-Benz,7900.0,van,427,2.9,Diesel,yes,1999
2,2,Mercedes-Benz,13300.0,sedan,358,5.0,Gas,yes,2003
3,3,Audi,23000.0,crossover,240,4.2,Petrol,yes,2007
4,4,Toyota,18300.0,crossover,120,2.0,Petrol,yes,2011


In [3]:
#let's drop unnamed column.
data = data.drop(['Unnamed: 0'], axis = 1)
data.head()

Unnamed: 0,Brand,Price,Body,Mileage,EngineV,Engine Type,Registration,Year
0,BMW,4200.0,sedan,277,2.0,Petrol,yes,1991
1,Mercedes-Benz,7900.0,van,427,2.9,Diesel,yes,1999
2,Mercedes-Benz,13300.0,sedan,358,5.0,Gas,yes,2003
3,Audi,23000.0,crossover,240,4.2,Petrol,yes,2007
4,Toyota,18300.0,crossover,120,2.0,Petrol,yes,2011


In [4]:
#let's drop year
data_pre_process = data.drop(['Year'], axis=1)

In [5]:
data_pre_process.head()

Unnamed: 0,Brand,Price,Body,Mileage,EngineV,Engine Type,Registration
0,BMW,4200.0,sedan,277,2.0,Petrol,yes
1,Mercedes-Benz,7900.0,van,427,2.9,Diesel,yes
2,Mercedes-Benz,13300.0,sedan,358,5.0,Gas,yes
3,Audi,23000.0,crossover,240,4.2,Petrol,yes
4,Toyota,18300.0,crossover,120,2.0,Petrol,yes


In [6]:
# now, in this case, let's use another method for dealing with dummies and scaling.
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split


# Create column transformer (this will help us normalize/preprocess our data)
ct = make_column_transformer(
    (MinMaxScaler(), ["Mileage", "EngineV"]), # scale Mileage and EngineV from 0 to 1
    (OneHotEncoder(handle_unknown="ignore"), ["Brand", "Body", "Engine Type", "Registration"])# get dummies 
)

# Create x & y for splitting the dataset to train and test
x = data_pre_process.drop("Price", axis=1)
y = data_pre_process["Price"]

# Build our train and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Fit column transformer on the training data only (doing so on test data would result in data leakage)
ct.fit(x_train)

# Transform training and test data with normalization (MinMaxScalar) and one hot encoding (OneHotEncoder)
X_train_normal = ct.transform(x_train)
X_test_normal = ct.transform(x_test)

In [7]:
# let's conver X_train_normal and X_test_normal from csr_matrix to a numpy array because Keras can't work with csr_matrix.

X_train_normal = X_train_normal.toarray()


In [8]:
X_test_normal = X_test_normal.toarray()

In [10]:
# let's check the shape of training and testing data
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((3156, 6), (790, 6), (3156,), (790,))

### Deep Learning with TensorFlow

In [None]:
import tensorflow as tf

In [16]:
# creating a simple neural network.

# Set random seed
tf.random.set_seed(42)

# Create a  model_1
model_1 = tf.keras.Sequential([
  tf.keras.layers.Dense(128)
])

# Compile the model
model_1.compile(loss=tf.keras.losses.mae,
                        optimizer=tf.keras.optimizers.SGD(),
                        metrics=['mae'])

# Fit the model
model_1.fit(X_train_normal, y_train, epochs=100, verbose=0)

<keras.callbacks.History at 0x7f0d9c5a2950>

In [17]:
model_1.evaluate(X_train_normal,y_train)



[17708.5078125, 17708.5078125]

In [18]:
model_1.evaluate(X_test_normal,y_test)



[18703.509765625, 18703.509765625]

## let's improving the model to reduce the loss function.

### step number one increasing the number of epochs from 100 to 1000

In [19]:
# creating a simple neural network.

# Set random seed
tf.random.set_seed(42)

# Create a  model_2
model_2 = tf.keras.Sequential([
  tf.keras.layers.Dense(128)
])

# Compile the model
model_2.compile(loss=tf.keras.losses.mae,
                        optimizer=tf.keras.optimizers.SGD(),
                        metrics=['mae'])

# Fit the model
model_2.fit(X_train_normal, y_train, epochs=1000, verbose=0)

<keras.callbacks.History at 0x7f0d9c661bd0>

In [20]:
#let's evaluate the model
model_2.evaluate(X_train_normal,y_train)



[17689.25, 17689.25]

In [21]:
model_2.evaluate(X_test_normal,y_test)



[18684.330078125, 18684.330078125]

### Beautiful, the loss function dropped a little bit.

#### step number 2: let's add some hidden layers.

In [22]:
# creating a simple neural network.

# Set random seed
tf.random.set_seed(42)

# Create a  model_3
model_3 = tf.keras.Sequential([
  tf.keras.layers.Dense(128),
  tf.keras.layers.Dense(64),
  tf.keras.layers.Dense(32),
  
])

# Compile the model
model_3.compile(loss=tf.keras.losses.mae,
                        optimizer=tf.keras.optimizers.SGD(),
                        metrics=['mae'])

# Fit the model
model_3.fit(X_train_normal, y_train, epochs=1000, verbose=0)

<keras.callbacks.History at 0x7f0d9c527520>

In [23]:
#let's evaluate the model
model_3.evaluate(X_train_normal,y_train)



[7039.5869140625, 7039.5869140625]

In [24]:
model_3.evaluate(X_test_normal,y_test)



[7740.22265625, 7740.22265625]

## WoW! by adding two hidden layers, we got a huge improvment.

#### Step3: lets use model_1 and just change the optimizer to Adam from SGD to see the impact of optimizer.

In [25]:
# Set random seed
tf.random.set_seed(42)

# Create a  model_4
model_4 = tf.keras.Sequential([
  tf.keras.layers.Dense(128),
  
])

# Compile the model
model_4.compile(loss=tf.keras.losses.mae,
                        optimizer=tf.keras.optimizers.Adam(),
                        metrics=['mae'])

# Fit the model
model_4.fit(X_train_normal, y_train, epochs=100, verbose=0)

<keras.callbacks.History at 0x7f0d847cec20>

In [26]:
#let's evaluate the model
model_4.evaluate(X_train_normal,y_train)



[17657.435546875, 17657.435546875]

In [27]:
model_4.evaluate(X_test_normal,y_test)



[18652.373046875, 18652.373046875]

### interesting, Adam works very better than SGD. Let's combine Adam and multi hidden layers and 1000 epochs.

In [28]:
# Set random seed
tf.random.set_seed(42)

# Create a  model_5
model_5 = tf.keras.Sequential([
  tf.keras.layers.Dense(128),
  tf.keras.layers.Dense(64),
    tf.keras.layers.Dense(32)
])

# Compile the model
model_5.compile(loss=tf.keras.losses.mae,
                        optimizer=tf.keras.optimizers.Adam(),
                        metrics=['mae'])

# Fit the model
model_5.fit(X_train_normal, y_train, epochs=1000, verbose=0)

<keras.callbacks.History at 0x7f0d84692410>

In [29]:
model_5.evaluate(X_train_normal,y_train)



[7032.60791015625, 7032.60791015625]

In [30]:
model_5.evaluate(X_test_normal,y_test)



[7733.408203125, 7733.408203125]

### Adam works again a little bit better. Let's change the learning rate of Adam.

In [31]:
# Set random seed
tf.random.set_seed(42)

# Create a  model_6
model_6 = tf.keras.Sequential([
  tf.keras.layers.Dense(128),
  tf.keras.layers.Dense(64),
    tf.keras.layers.Dense(32)
])

# Compile the model
model_6.compile(loss=tf.keras.losses.mae,
                        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                        metrics=['mae'])

# Fit the model
model_6.fit(X_train_normal, y_train, epochs=1000, verbose=0)

<keras.callbacks.History at 0x7f0d846f5750>

In [32]:
model_6.evaluate(X_train_normal,y_train)



[7033.658203125, 7033.658203125]

In [33]:
model_6.evaluate(X_test_normal,y_test)



[7726.47021484375, 7726.47021484375]

### not significant improvment by changing LR. let's make a more comlicated version of the model with more hidden layers.

In [34]:
# Set random seed
tf.random.set_seed(42)

# Create a  model_7
model_7 = tf.keras.Sequential([
  tf.keras.layers.Dense(1028),
  tf.keras.layers.Dense(512), 
  tf.keras.layers.Dense(256),
  tf.keras.layers.Dense(128),
])

# Compile the model
model_7.compile(loss=tf.keras.losses.mae,
                        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                        metrics=['mae'])

# Fit the model
model_7.fit(X_train_normal, y_train, epochs=1000, verbose=0)

<keras.callbacks.History at 0x7f0d9c581510>

In [35]:
model_7.evaluate(X_train_normal,y_train)



[7032.716796875, 7032.716796875]

In [36]:
model_7.evaluate(X_test_normal,y_test)



[7733.3076171875, 7733.3076171875]