<a href="https://colab.research.google.com/github/Hainguyendangduc/DeepLearning_PJ/blob/main/MPG_predict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np

In [None]:
# Download data
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data

--2022-04-14 08:38:24--  https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 30286 (30K) [application/x-httpd-php]
Saving to: ‘auto-mpg.data.1’


2022-04-14 08:38:24 (1.02 MB/s) - ‘auto-mpg.data.1’ saved [30286/30286]



In [None]:
data = pd.read_csv('auto-mpg.data', sep = '\s+', 
                   names=['MPG', 'Cylinders', 'Displacement', 'Horse power',
                          'Weight', 'Acceleration', 'Model Year', 'Origin', 'Car Name'])

print(data.shape)
data.head()

(398, 9)


Unnamed: 0,MPG,Cylinders,Displacement,Horse power,Weight,Acceleration,Model Year,Origin,Car Name
0,18.0,8,307.0,130.0,3504.0,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693.0,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436.0,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433.0,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449.0,10.5,70,1,ford torino


In [None]:
# Find missing data
data.isna().sum()

MPG             0
Cylinders       0
Displacement    0
Horse power     0
Weight          0
Acceleration    0
Model Year      0
Origin          0
Car Name        0
dtype: int64

In [None]:
# Train test spliting
train_dataset = data.sample(frac=0.8,random_state=0)
test_dataset = data.drop(train_dataset.index)

In [None]:
train_stats = train_dataset.describe()
train_stats = train_stats.transpose()
train_stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
MPG,318.0,23.590566,7.913617,10.0,17.125,22.75,29.0,46.6
Cylinders,318.0,5.427673,1.682941,3.0,4.0,4.0,6.0,8.0
Displacement,318.0,193.061321,103.812742,70.0,100.25,151.0,259.5,455.0
Weight,318.0,2963.823899,844.749805,1613.0,2219.25,2792.5,3571.25,5140.0
Acceleration,318.0,15.595912,2.796282,8.0,13.9,15.5,17.3,24.8
Model Year,318.0,75.946541,3.705266,70.0,73.0,76.0,79.0,82.0
Origin,318.0,1.553459,0.79943,1.0,1.0,1.0,2.0,3.0


In [None]:
test_stats = test_dataset.describe()
test_stats = test_stats.transpose()
test_stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
MPG,80.0,23.2125,7.455528,9.0,17.5,23.45,29.0,43.1
Cylinders,80.0,5.5625,1.777808,3.0,4.0,4.0,8.0,8.0
Displacement,80.0,194.875,106.718413,68.0,107.0,143.0,302.5,400.0
Weight,80.0,2996.6625,859.968618,1755.0,2240.75,2885.5,3740.0,4955.0
Acceleration,80.0,15.4575,2.612568,8.5,13.65,15.2,16.925,22.2
Model Year,80.0,76.2625,3.679377,70.0,73.0,76.0,80.0,82.0
Origin,80.0,1.65,0.812871,1.0,1.0,1.0,2.0,3.0


In [None]:
train_labels = train_dataset.pop('MPG').values.reshape(-1, 1)
test_labels = test_dataset.pop('MPG').values.reshape(-1, 1)
print(train_labels.shape, test_labels.shape)

(318, 1) (80, 1)


In [None]:
def norm(x):
  return (x - train_stats['mean']) / train_stats['std']

normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)

In [None]:
print(normed_train_data.shape)


(318, 9)


In [None]:
g = lambda z: 1 / (1 + np.exp(-z))

class SigmoidLayer:
  def __init__(self, n_inp, n_out, bias = True):
    if bias: n_inp += 1
    w = np.random.randn(n_inp, n_out)
    self.w = w
    self.bias = bias
    self.is_forward = False
  
  def forward(self, X):
    if self.bias:
      X = np.hstack((np.ones((X.shape[0], 1)), X))
    
    z = np.dot(X, self.w)
    a = g(z)

    self.is_forward = True
    self.x = X
    self.a = a
    return a

  def back_propagation(self, delta):
    a = self.a

    dw = np.dot(self.x.T, delta * a * (1.0 - a))

    if self.bias:
      da = np.dot(delta * a * (1.0 - a), self.w.T)[:, 1:] 
    else:
      da = np.dot(delta * a * (1.0 - a), self.w.T)
    
    return dw, da

In [None]:
class LinearLayer:
  def __init__(self, n_inp, n_out, bias=True):
    if bias: n_inp += 1
    w = np.random.randn(n_inp, n_out)
    self.w = w
    self.bias = bias
    self.is_forward = False
  
  def forward(self, X):
    if self.bias:
      X = np.hstack((np.ones((X.shape[0], 1)), X))
    
    z = np.dot(X, self.w)
  
    self.x = X
    self.a = z
    return z
  
  def back_propagation(self, delta):
    a = self.a
    dw = np.dot(self.x.T, delta * a * (1.0 - a))
    
    if self.bias:
      da = np.dot(delta * a * (1.0 - a), self.w.T)[:, 1:]
    else:
      da = np.dot(delta * a * (1.0 - a), self.w.T)

    return dw/self.a.shape[0], da

In [None]:
X_train = normed_train_data.loc[:, ["Cylinders", "Displacement", "Weight"]].values
X_train.shape

(318, 3)

In [None]:
X_test = normed_test_data.loc[:, ["Cylinders", "Displacement", "Weight"]].values
X_test.shape

(80, 3)

In [None]:
# X_test

In [None]:
# X_train

In [None]:
def loss(y, y_pred):
  return np.mean((y_pred - y)**2)

In [None]:
#model
NDIMS = 3
L1 = 32
L2 = 16
EPOCHS = 500
LR = 0.02

layers = [SigmoidLayer(NDIMS, L1, bias = False),
          SigmoidLayer(L1, L2),
          LinearLayer(L2,1)]

def predict(x, model):
  for l in model:
    x = l.forward(x)
  return x
#   y_pred = predict(X_train, model = layers)
#   l = loss(train_labels,y_pred)

# y_pred = predict(X_train, model = layers)

In [None]:
def train_step(x, y, model, lr = 0.02):
  y_pred = predict(x,model)
  delta = y_pred - y

  for l in reversed(model):
    dw, delta = l.back_propagation(delta)
    l.w -= lr * dw
  


In [None]:
for i in range(5):
  train_step(X_train, train_labels, model =layers, lr=0.000001)
  y_pred = predict(X_train, model = layers)
  l = loss(train_labels,y_pred)
  print(loss(train_labels, y_pred))

808.8886988950275
862.7405224146208
953.7028215381614
1052.861971949211
1114.6498348025987
