# Basic Supervised Linear Models

## Import Packages

In [13]:
import tensorflow as tf
import torch
from torch.utils import data
from torch import nn

import numpy as np
import sys

from d2l import tensorflow as d2l_tf
from d2l import torch as d2l_torch

In [24]:
print(tf.__version__)

2.8.0


* https://www.tensorflow.org/tutorials/keras/regression
* https://adamoudad.github.io/posts/keras_torch_comparison/syntax/

## Problem - Regression

### Global Variables

In [14]:
# Hyperparameters shared by the TensorFlow and PyTorch examples in this notebook.
LEARNING_RATE = 0.03  # learning rate handed to both SGD optimizers
NUM_EPOCHS = 5  # number of passes over the training data
BATCH_SIZE = 8  # examples per mini-batch
INIT_STDDEV = 0.01  # std-dev of the normal distribution used to initialise the weights

### Data Preparation

In [15]:
# Ground-truth parameters that the trained models are later compared against.
true_w = tf.constant([2.0, -3.0])
true_b = 4.0
# Generate 1000 examples with the d2l helper
# (presumably y = Xw + b plus noise — confirm against the d2l documentation).
features, labels = d2l_tf.synthetic_data(true_w, true_b, 1000)

In [16]:
features[:BATCH_SIZE], labels[:BATCH_SIZE]

(<tf.Tensor: shape=(8, 2), dtype=float32, numpy=
 array([[ 0.25883058,  2.0530121 ],
        [ 0.39729354, -1.0603833 ],
        [ 0.05721881, -0.73641664],
        [ 0.31468382, -1.043708  ],
        [-1.0798566 , -0.550977  ],
        [-1.6296045 ,  2.189807  ],
        [ 0.3019283 , -0.8288437 ],
        [-0.3030514 ,  0.40066257]], dtype=float32)>,
 <tf.Tensor: shape=(8, 1), dtype=float32, numpy=
 array([[-1.6477871],
        [ 7.9544015],
        [ 6.3242235],
        [ 7.7690415],
        [ 3.4966393],
        [-5.8354015],
        [ 7.093859 ],
        [ 2.1846747]], dtype=float32)>)

In [17]:
def load_array_tf(data_arrays, batch_size, is_train=True):  #@save
    """Wrap in-memory arrays in a batched `tf.data.Dataset`.

    Args:
        data_arrays: structure of arrays/tensors (e.g. `(features, labels)`)
            that is sliced along its first dimension.
        batch_size: number of examples per batch.
        is_train: when true, examples are shuffled (buffer of 1000) before
            batching.

    Returns:
        The batched dataset.
    """
    slices = tf.data.Dataset.from_tensor_slices(data_arrays)
    pipeline = slices.shuffle(buffer_size=1000) if is_train else slices
    return pipeline.batch(batch_size)

In [18]:
# Build the shuffled, batched TensorFlow iterator consumed by the Keras `fit` calls.
# NOTE: `data_iter` is rebound by the PyTorch data-prep cell further down, so this
# cell has to be re-run before the Keras sections if the PyTorch cells ran in between.
batch_size = BATCH_SIZE
data_iter = load_array_tf((features, labels), batch_size)

In [19]:
next(iter(data_iter))

(<tf.Tensor: shape=(8, 2), dtype=float32, numpy=
 array([[ 1.1484678 , -0.08717881],
        [-0.47707787,  1.4168806 ],
        [-0.58437693, -0.66772157],
        [-0.6238647 ,  0.85288274],
        [ 0.8883182 ,  0.2856394 ],
        [-1.4484255 ,  0.34251413],
        [ 0.35963306,  0.7526962 ],
        [ 0.6157805 , -0.04350418]], dtype=float32)>,
 <tf.Tensor: shape=(8, 1), dtype=float32, numpy=
 array([[ 6.548785  ],
        [-1.1996217 ],
        [ 4.844516  ],
        [ 0.19433065],
        [ 4.9166293 ],
        [ 0.07303171],
        [ 2.4524748 ],
        [ 5.358545  ]], dtype=float32)>)

---

In [8]:
# PyTorch counterpart of the TensorFlow data preparation.
# NOTE: this rebinds `true_w`, `true_b`, `features` and `labels`, replacing the
# TensorFlow tensors created earlier with PyTorch tensors.
true_w = torch.tensor([2.0, -3.0])
true_b = 4.0
features, labels = d2l_torch.synthetic_data(true_w, true_b, 1000)

In [9]:
features[:BATCH_SIZE], labels[:BATCH_SIZE]

(tensor([[ 0.9355, -0.8222],
         [-0.5997,  1.0499],
         [-0.9472,  0.2633],
         [ 0.9309,  0.4007],
         [-1.2002,  0.6576],
         [-0.3912,  0.0140],
         [ 0.0821, -0.7032],
         [ 0.5781,  0.5491]]),
 tensor([[ 8.3278],
         [-0.3404],
         [ 1.3197],
         [ 4.6700],
         [-0.3835],
         [ 3.1698],
         [ 6.2808],
         [ 3.5099]]))

In [10]:
def load_array_pt(data_arrays, batch_size, is_train=True):  #@save
    """Wrap in-memory tensors in a PyTorch `DataLoader`.

    Args:
        data_arrays: sequence of tensors sharing the same first dimension,
            e.g. `(features, labels)`.
        batch_size: number of examples per batch.
        is_train: when true, the loader reshuffles the examples.

    Returns:
        A `DataLoader` yielding one batch per input tensor.
    """
    tensor_dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(tensor_dataset,
                             batch_size=batch_size,
                             shuffle=is_train)
    return loader

In [11]:
data_iter = load_array_pt((features, labels), BATCH_SIZE)

In [7]:
next(iter(data_iter))

[tensor([[ 2.0036,  0.4265],
         [-0.2566, -0.3797],
         [-0.3721, -0.6596],
         [ 0.6673,  0.2818],
         [-0.5474,  0.9549],
         [-0.1003, -0.0773],
         [ 0.5441,  0.1022],
         [ 1.7986, -0.4563]]),
 tensor([[6.7299],
         [4.6268],
         [5.2592],
         [4.4811],
         [0.0342],
         [4.0300],
         [4.7813],
         [8.9633]])]

### Model Training

#### TF Keras Sequential API 

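**Note:** re-run the TensorFlow data-preparation cells above before this section — the PyTorch data-preparation cells overwrite `true_w`, `features`, `labels` and `data_iter`.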

In [20]:
def rmse(y_true, y_pred):
    """Root-mean-squared error between predictions and targets.

    Args:
        y_true: target values.
        y_pred: predicted values.

    Returns:
        Square root of the mean of the squared differences, taken over all
        elements.
    """
    squared_error = tf.square(y_pred - y_true)
    mean_squared_error = tf.reduce_mean(squared_error)
    return tf.sqrt(mean_squared_error)

In [21]:
# Shared building blocks for the Keras models in this section.
initializer = tf.initializers.RandomNormal(stddev=INIT_STDDEV)  # normal init with std-dev INIT_STDDEV
loss = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.SGD(learning_rate=LEARNING_RATE)

**A. Model architecture**

In [30]:
# Linear regression as a network: a 2-feature input followed by one dense unit
# with a linear activation.
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(2, ), name='input'),
    tf.keras.layers.Dense(1, activation='linear', 
                          kernel_initializer=initializer,
                          name='dense')
])

# Alternatively, add the layers one at a time:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.InputLayer(input_shape=(2, )))
# model.add(tf.keras.layers.Dense(1, kernel_initializer=initializer))

In [31]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1)                 3         
                                                                 
Total params: 3
Trainable params: 3
Non-trainable params: 0
_________________________________________________________________


* The input layer can be omitted here and the model fitted on input data at a later stage. In that case, however, `model.summary()` can only be called once the shape of the input data is known to the model.
* Any types of feature preprocessing e.g. normalisation (`tf.keras.layers.Normalization`) can also be included in the sequential steps as the initial layer.

**A. Training process**

In [36]:
def train(model, features, labels, batch_size, loss, num_epochs, optimizer):
    """Fit a Keras model with a hand-written mini-batch gradient-descent loop.

    After every epoch the loss over the full `features`/`labels` set is printed.

    Args:
        model: callable Keras model exposing `trainable_variables`.
        features: input tensor holding all training examples.
        labels: target tensor aligned with `features`.
        batch_size: number of examples per mini-batch.
        loss: Keras-style loss callable, invoked as `loss(y_true, y_pred)`.
        num_epochs: number of passes over the data.
        optimizer: Keras optimizer used to apply the gradients.
    """
    # Use the notebook's own TensorFlow loader; a bare `load_array` is not
    # defined or imported anywhere in this notebook.
    data_iter = load_array_tf((features, labels), batch_size)
    for epoch in range(num_epochs):
        for X, y in data_iter:
            with tf.GradientTape() as tape:
                # Keras losses expect the targets first, then the predictions.
                l = loss(y, model(X, training=True))
            grads = tape.gradient(l, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        # Report the loss over the whole data set once per epoch.
        l = loss(labels, model(features))
        print(f'epoch {epoch + 1}, loss {l:f}')

In [37]:
train(model, features, labels, BATCH_SIZE, loss, NUM_EPOCHS, optimizer)

epoch 1, loss 0.000106
epoch 2, loss 0.000095
epoch 3, loss 0.000096
epoch 4, loss 0.000096
epoch 5, loss 0.000096


**B. Model architecture (Repeated)** 

In [33]:
# Same single-unit linear model as in part A, but without an explicit input layer.
model = tf.keras.Sequential([
  tf.keras.layers.Dense(1, activation='linear', 
                        kernel_initializer=initializer)
])

* Here we didn't define the input layer, so `model.summary()` cannot be called yet: the model doesn't know anything about the shape of the input data until it has been fitted on (or called with) some data.

**B. Compile model then train**

In [None]:
# Attach optimizer, loss and metrics, then let Keras run the training loop
# over the batched TensorFlow iterator.
model.compile(optimizer=optimizer,
              loss=loss,
              metrics=['mse', rmse])  # `rmse` is the custom metric function defined earlier
model.fit(data_iter,
          epochs=NUM_EPOCHS)

**Results**

In [10]:
# Compare the learned parameters with the ground truth used to generate the data.
w = model.get_weights()[0]  # first weight array; reshaped below to match `true_w`
print('error in estimating w', true_w - tf.reshape(w, true_w.shape))
b = model.get_weights()[1]  # second weight array, compared against `true_b`
print('error in estimating b', true_b - b)

error in estimating w tf.Tensor([0.00014162 0.00013518], shape=(2,), dtype=float32)
error in estimating b [0.00060081]


In [11]:
print(w)
print(b)

[[ 1.9998584]
 [-3.4001353]]
[4.199399]


#### TF Keras Functional API

In [26]:
# Functional API: declare the input tensor, then call layers on it to build the graph.
inputs = tf.keras.Input(shape=(2,))
# A hidden layer could be inserted here, e.g.:
# x = tf.keras.layers.Dense(10, activation="relu")(inputs)
# Use the same linear activation and weight initialiser as the Sequential
# models so that the two APIs are compared on identical models.
outputs = tf.keras.layers.Dense(1, activation='linear',
                                kernel_initializer=initializer)(inputs)

In [20]:
# Tie the input and output tensors together into a named model and list its layers.
model = tf.keras.Model(inputs=inputs, outputs=outputs, name="linear_model")
model.summary()

Model: "linear_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 2)]               0         
                                                                 
 dense_3 (Dense)             (None, 1)                 3         
                                                                 
Total params: 3
Trainable params: 3
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Same compile-and-fit recipe as for the Sequential model, applied to the functional model.
model.compile(optimizer=optimizer,
              loss=loss,
              metrics=['mse', rmse])  # built-in MSE plus the custom `rmse` function
model.fit(data_iter,
          epochs=NUM_EPOCHS)

Epoch 1/5

2022-03-31 18:42:05.860737: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x167f08520>

In [22]:
# Compare the functional model's learned parameters with the ground truth.
w = model.get_weights()[0]  # first weight array; reshaped below to match `true_w`
print('error in estimating w', true_w - tf.reshape(w, true_w.shape))
b = model.get_weights()[1]  # second weight array, compared against `true_b`
print('error in estimating b', true_b - b)

error in estimating w tf.Tensor([1.8048286e-04 4.8398972e-05], shape=(2,), dtype=float32)
error in estimating b [-0.00080442]


In [23]:
print(w)
print(b)

[[ 1.9998195]
 [-3.4000485]]
[4.200804]


#### PyTorch

References
* https://pytorch.org/docs/stable/generated/torch.nn.Module.html
* https://medium.com/biaslyai/pytorch-linear-and-logistic-regression-models-5c5f0da2cb9
* https://medium.com/biaslyai/pytorch-introduction-to-neural-network-feedforward-neural-network-model-e7231cff47cb

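**Note:** re-run the PyTorch data-preparation cells above before this section, so that `true_w`, `features`, `labels` and `data_iter` hold PyTorch objects rather than the TensorFlow ones.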

In [8]:
model = nn.Sequential(nn.Linear(2, 1))

In [9]:
def init_weights(m):
    """Initialise the weights of linear layers from a normal distribution.

    Intended to be passed to `nn.Module.apply`. Modules that are not
    `nn.Linear` (or a subclass of it) are left untouched, and so is the bias.

    Args:
        m: the module to initialise in place.
    """
    # isinstance (rather than an exact type comparison) also covers subclasses
    # of nn.Linear.
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=INIT_STDDEV)

# Run `init_weights` over the model's modules to initialise the linear layer.
model.apply(init_weights)
# NOTE: `loss` and `optimizer` rebind the names that held the Keras objects earlier.
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

In [12]:
def train(model, train_iter, loss, num_epochs, optimizer,
          eval_features=None, eval_labels=None):
    """Train a PyTorch model with mini-batch gradient descent.

    After every epoch the loss on the evaluation data is printed.

    Note: this definition shadows the TensorFlow `train` defined earlier in
    the notebook.

    Args:
        model: module called as `model(X)` to produce predictions.
        train_iter: iterable yielding `(X, y)` mini-batches; it is iterated
            once per epoch.
        loss: callable invoked as `loss(prediction, target)` and returning a
            scalar tensor.
        num_epochs: number of passes over `train_iter`.
        optimizer: optimizer whose `zero_grad`/`step` drive the updates.
        eval_features: inputs used for the per-epoch loss report. Defaults to
            the notebook-level `features`.
        eval_labels: targets used for the per-epoch loss report. Defaults to
            the notebook-level `labels`.
    """
    # Fall back to the notebook globals so existing five-argument calls keep working.
    if eval_features is None:
        eval_features = features
    if eval_labels is None:
        eval_labels = labels
    for epoch in range(num_epochs):
        for X, y in train_iter:
            l = loss(model(X), y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
        # Evaluation only: no gradient graph is needed for the reported loss.
        with torch.no_grad():
            l = loss(model(eval_features), eval_labels)
        print(f'epoch {epoch + 1}, loss {l:f}')

In [13]:
train(model, data_iter, loss, NUM_EPOCHS, optimizer)

epoch 1, loss 0.000114
epoch 2, loss 0.000104
epoch 3, loss 0.000104
epoch 4, loss 0.000104
epoch 5, loss 0.000104


In [15]:
# Read the learned parameters off the first (and only) layer of the Sequential
# model and compare them with the ground truth.
w = model[0].weight.data  # reshaped below to match `true_w`
print('error in estimating w:', true_w - w.reshape(true_w.shape))
b = model[0].bias.data
print('error in estimating b:', true_b - b)

error in estimating w: tensor([0.0002, 0.0007])
error in estimating b: tensor([0.0005])


In [16]:
print(w)
print(b)

tensor([[ 1.9998, -3.0007]])
tensor([3.9995])
