<a href="https://colab.research.google.com/github/RoozbehSanaei/PyTorch-TensorFlow/blob/gh-pages/Basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Introductory Course to PyTorch and TensorFlow


*Generating Data*

In [1]:
import numpy as np
import math
def generate_data(n=2000, noise_std=0.15, rng=None):
    """Sample a noisy sine curve for the regression examples.

    Args:
        n: Number of samples to draw.
        noise_std: Standard deviation of the zero-mean Gaussian noise added to
            ``sin(x)``.
        rng: Optional random source exposing ``uniform`` and ``normal`` (for
            example ``np.random.default_rng(seed)``). When omitted, NumPy's
            global random state is used, exactly as before.

    Returns:
        tuple: ``(x, y)`` as one-dimensional ``float32`` arrays of length ``n``,
        with ``x`` drawn uniformly from [-pi, pi) and ``y = sin(x) + noise``.
    """
    source = np.random if rng is None else rng
    x = source.uniform(-math.pi, math.pi, n)
    noise = source.normal(0, noise_std, n)
    y = np.sin(x) + noise
    return x.astype(np.float32), y.astype(np.float32)


x, y = generate_data()

## Simple Linear Regression Model



*Numpy*

In [None]:
# -*- coding: utf-8 -*-
import numpy as np
import math

# Create random input and output data

def generate_data(n=2000):
    """Sample a noisy sine curve.

    Draws ``n`` inputs uniformly from [-pi, pi) and pairs them with ``sin(x)``
    perturbed by zero-mean Gaussian noise (standard deviation 0.15).

    Returns:
        tuple: ``(x, y)`` as one-dimensional ``float32`` arrays of length ``n``.
    """
    inputs = np.random.uniform(low=-math.pi, high=math.pi, size=n)
    jitter = np.random.normal(loc=0, scale=0.15, size=n)
    targets = np.sin(inputs) + jitter
    return inputs.astype(np.float32), targets.astype(np.float32)


# Fresh training set for this cell.
x, y = generate_data()

# Randomly initialize the four polynomial coefficients.
a = np.random.uniform()
b = np.random.uniform()
c = np.random.uniform()
d = np.random.uniform()

# The powers of x never change during training, so compute them once.
x_squared = x ** 2
x_cubed = x ** 3

# Step size for the *mean* squared error reported below. For 2000 samples this
# is the same effective step as 1e-6 applied to the summed squared error.
learning_rate = 2e-3
for t in range(2000):
    # Forward pass: y_pred = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x_squared + d * x_cubed

    # Compute and print loss
    loss = np.mean(np.square(y_pred - y))
    if t % 100 == 99:
        print(t, loss)

    # Backprop: d(loss)/d(y_pred) for loss = mean((y_pred - y)^2), followed by
    # the chain rule through each polynomial term.
    grad_y_pred = 2.0 * (y_pred - y) / y_pred.size
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_squared).sum()
    grad_d = (grad_y_pred * x_cubed).sum()

    # Gradient-descent update of the coefficients
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

*TensorFlow*

In [8]:
import tensorflow as tf
class LinearRegressionKeras(tf.keras.Model):
    """Cubic polynomial ``w3*x^3 + w2*x^2 + w1*x + b`` with four trainable weights.

    Every weight is a length-1 ``tf.Variable`` initialised uniformly at random,
    so the expression broadcasts over the input.
    """

    def __init__(self):
        super().__init__()
        # Creation order matters: the training cells unpack
        # ``trainable_variables`` as [w3, w2, w1, b].
        self.w3 = tf.Variable(tf.random.uniform(shape=[1]))
        self.w2 = tf.Variable(tf.random.uniform(shape=[1]))
        self.w1 = tf.Variable(tf.random.uniform(shape=[1]))
        self.b = tf.Variable(tf.random.uniform(shape=[1]))

    def call(self, x):
        """Evaluate the polynomial at ``x``.

        Implemented as ``call`` (not ``__call__``) so that ``model(x)`` goes
        through Keras' own ``__call__`` wrapper instead of bypassing it.
        """
        return x * x * x * self.w3 + x * x * self.w2 + x * self.w1 + self.b

In [None]:
# -*- coding: utf-8 -*-
import numpy as np
import math
import tensorflow as tf

def generate_data(n=2000):
    """Sample a noisy sine curve.

    Draws ``n`` inputs uniformly from [-pi, pi) and pairs them with ``sin(x)``
    perturbed by zero-mean Gaussian noise (standard deviation 0.15).

    Returns:
        tuple: ``(x, y)`` as one-dimensional ``float32`` arrays of length ``n``.
    """
    inputs = np.random.uniform(low=-math.pi, high=math.pi, size=n)
    jitter = np.random.normal(loc=0, scale=0.15, size=n)
    targets = np.sin(inputs) + jitter
    return inputs.astype(np.float32), targets.astype(np.float32)


# Fresh training set for this cell.
x, y = generate_data()

tf_model = LinearRegressionKeras()
# The unpacking relies on the variables being listed in the order the model
# creates them: w3, w2, w1, b.
[w3, w2, w1, b] = tf_model.trainable_variables


def squared_error(y_pred, y_true):
    """Return the mean squared difference between predictions and targets."""
    return tf.reduce_mean(tf.square(y_pred - y_true))


# The hand-written gradients below are those of the mean squared error, which
# puts the step size on the same scale as the autodiff-based cells.
learning_rate = 2e-3
n_samples = x.shape[0]

for t in range(2000):
    # Forward pass: y_pred = b + w1 x + w2 x^2 + w3 x^3
    y_pred = tf_model(x)

    # Report progress periodically instead of training silently.
    loss = squared_error(y_pred, y)
    if t % 100 == 99:
        print(t, float(loss))

    # d(loss)/d(y_pred) for loss = mean((y_pred - y)^2), then the chain rule
    # through each polynomial term.
    grad_y_pred = 2.0 * (y_pred - y) / n_samples
    grad_b = tf.reduce_sum(grad_y_pred)
    grad_w1 = tf.reduce_sum(grad_y_pred * x)
    grad_w2 = tf.reduce_sum(grad_y_pred * x ** 2)
    grad_w3 = tf.reduce_sum(grad_y_pred * x ** 3)

    # Update weights
    b.assign(b - learning_rate * grad_b)
    w1.assign(w1 - learning_rate * grad_w1)
    w2.assign(w2 - learning_rate * grad_w2)
    w3.assign(w3 - learning_rate * grad_w3)


In [None]:
# -*- coding: utf-8 -*-
import numpy as np
import math
import tensorflow as tf

def generate_data(n=2000):
    """Sample a noisy sine curve.

    Draws ``n`` inputs uniformly from [-pi, pi) and pairs them with ``sin(x)``
    perturbed by zero-mean Gaussian noise (standard deviation 0.15).

    Returns:
        tuple: ``(x, y)`` as one-dimensional ``float32`` arrays of length ``n``.
    """
    inputs = np.random.uniform(low=-math.pi, high=math.pi, size=n)
    jitter = np.random.normal(loc=0, scale=0.15, size=n)
    targets = np.sin(inputs) + jitter
    return inputs.astype(np.float32), targets.astype(np.float32)


# Fresh training set for this cell.
x, y = generate_data()

tf_model = LinearRegressionKeras()
# The unpacking assumes the variables come back in the order the model creates
# them: w3, w2, w1, b.
[w3, w2, w1, b] = tf_model.trainable_variables


def squared_error(y_pred, y_true):
    """Return the mean of the element-wise squared differences."""
    residual = y_pred - y_true
    return tf.reduce_mean(tf.square(residual))


learning_rate = 0.002

for t in range(2000):
    # Record the forward pass (y_pred = b + w1 x + w2 x^2 + w3 x^3) so the tape
    # can differentiate the loss with respect to the weights.
    with tf.GradientTape() as tape:
        y_pred = tf_model(x)
        loss = squared_error(y_pred, y)
    print(loss)

    grad_w3, grad_w2, grad_w1, grad_b = tape.gradient(loss, tf_model.trainable_variables)

    # Plain gradient-descent step, applied to one variable at a time.
    for variable, gradient in ((b, grad_b), (w1, grad_w1), (w2, grad_w2), (w3, grad_w3)):
        variable.assign(variable - learning_rate * gradient)


In [33]:
# -*- coding: utf-8 -*-
import numpy as np
import math
import tensorflow as tf

def generate_data(n=2000):
    """Sample a noisy sine curve.

    Draws ``n`` inputs uniformly from [-pi, pi) and pairs them with ``sin(x)``
    perturbed by zero-mean Gaussian noise (standard deviation 0.15).

    Returns:
        tuple: ``(x, y)`` as one-dimensional ``float32`` arrays of length ``n``.
    """
    inputs = np.random.uniform(low=-math.pi, high=math.pi, size=n)
    jitter = np.random.normal(loc=0, scale=0.15, size=n)
    targets = np.sin(inputs) + jitter
    return inputs.astype(np.float32), targets.astype(np.float32)


# Fresh training set for this cell.
x, y = generate_data()

learning_rate = 0.002

tf_model = LinearRegressionKeras()
# Not used by the loop below; kept as handles on the individual weights. The
# unpacking assumes the creation order w3, w2, w1, b.
[w3, w2, w1, b] = tf_model.trainable_variables
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)


def squared_error(y_pred, y_true):
    """Return the mean squared difference between predictions and targets."""
    return tf.reduce_mean(tf.square(y_pred - y_true))


for t in range(2000):
    # Forward pass (y_pred = b + w1 x + w2 x^2 + w3 x^3) recorded on the tape.
    with tf.GradientTape() as tape:
        y_pred = tf_model(x)
        loss = squared_error(y_pred, y)
    # Compute and print loss
    print(loss)

    # Differentiate with respect to, and update, the *same* list of variables
    # so every gradient is paired with the variable it belongs to.
    trainable = tf_model.trainable_variables
    grads = tape.gradient(loss, trainable)
    optimizer.apply_gradients(grads_and_vars=zip(grads, trainable))

tf.Tensor(21.673033, shape=(), dtype=float32)
tf.Tensor(9.061057, shape=(), dtype=float32)
tf.Tensor(5.950099, shape=(), dtype=float32)
tf.Tensor(4.7749786, shape=(), dtype=float32)
tf.Tensor(4.060745, shape=(), dtype=float32)
tf.Tensor(3.5095546, shape=(), dtype=float32)
tf.Tensor(3.0526454, shape=(), dtype=float32)
tf.Tensor(2.6671674, shape=(), dtype=float32)
tf.Tensor(2.34059, shape=(), dtype=float32)
tf.Tensor(2.063611, shape=(), dtype=float32)
tf.Tensor(1.8286033, shape=(), dtype=float32)
tf.Tensor(1.6291511, shape=(), dtype=float32)
tf.Tensor(1.4598281, shape=(), dtype=float32)
tf.Tensor(1.3160378, shape=(), dtype=float32)
tf.Tensor(1.1938852, shape=(), dtype=float32)
tf.Tensor(1.0900695, shape=(), dtype=float32)
tf.Tensor(1.0017942, shape=(), dtype=float32)
tf.Tensor(0.92668897, shape=(), dtype=float32)
tf.Tensor(0.86274534, shape=(), dtype=float32)
tf.Tensor(0.80826145, shape=(), dtype=float32)
tf.Tensor(0.7617949, shape=(), dtype=float32)
tf.Tensor(0.7221238, shape=(), dtype=

In [86]:
class LinearRegressionKeras(tf.keras.Model):
    """Keras model consisting of a single one-unit ``Dense`` layer, no activation.

    Note: this reuses, and therefore replaces, the name of the polynomial model
    defined earlier in the notebook.
    """

    def __init__(self):
        super().__init__()
        # One output unit and no activation.
        self.linear = tf.keras.layers.Dense(units=1, activation=None)

    def call(self, x):
        """Apply the dense layer to ``x`` and return its output."""
        output = self.linear(x)
        return output


# -*- coding: utf-8 -*-
import numpy as np
import math
import tensorflow as tf

def generate_data(n=2000):
    """Sample a noisy sine curve.

    Draws ``n`` inputs uniformly from [-pi, pi) and pairs them with ``sin(x)``
    perturbed by zero-mean Gaussian noise (standard deviation 0.15).

    Returns:
        tuple: ``(x, y)`` as one-dimensional ``float32`` arrays of length ``n``.
    """
    inputs = np.random.uniform(low=-math.pi, high=math.pi, size=n)
    jitter = np.random.normal(loc=0, scale=0.15, size=n)
    targets = np.sin(inputs) + jitter
    return inputs.astype(np.float32), targets.astype(np.float32)


# Fresh training set for this cell.
x, y = generate_data()

learning_rate = 0.002


def squared_error(y_pred, y_true):
    """Return the mean squared difference between predictions and targets."""
    return tf.reduce_mean(tf.square(y_pred - y_true))


tf_model_train_loop = LinearRegressionKeras()
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)

# The Dense layer is fed a column of single-feature samples. The input never
# changes, so reshape it once and let -1 infer the number of samples.
x_batch = tf.reshape(x, [-1, 1])

for epoch in range(2000):
    with tf.GradientTape() as tape:
        # Flatten the (n, 1) predictions so they line up with the 1-D targets.
        y_pred = tf.reshape(tf_model_train_loop(x_batch), [-1])
        # Keras losses take (y_true, y_pred).
        loss = tf.losses.mse(y, y_pred)

    # Read the variables after the forward pass and use the same list for
    # differentiation and for the update.
    trainable = tf_model_train_loop.trainable_variables
    grads = tape.gradient(loss, trainable)
    optimizer.apply_gradients(grads_and_vars=zip(grads, trainable))

    if epoch % 20 == 0:
        print(f"Epoch {epoch} : Loss {loss.numpy()}")

Epoch 0 : Loss 12.20061206817627
Epoch 20 : Loss 7.18570613861084
Epoch 40 : Loss 4.270078182220459
Epoch 60 : Loss 2.5749545097351074
Epoch 80 : Loss 1.5894218683242798
Epoch 100 : Loss 1.0164408683776855
Epoch 120 : Loss 0.6833142638206482
Epoch 140 : Loss 0.48963701725006104
Epoch 160 : Loss 0.377034455537796
Epoch 180 : Loss 0.3115682005882263
Epoch 200 : Loss 0.27350664138793945
Epoch 220 : Loss 0.2513779103755951
Epoch 240 : Loss 0.23851245641708374
Epoch 260 : Loss 0.23103255033493042
Epoch 280 : Loss 0.22668379545211792
Epoch 300 : Loss 0.2241554707288742
Epoch 320 : Loss 0.2226855456829071
Epoch 340 : Loss 0.22183090448379517
Epoch 360 : Loss 0.22133402526378632
Epoch 380 : Loss 0.22104515135288239
Epoch 400 : Loss 0.22087720036506653
Epoch 420 : Loss 0.22077953815460205
Epoch 440 : Loss 0.22072277963161469
Epoch 460 : Loss 0.22068975865840912
Epoch 480 : Loss 0.22067059576511383
Epoch 500 : Loss 0.22065943479537964
Epoch 520 : Loss 0.22065293788909912
Epoch 540 : Loss 0.22064

*PyTorch*

In [44]:
import torch
class LinearRegressionPyTorch(torch.nn.Module):
    """Cubic polynomial ``x^3 @ w3 + x^2 @ w2 + x @ w1 + b`` as a torch module.

    The three weights are 1x1 matrices and the bias has a single element; all
    are initialised with ``uniform_()``.
    """

    def __init__(self):
        super().__init__()
        # Registered in the order w3, w2, w1, b.
        self.w3 = torch.nn.Parameter(torch.empty(1, 1).uniform_())
        self.w2 = torch.nn.Parameter(torch.empty(1, 1).uniform_())
        self.w1 = torch.nn.Parameter(torch.empty(1, 1).uniform_())
        self.b = torch.nn.Parameter(torch.empty(1).uniform_())

    def forward(self, x):
        """Evaluate the polynomial; ``x`` is matrix-multiplied with each weight."""
        cubic = torch.matmul(x ** 3, self.w3)
        quadratic = torch.matmul(x ** 2, self.w2)
        linear = torch.matmul(x, self.w1)
        return cubic + quadratic + linear + self.b

In [64]:
# -*- coding: utf-8 -*-
import numpy as np
import math
import tensorflow as tf

def generate_data(n=2000):
    """Sample a noisy sine curve.

    Draws ``n`` inputs uniformly from [-pi, pi) and pairs them with ``sin(x)``
    perturbed by zero-mean Gaussian noise (standard deviation 0.15).

    Returns:
        tuple: ``(x, y)`` as one-dimensional ``float32`` arrays of length ``n``.
    """
    inputs = np.random.uniform(low=-math.pi, high=math.pi, size=n)
    jitter = np.random.normal(loc=0, scale=0.15, size=n)
    targets = np.sin(inputs) + jitter
    return inputs.astype(np.float32), targets.astype(np.float32)


# Draw a dataset and turn each array into an (n, 1) column tensor.
x, y = (torch.from_numpy(column.reshape(-1, 1)) for column in generate_data())

# Build the cubic model and take direct handles on its parameters; the
# unpacking relies on parameters() yielding them as w3, w2, w1, b.
torch_model = LinearRegressionPyTorch()
w3, w2, w1, b = torch_model.parameters()

def squared_error(y_pred, y_true):
    """Return the mean squared difference between two torch tensors.

    Uses torch operations so the helper works on the tensors this cell
    produces and keeps the result inside torch.
    """
    return torch.mean(torch.square(y_pred - y_true))

learning_rate = 1e-6

for t in range(2000):
    # The gradients are derived by hand, so the whole step runs with autograd
    # recording switched off; no computation graph is built for the forward pass.
    with torch.no_grad():
        # Forward pass: y_pred = b + w1 x + w2 x^2 + w3 x^3
        y_pred = torch_model(x)

        # Gradient of the summed squared error sum((y_pred - y)^2) with respect
        # to y_pred, then the chain rule through each polynomial term.
        grad_y_pred = 2.0 * (y_pred - y)
        grad_b = torch.sum(grad_y_pred)
        grad_w1 = torch.sum(grad_y_pred * x)
        grad_w2 = torch.sum(grad_y_pred * x ** 2)
        grad_w3 = torch.sum(grad_y_pred * x ** 3)

        # Update weights in place
        b -= learning_rate * grad_b
        w1 -= learning_rate * grad_w1
        w2 -= learning_rate * grad_w2
        w3 -= learning_rate * grad_w3

    print(b)


Parameter containing:
tensor([0.1005], requires_grad=True)
Parameter containing:
tensor([0.0996], requires_grad=True)
Parameter containing:
tensor([0.0989], requires_grad=True)
Parameter containing:
tensor([0.0982], requires_grad=True)
Parameter containing:
tensor([0.0976], requires_grad=True)
Parameter containing:
tensor([0.0970], requires_grad=True)
Parameter containing:
tensor([0.0964], requires_grad=True)
Parameter containing:
tensor([0.0959], requires_grad=True)
Parameter containing:
tensor([0.0954], requires_grad=True)
Parameter containing:
tensor([0.0949], requires_grad=True)
Parameter containing:
tensor([0.0945], requires_grad=True)
Parameter containing:
tensor([0.0940], requires_grad=True)
Parameter containing:
tensor([0.0936], requires_grad=True)
Parameter containing:
tensor([0.0932], requires_grad=True)
Parameter containing:
tensor([0.0928], requires_grad=True)
Parameter containing:
tensor([0.0925], requires_grad=True)
Parameter containing:
tensor([0.0921], requires_grad=Tru

In [75]:

def generate_data(n=2000):
    """Sample a noisy sine curve.

    Draws ``n`` inputs uniformly from [-pi, pi) and pairs them with ``sin(x)``
    perturbed by zero-mean Gaussian noise (standard deviation 0.15).

    Returns:
        tuple: ``(x, y)`` as one-dimensional ``float32`` arrays of length ``n``.
    """
    inputs = np.random.uniform(low=-math.pi, high=math.pi, size=n)
    jitter = np.random.normal(loc=0, scale=0.15, size=n)
    targets = np.sin(inputs) + jitter
    return inputs.astype(np.float32), targets.astype(np.float32)


# Draw a dataset and turn each array into an (n, 1) column tensor.
x, y = (torch.from_numpy(column.reshape(-1, 1)) for column in generate_data())


def squared_error(y_pred, y_true):
    """Return the mean of the element-wise squared differences as a 0-dim tensor."""
    residual = y_pred - y_true
    return residual.square().mean()


# Build the cubic model and take direct handles on its parameters; the
# unpacking relies on parameters() yielding them as w3, w2, w1, b.
torch_model = LinearRegressionPyTorch()
w3, w2, w1, b = torch_model.parameters()

learning_rate = 0.002

for epoch in range(2000):
    # Forward pass and loss; autograd records the graph for backward().
    y_pred = torch_model(x)
    loss = squared_error(y_pred, y)

    # Populate .grad on every parameter.
    loss.backward()

    # The update itself must not be tracked by autograd.
    with torch.no_grad():
        for param in (w1, w2, w3, b):
            param -= param.grad * learning_rate
        # backward() accumulates into .grad, so reset it for the next epoch.
        for param in (w1, w2, w3, b):
            param.grad.zero_()

    print(f"Epoch {epoch} : Loss {loss.data}")

Epoch 0 : Loss 119.58746337890625
Epoch 1 : Loss 37.74802780151367
Epoch 2 : Loss 17.943822860717773
Epoch 3 : Loss 12.152372360229492
Epoch 4 : Loss 9.676018714904785
Epoch 5 : Loss 8.109704971313477
Epoch 6 : Loss 6.8940019607543945
Epoch 7 : Loss 5.882986068725586
Epoch 8 : Loss 5.025717258453369
Epoch 9 : Loss 4.295061111450195
Epoch 10 : Loss 3.6714775562286377
Epoch 11 : Loss 3.1390886306762695
Epoch 12 : Loss 2.684516668319702
Epoch 13 : Loss 2.2963781356811523
Epoch 14 : Loss 1.9649615287780762
Epoch 15 : Loss 1.6819778680801392
Epoch 16 : Loss 1.4403491020202637
Epoch 17 : Loss 1.2340307235717773
Epoch 18 : Loss 1.0578628778457642
Epoch 19 : Loss 0.9074399471282959
Epoch 20 : Loss 0.778998851776123
Epoch 21 : Loss 0.6693279147148132
Epoch 22 : Loss 0.5756833553314209
Epoch 23 : Loss 0.4957236349582672
Epoch 24 : Loss 0.42744845151901245
Epoch 25 : Loss 0.36915063858032227
Epoch 26 : Loss 0.3193718194961548
Epoch 27 : Loss 0.27686718106269836
Epoch 28 : Loss 0.2405737191438675


In [78]:

def generate_data(n=2000):
    """Sample a noisy sine curve.

    Draws ``n`` inputs uniformly from [-pi, pi) and pairs them with ``sin(x)``
    perturbed by zero-mean Gaussian noise (standard deviation 0.15).

    Returns:
        tuple: ``(x, y)`` as one-dimensional ``float32`` arrays of length ``n``.
    """
    inputs = np.random.uniform(low=-math.pi, high=math.pi, size=n)
    jitter = np.random.normal(loc=0, scale=0.15, size=n)
    targets = np.sin(inputs) + jitter
    return inputs.astype(np.float32), targets.astype(np.float32)


# Draw a dataset and turn each array into an (n, 1) column tensor.
x, y = (torch.from_numpy(column.reshape(-1, 1)) for column in generate_data())


def squared_error(y_pred, y_true):
    """Return the mean of the element-wise squared differences as a 0-dim tensor."""
    residual = y_pred - y_true
    return residual.square().mean()


torch_model = LinearRegressionPyTorch()

# Set here so the cell does not depend on a value left behind by an earlier cell.
learning_rate = 0.002

# Built-in mean-squared-error loss and SGD optimizer replace the hand-written
# loss and update step.
criterion = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.SGD(torch_model.parameters(), lr=learning_rate)

for epoch in range(2000):
    y_pred = torch_model(x)
    loss = criterion(y_pred, y)

    # Clear stale gradients, backpropagate, then take one SGD step.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 20 == 0:
        print(f"Epoch {epoch} : Loss {loss.item()}")

Epoch 0 : Loss 15.971563339233398
Epoch 20 : Loss 0.7981100678443909
Epoch 40 : Loss 0.35374152660369873
Epoch 60 : Loss 0.3150010406970978
Epoch 80 : Loss 0.293123334646225
Epoch 100 : Loss 0.27340078353881836
Epoch 120 : Loss 0.25516727566719055
Epoch 140 : Loss 0.23829202353954315
Epoch 160 : Loss 0.22267252206802368
Epoch 180 : Loss 0.20821437239646912
Epoch 200 : Loss 0.19483056664466858
Epoch 220 : Loss 0.1824406385421753
Epoch 240 : Loss 0.17097017168998718
Epoch 260 : Loss 0.16035035252571106
Epoch 280 : Loss 0.15051761269569397
Epoch 300 : Loss 0.1414131373167038
Epoch 320 : Loss 0.13298246264457703
Epoch 340 : Loss 0.12517531216144562
Epoch 360 : Loss 0.11794523894786835
Epoch 380 : Loss 0.11124922335147858
Epoch 400 : Loss 0.10504744201898575
Epoch 420 : Loss 0.09930315613746643
Epoch 440 : Loss 0.09398232400417328
Epoch 460 : Loss 0.0890534520149231
Epoch 480 : Loss 0.08448739349842072
Epoch 500 : Loss 0.08025719970464706
Epoch 520 : Loss 0.0763380229473114
Epoch 540 : Loss

In [82]:
class LinearRegressionPyTorch(torch.nn.Module):
    """Torch module wrapping a single ``torch.nn.Linear(1, 1)`` layer.

    Note: this reuses, and therefore replaces, the name of the polynomial model
    defined earlier in the notebook.
    """

    def __init__(self):
        super().__init__()
        # One input feature mapped to one output value.
        self.linear = torch.nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        prediction = self.linear(x)
        return prediction

def generate_data(n=2000):
    """Sample a noisy sine curve.

    Draws ``n`` inputs uniformly from [-pi, pi) and pairs them with ``sin(x)``
    perturbed by zero-mean Gaussian noise (standard deviation 0.15).

    Returns:
        tuple: ``(x, y)`` as one-dimensional ``float32`` arrays of length ``n``.
    """
    inputs = np.random.uniform(low=-math.pi, high=math.pi, size=n)
    jitter = np.random.normal(loc=0, scale=0.15, size=n)
    targets = np.sin(inputs) + jitter
    return inputs.astype(np.float32), targets.astype(np.float32)


# Draw a dataset and turn each array into an (n, 1) column tensor.
x, y = (torch.from_numpy(column.reshape(-1, 1)) for column in generate_data())


def squared_error(y_pred, y_true):
    """Return the mean of the element-wise squared differences as a 0-dim tensor."""
    residual = y_pred - y_true
    return residual.square().mean()


torch_model = LinearRegressionPyTorch()

# Set here so the cell does not depend on a value left behind by an earlier cell.
learning_rate = 0.002

# Built-in mean-squared-error loss and SGD optimizer.
criterion = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.SGD(torch_model.parameters(), lr=learning_rate)

for epoch in range(20000):
    y_pred = torch_model(x)
    loss = criterion(y_pred, y)

    # Clear stale gradients, backpropagate, then take one SGD step.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 20 == 0:
        print(f"Epoch {epoch} : Loss {loss.item()}")

Epoch 0 : Loss 4.552675724029541
Epoch 20 : Loss 2.8511154651641846
Epoch 40 : Loss 1.8346728086471558
Epoch 60 : Loss 1.224702000617981
Epoch 80 : Loss 0.856306254863739
Epoch 100 : Loss 0.6318390369415283
Epoch 120 : Loss 0.49342530965805054
Epoch 140 : Loss 0.406716912984848
Epoch 160 : Loss 0.351290225982666
Epoch 180 : Loss 0.31496885418891907
Epoch 200 : Loss 0.2904655933380127
Epoch 220 : Loss 0.27339619398117065
Epoch 240 : Loss 0.26110297441482544
Epoch 260 : Loss 0.2519589364528656
Epoch 280 : Loss 0.24495406448841095
Epoch 300 : Loss 0.23945049941539764
Epoch 320 : Loss 0.23503604531288147
Epoch 340 : Loss 0.23143728077411652
Epoch 360 : Loss 0.2284671515226364
Epoch 380 : Loss 0.22599340975284576
Epoch 400 : Loss 0.22391940653324127
Epoch 420 : Loss 0.22217227518558502
Epoch 440 : Loss 0.22069552540779114
Epoch 460 : Loss 0.21944433450698853
Epoch 480 : Loss 0.21838250756263733
Epoch 500 : Loss 0.21748031675815582
Epoch 520 : Loss 0.21671311557292938
Epoch 540 : Loss 0.2160