## Backpropagation

### Using autograd libraries to compute gradients

- Implement Gradient Descent using auto-grad
- Estimate Linear and Logistic Regression using auto-grad

In [1]:
import torch

Minmize $X^2+4X$

In [2]:
x=torch.tensor(0.0,requires_grad=True)

In [3]:
z=x*x+4*x ### forward pass

In [4]:
z.backward()

In [5]:
x.grad ### Grad of x at 0 wrt z

tensor(4.)

In [6]:
z=x*x+4*x

In [7]:
z.backward()

In [8]:
x.grad ### 

tensor(8.)

In [9]:
x.grad.zero_()

tensor(0.)

In [10]:
x.grad

tensor(0.)

In [11]:
x=torch.tensor(0.0,requires_grad=True)
lr=0.01
for i in range(10):
    z=x*x+4*x
    z.backward() ### dz/dx
    with torch.no_grad(): ##Disables any gradient computation
        x-=lr*x.grad
        x.grad.zero_()
    print(f"Z {z}, x: {x}")

Z 0.0, x: -0.03999999910593033
Z -0.15839999914169312, x: -0.07919999957084656
Z -0.31052735447883606, x: -0.11761599779129028
Z -0.4566304683685303, x: -0.15526367723941803
Z -0.5969479084014893, x: -0.19215840101242065
Z -0.7317087650299072, x: -0.22831523418426514
Z -0.8611330986022949, x: -0.2637489140033722
Z -0.9854321479797363, x: -0.29847392439842224
Z -1.104809045791626, x: -0.3325044512748718
Z -1.2194585800170898, x: -0.3658543527126312


In [12]:
### Autodiff
### xy=750=>x=750/y
## x+10y Minimize this
### 750/y+y*10

In [13]:
x=torch.tensor(1.0,requires_grad=True)
lr=0.01
for i in range(100):
    z=(750/x)+x*10
    z.backward()
    with torch.no_grad():
        x-=lr*x.grad
        x.grad.zero_()
    print(f"Z: {z}, x: {x}")

Z: 760.0, x: 8.399999618530273
Z: 173.2857208251953, x: 8.406291961669922
Z: 173.28179931640625, x: 8.41242504119873
Z: 173.27809143066406, x: 8.418403625488281
Z: 173.27456665039062, x: 8.42423152923584
Z: 173.27120971679688, x: 8.429913520812988
Z: 173.2680206298828, x: 8.435453414916992
Z: 173.26498413085938, x: 8.4408540725708
Z: 173.26211547851562, x: 8.446120262145996
Z: 173.25936889648438, x: 8.451254844665527
Z: 173.25677490234375, x: 8.45626163482666
Z: 173.25428771972656, x: 8.46114444732666
Z: 173.25193786621094, x: 8.465906143188477
Z: 173.24969482421875, x: 8.470550537109375
Z: 173.24755859375, x: 8.475079536437988
Z: 173.24554443359375, x: 8.479496955871582
Z: 173.2436065673828, x: 8.483805656433105
Z: 173.2417755126953, x: 8.488008499145508
Z: 173.24002075195312, x: 8.492108345031738
Z: 173.23837280273438, x: 8.496108055114746
Z: 173.23678588867188, x: 8.500009536743164
Z: 173.23529052734375, x: 8.503815650939941
Z: 173.23385620117188, x: 8.507528305053711
Z: 173.2324981

In [15]:
import pandas as pd
reg=pd.read_csv("../data/regression.csv").dropna()

In [16]:
reg.head(2)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,year,origin
0,18.0,8.0,307.0,130.0,3504.0,12.0,70.0,1.0
1,15.0,8.0,350.0,165.0,3693.0,11.5,70.0,1.0


In [17]:
### mpg=b0+b1*cyl ## as a matrix product?
### dloss/db0, dloss/db1
#### loss=f(b0,b1)
##loss=eq
##loss.backward()
##b0.grad
##b1.grad
X=reg[['cylinders']].values
y=reg[['mpg']].values

In [18]:
## W dim?=> dloss/dW,dloss/db
W=torch.randn(1,1,requires_grad=True)
b=torch.randn(1,requires_grad=True)

In [19]:
X=torch.tensor(X)
y=torch.tensor(y)

In [20]:
lr=0.01
for i in range(100):
    diff=y-torch.matmul(X.float(),W)+b
    loss=sum(diff*diff)/y.shape[0]
    loss.backward()
    with torch.no_grad():
        W-=lr*W.grad
        b-=lr*b.grad
        W.grad.zero_()
        b.grad.zero_()
    print(f"Loss: {loss.item()}, W: {W.detach().numpy()}, b: {b.detach().numpy()}")

Loss: 656.742904087977, W: [[2.4741]], b: [0.52897274]
Loss: 238.55183099785884, W: [[3.2661462]], b: [0.3202374]
Loss: 193.9184689687916, W: [[3.5150633]], b: [0.20235732]
Loss: 188.70296726041, W: [[3.5875692]], b: [0.11407603]
Loss: 187.64840846052513, W: [[3.6027856]], b: [0.03549533]
Loss: 187.03462827013078, W: [[3.5994065]], b: [-0.03984849]
Loss: 186.46908242105295, W: [[3.5900018]], b: [-0.11405525]
Loss: 185.9103467995149, W: [[3.5786538]], b: [-0.18780711]
Loss: 185.3540457445425, W: [[3.566689]], b: [-0.26132584]
Loss: 184.7997097647657, W: [[3.5545375]], b: [-0.3346836]
Loss: 184.2472826740042, W: [[3.5423403]], b: [-0.40790406]
Loss: 183.69675593192903, W: [[3.5301418]], b: [-0.48099497]
Loss: 183.14811985990613, W: [[3.5179574]], b: [-0.553959]
Loss: 182.6013690909681, W: [[3.5057921]], b: [-0.62679726]
Loss: 182.05649682534835, W: [[3.4936466]], b: [-0.6995101]
Loss: 181.51349766162096, W: [[3.4815216]], b: [-0.7720978]
Loss: 180.9723636998647, W: [[3.4694176]], b: [-0.

In [21]:
### linear classifier.
### Can you estimate a linear classifier using autodiff
### Loss for linear classifier?
### log loss as a function of W and b, p=f(X,W,b)

In [22]:
cls=pd.read_csv("../data/classification.csv")
cls.head()

Unnamed: 0,No_pregnant,Plasma_glucose,Blood_pres,Skin_thick,Serum_insu,BMI,Diabetes_func,Age,Class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [23]:
X=cls[['No_pregnant']].values
y=cls[['Class']].values

In [24]:
### loss=−[𝑦𝑙𝑜𝑔(𝑝+tol)+(1−𝑦)𝑙𝑜𝑔(1−𝑝+tol)]
### p=1/(1+e^-z)
### z= XW+b

In [25]:
X=torch.tensor(X)
y=torch.tensor(y)
W=torch.randn(1,1,requires_grad=True)
b=torch.randn(1,requires_grad=True)
tol=0.0000000001
lr=0.01
for i in range(100):
    z=torch.matmul(X.float(),W)+b
    p=1.0/(1+torch.exp(-z))
    loss=-(y*torch.log(p+tol)+(1-y)*torch.log(1-p+tol)).mean()
    loss.backward()
    with torch.no_grad():
        W-=lr*W.grad
        b-=lr*b.grad
        W.grad.zero_()
        b.grad.zero_()
    print(f"Loss: {loss.item()}, W: {W}, b: {b}")

Loss: 0.8471252918243408, W: tensor([[0.2586]], requires_grad=True), b: tensor([-0.3354], requires_grad=True)
Loss: 0.8292115330696106, W: tensor([[0.2459]], requires_grad=True), b: tensor([-0.3382], requires_grad=True)
Loss: 0.8124645352363586, W: tensor([[0.2336]], requires_grad=True), b: tensor([-0.3409], requires_grad=True)
Loss: 0.7968730926513672, W: tensor([[0.2217]], requires_grad=True), b: tensor([-0.3436], requires_grad=True)
Loss: 0.7824189066886902, W: tensor([[0.2104]], requires_grad=True), b: tensor([-0.3462], requires_grad=True)
Loss: 0.769075334072113, W: tensor([[0.1994]], requires_grad=True), b: tensor([-0.3487], requires_grad=True)
Loss: 0.7568092346191406, W: tensor([[0.1890]], requires_grad=True), b: tensor([-0.3511], requires_grad=True)
Loss: 0.7455804944038391, W: tensor([[0.1790]], requires_grad=True), b: tensor([-0.3534], requires_grad=True)
Loss: 0.7353433966636658, W: tensor([[0.1695]], requires_grad=True), b: tensor([-0.3557], requires_grad=True)
Loss: 0.726

## Using tensorflow to compute gradients 

$y = x^2 +4x$

$\frac{dy}{dx} = 2x+4$

In [26]:
import tensorflow as tf
x = tf.Variable(3.0)

Metal device set to: Apple M1


2022-07-03 17:37:56.230587: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-07-03 17:37:56.230773: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [27]:
with tf.GradientTape() as tape:
    y = x**2+4*x

In [28]:
dy_dx = tape.gradient(y, x) ## compute dy/dx


In [29]:
dy_dx.numpy()

10.0

In [30]:
dy_dx

<tf.Tensor: shape=(), dtype=float32, numpy=10.0>

Write the gradient descent using ```GradientTape()```

In [31]:
x = tf.Variable(0.0)
lr = 0.1
for i in range(10):
    with tf.GradientTape() as tape:
        y = x**2+4*x
    grad = tape.gradient(y,x)
    x.assign_sub(lr*grad)
    print(x.numpy())

-0.4
-0.72
-0.9760001
-1.1808001
-1.34464
-1.4757121
-1.5805696
-1.6644558
-1.7315646
-1.7852517


### Linear Regression with basic tf

In [32]:
reg.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,year,origin
0,18.0,8.0,307.0,130.0,3504.0,12.0,70.0,1.0
1,15.0,8.0,350.0,165.0,3693.0,11.5,70.0,1.0
2,18.0,8.0,318.0,150.0,3436.0,11.0,70.0,1.0
3,16.0,8.0,304.0,150.0,3433.0,12.0,70.0,1.0
4,17.0,8.0,302.0,140.0,3449.0,10.5,70.0,1.0


In [33]:
X=reg[['cylinders']].values
y=reg[['mpg']].values

In [34]:
X = tf.constant(X,dtype='float32')
y = tf.constant(y,dtype='float32')

In [35]:
def loss(y_pred,y):
    return tf.reduce_mean(tf.square(y-y_pred))

In [36]:
W = tf.Variable(tf.random.normal(shape=(1,1),dtype='float32'))
B = tf.Variable(tf.random.normal(shape=(1,),dtype='float32'))
lr = 0.01
for i in range(10):
    with tf.GradientTape() as tape:
        y_pred = tf.reshape(X@W+B,shape=X.shape[0])
        error = loss(y_pred,y)
    dw,db = tape.gradient(error,[W,B])
    W.assign_sub(lr*dw)
    B.assign_sub(lr*db)
    print(error)

tf.Tensor(192.6729, shape=(), dtype=float32)
tf.Tensor(115.464874, shape=(), dtype=float32)
tf.Tensor(107.17809, shape=(), dtype=float32)
tf.Tensor(106.16406, shape=(), dtype=float32)
tf.Tensor(105.9179, shape=(), dtype=float32)
tf.Tensor(105.75324, shape=(), dtype=float32)
tf.Tensor(105.59764, shape=(), dtype=float32)
tf.Tensor(105.4435, shape=(), dtype=float32)
tf.Tensor(105.289955, shape=(), dtype=float32)
tf.Tensor(105.13697, shape=(), dtype=float32)


In [41]:
## Class Excercise: Use tensorflow api to write the gradients for logistic regresssion