# Linear Model (Basic)

https://docs.python.org/3/library/pdb.html

In [1]:
import torch
import numpy as np
import pdb  # python debuger

In [2]:
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

In [3]:
# y = x * w + b, ignore b to simplify mode
# our model for the forward pass
def forward(x):
    return x * w


# Loss function
def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) ** 2

In [4]:
# compute gradient
def gradient(x, y):  # d_loss/d_w
    return 2 * x * (x * w - y)

## loss graph

In [None]:
# List of weights/Mean square Error (Mse) for each input
w_list = []
mse_list = []

for w in np.arange(0.0, 4.1, 0.1):  # w is random guess to predit y_pred
    # Print the weights and initialize the lost
    print("w =", w)
    l_sum = 0

    for x_val, y_val in zip(x_data, y_data):
        # For each input and output, calculate y_hat
        # Compute the total loss and add to the total error
        y_pred_val = forward(x_val)
        l = loss(x_val, y_val)
        l_sum += l
        print("\t", x_val, y_val, y_pred_val, l)
    # Now compute the Mean squared error (mse) of each
    # Aggregate the weight/mse from this run
    print("MSE =", l_sum / 3)
    w_list.append(w)
    mse_list.append(l_sum / 3)

In [None]:
import matplotlib.pyplot as plt

# Plot loss graph
plt.plot(w_list, mse_list)
plt.ylabel('Loss')
plt.xlabel('w')
plt.show()

## auto gradient

In [6]:
# Before training
w = torch.tensor([1.0], requires_grad=True)  # this time we use tensor
print("Prediction (before training)",  4, forward(4).item())

# Training loop
for epoch in range(10):
    for x_val, y_val in zip(x_data, y_data):
        y_pred = forward(x_val)  # 1) Forward pass
        l = loss(y_pred, y_val)  # 2) Compute loss
        l.backward()  # 3) Back propagation to update weights
        print("\tgrad: ", x_val, y_val, w.grad.item())
        w.data = w.data - 0.01 * w.grad.item()  # 0.01 is learning rate, [[update w !!!]]

        # Manually zero the gradients after updating weights
        w.grad.data.zero_()

    print(f"Epoch: {epoch} | Loss: {l.item()}")

# After training
print("Prediction (after training)",  4, forward(4).item())

Prediction (before training) 4 4.0
	grad:  1.0 2.0 -4.0
	grad:  2.0 4.0 -15.28217601776123
	grad:  3.0 6.0 -24.784862518310547
Epoch: 0 | Loss: 2.9981918334960938
	grad:  1.0 2.0 0.4352611303329468
	grad:  2.0 4.0 1.4480102062225342
	grad:  3.0 6.0 1.1067644357681274
Epoch: 1 | Loss: 0.004207730293273926
	grad:  1.0 2.0 -0.054894957691431046
	grad:  2.0 4.0 -0.18468163907527924
	grad:  3.0 6.0 -0.15071865916252136
Epoch: 2 | Loss: 7.899241609266028e-05
	grad:  1.0 2.0 0.007354147266596556
	grad:  2.0 4.0 0.024704117327928543
	grad:  3.0 6.0 0.019997775554656982
Epoch: 3 | Loss: 1.3883075098419795e-06
	grad:  1.0 2.0 -0.00097911327611655
	grad:  2.0 4.0 -0.0032907063141465187
	grad:  3.0 6.0 -0.002662297338247299
Epoch: 4 | Loss: 2.461115400365088e-08
	grad:  1.0 2.0 0.00012947585491929203
	grad:  2.0 4.0 0.000431585795013234
	grad:  3.0 6.0 0.00035605719313025475
Epoch: 5 | Loss: 4.4019543565809727e-10
	grad:  1.0 2.0 -1.753307515173219e-05
	grad:  2.0 4.0 -6.204011879162863e-05
	grad:

# Linear Model (Torch)

https://pytorch.org/docs/stable/optim.html#torch.optim.Optimizer.zero_grad

https://pytorch.org/tutorials/beginner/nn_tutorial.html?highlight=zero_grad

https://pytorch.org/docs/stable/optim.html#torch.optim.Optimizer.step

https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch


We need to set the gradients to zero before starting to do backpropragation because PyTorch accumulates the gradients on subsequent backward passes. This is convenient while training RNNs


In [7]:
from torch import nn
import torch
from torch import tensor

In [8]:
x_data = tensor([[1.0], [2.0], [3.0]])
y_data = tensor([[2.0], [4.0], [6.0]])

In [9]:
class Model(nn.Module):
    def __init__(self):
        """
        In the constructor we instantiate two nn.Linear module
        """
        super(Model, self).__init__()
        self.linear = torch.nn.Linear(1, 1)  # One in and one out

    def forward(self, x):
        """
        In the forward function we accept a Variable of input data and we must return
        a Variable of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Variables.
        """
        y_pred = self.linear(x)
        return y_pred

In [14]:
# our model
model = Model()

# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters of the two
# nn.Linear modules which are members of the model.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)  # learn rate

# Training loop (forward + backward)
for epoch in range(500):
    # 1) Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x_data)
    # print('Data: {}, Predict: {}'.format(y_data, y_pred))
    
    # 2) Compute and print loss
    loss = criterion(y_pred, y_data)
    if epoch % 50 == 0:
        print(f'Epoch: {epoch} | Loss: {loss.item()} ')

    # Zero gradients, perform a [[backward]] pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

Epoch: 0 | Loss: 58.63104248046875 
Epoch: 50 | Loss: 0.40764355659484863 
Epoch: 100 | Loss: 0.1976705640554428 
Epoch: 150 | Loss: 0.09585221856832504 
Epoch: 200 | Loss: 0.046479612588882446 
Epoch: 250 | Loss: 0.022538447752594948 
Epoch: 300 | Loss: 0.010929154232144356 
Epoch: 350 | Loss: 0.0052996305748820305 
Epoch: 400 | Loss: 0.002569872885942459 
Epoch: 450 | Loss: 0.0012461405713111162 


In [12]:
# After training
hour_var = tensor([[4.0]])  # create var
y_pred = model(hour_var)  # forward
print("Prediction (after training)",  4, y_pred.data[0][0].item())

Prediction (after training) 4 7.982583999633789


# Logistic Regression

## simple input

In [1]:
from torch import tensor
from torch import nn
from torch import sigmoid
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# Training data and ground truth
x_data = tensor([[1.0], [2.0], [3.0], [4.0]])
y_data = tensor([[0.], [0.], [1.], [1.]])

In [3]:
class Model(nn.Module):
    def __init__(self):
        """
        In the constructor we instantiate nn.Linear module
        """
        super(Model, self).__init__()
        self.linear = nn.Linear(1, 1)  # One in and one out

    def forward(self, x):
        """
        In the forward function we accept a Variable of input data and we must return
        a Variable of output data.
        """
        y_pred = sigmoid(self.linear(x))
        return y_pred

In [4]:
# our model
model = Model()

# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters of the two
# nn.Linear modules which are members of the model.
criterion = nn.BCELoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop
for epoch in range(1000):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x_data)

    # Compute and print loss
    loss = criterion(y_pred, y_data)
    if epoch % 100 == 0:
        print(f'Epoch {epoch + 1}/1000 | Loss: {loss.item():.4f}')

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

Epoch 1/1000 | Loss: 1.3503
Epoch 101/1000 | Loss: 0.7784
Epoch 201/1000 | Loss: 0.7337
Epoch 301/1000 | Loss: 0.7017
Epoch 401/1000 | Loss: 0.6720
Epoch 501/1000 | Loss: 0.6443
Epoch 601/1000 | Loss: 0.6184
Epoch 701/1000 | Loss: 0.5943
Epoch 801/1000 | Loss: 0.5718
Epoch 901/1000 | Loss: 0.5508


In [5]:
# After training
print(f'\nLet\'s predict the hours need to score above 50%\n{"=" * 50}')
hour_var = model(tensor([[1.0]]))
print(
    f'Prediction after 1 hour of training: {hour_var.item():.4f} | Above 50%: {hour_var.item() > 0.5}')
hour_var = model(tensor([[7.0]]))
print(
    f'Prediction after 7 hours of training: {hour_var.item():.4f} | Above 50%: { hour_var.item() > 0.5}')


Let's predict the hours need to score above 50%
Prediction after 1 hour of training: 0.4673 | Above 50%: False
Prediction after 7 hours of training: 0.9340 | Above 50%: True


## multiple inputs

https://pytorch.org/docs/stable/nn.html?highlight=bceloss#torch.nn.BCELoss

https://en.wikipedia.org/wiki/Cross_entropy

In [1]:
from torch import nn, optim, from_numpy
import numpy as np

In [2]:
xy = np.loadtxt('./data/diabetes.csv.gz', delimiter=',', dtype=np.float32)
x_data = from_numpy(xy[:, 0:-1])
y_data = from_numpy(xy[:, [-1]])
print(f'X\'s shape: {x_data.shape} | Y\'s shape: {y_data.shape}')

X's shape: torch.Size([759, 8]) | Y's shape: torch.Size([759, 1])


In [3]:
class Model(nn.Module):
    def __init__(self):
        """
        In the constructor we instantiate two nn.Linear module
        """
        super(Model, self).__init__()
        self.l1 = nn.Linear(8, 6)
        self.l2 = nn.Linear(6, 4)
        self.l3 = nn.Linear(4, 1)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """
        In the forward function we accept a Variable of input data and we must return
        a Variable of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Variables.
        """
        out1 = self.sigmoid(self.l1(x))
        out2 = self.sigmoid(self.l2(out1))
        y_pred = self.sigmoid(self.l3(out2))
        return y_pred

In [5]:
# our model
model = Model()


# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters of the two
# nn.Linear modules which are members of the model.
criterion = nn.BCELoss(reduction='mean')  # binary cross entropy
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [6]:
# Training loop
for epoch in range(100):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x_data)

    # Compute and print loss
    loss = criterion(y_pred, y_data)
    if epoch % 10 == 0:
        print(f'Epoch: {epoch + 1}/100 | Loss: {loss.item():.4f}')

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


Epoch: 1/100 | Loss: 0.6840
Epoch: 11/100 | Loss: 0.6576
Epoch: 21/100 | Loss: 0.6493
Epoch: 31/100 | Loss: 0.6466
Epoch: 41/100 | Loss: 0.6456
Epoch: 51/100 | Loss: 0.6453
Epoch: 61/100 | Loss: 0.6452
Epoch: 71/100 | Loss: 0.6452
Epoch: 81/100 | Loss: 0.6451
Epoch: 91/100 | Loss: 0.6451


In [14]:
# Since I don't have additional data to text...
var = model(x_data)
print((y_data - var).mean())

tensor(0.0010, grad_fn=<MeanBackward0>)
