In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# Toy dataset: three samples with one feature each; every target equals its input.
# Build flat vectors first, then reshape into (3, 1) column tensors.
x_train = torch.FloatTensor([1, 2, 3]).reshape(-1, 1)
y_train = torch.FloatTensor([1, 2, 3]).reshape(-1, 1)

In [4]:
# Display the input tensor followed by its shape, one per line.
print(x_train, x_train.shape, sep='\n')

tensor([[1.],
        [2.],
        [3.]])
torch.Size([3, 1])


In [5]:
# Weight of the linear hypothesis, starting at zero.
# requires_grad_() flags it so autograd tracks operations on it.
W = torch.zeros(1).requires_grad_()
print(W)

tensor([0.], requires_grad=True)


In [6]:
# Bias of the linear hypothesis, also starting at zero and tracked by autograd.
b = torch.zeros(1).requires_grad_(True)
print(b)

tensor([0.], requires_grad=True)


In [7]:
# Linear hypothesis H(x) = x * W + b, written with the functional torch ops.
hypothesis = torch.add(torch.mul(x_train, W), b)
print(hypothesis)

tensor([[0.],
        [0.],
        [0.]], grad_fn=<AddBackward0>)


In [8]:
# Show the current predictions again before comparing them with the targets.
print(hypothesis)

tensor([[0.],
        [0.],
        [0.]], grad_fn=<AddBackward0>)


In [9]:
# Show the targets that the predictions are compared against.
print(y_train)

tensor([[1.],
        [2.],
        [3.]])


In [10]:
# Per-sample error: prediction minus target.
print(torch.sub(hypothesis, y_train))

tensor([[-1.],
        [-2.],
        [-3.]], grad_fn=<SubBackward0>)


In [12]:
# Per-sample squared error.
print(torch.pow(hypothesis - y_train, 2))

tensor([[1.],
        [4.],
        [9.]], grad_fn=<PowBackward0>)


In [13]:
# Mean squared error: average of the squared per-sample errors.
cost = (hypothesis - y_train).pow(2).mean()
print(cost)

tensor(4.6667, grad_fn=<MeanBackward0>)


In [14]:
# Stochastic gradient descent over the two trainable tensors, learning rate 0.01.
optimizer = optim.SGD(params=[W, b], lr=0.01)

In [15]:
# One gradient-descent update. The order of these three calls matters.
# 1) Reset gradients so the upcoming backward pass does not accumulate onto old values.
optimizer.zero_grad()
# 2) Backpropagate from the cost to compute gradients for the tensors it depends on.
cost.backward()
# 3) Update the parameters registered with the optimizer using those gradients.
optimizer.step()

In [16]:
# Inspect the parameters after the single update step, one per line.
print(W, b, sep='\n')

tensor([0.0933], requires_grad=True)
tensor([0.0400], requires_grad=True)


In [17]:
# Recompute H(x) = x * W + b with the updated parameters.
hypothesis = torch.add(torch.mul(x_train, W), b)
print(hypothesis)

tensor([[0.1333],
        [0.2267],
        [0.3200]], grad_fn=<AddBackward0>)


In [18]:
# Mean squared error of the refreshed predictions.
cost = (hypothesis - y_train).pow(2).mean()
print(cost)

tensor(3.6927, grad_fn=<MeanBackward0>)


In [20]:
# Full training run from scratch. Data, parameters and optimizer are all
# re-created here so the cell does not depend on state from earlier cells.
x_train = torch.FloatTensor([[1], [2], [3]])
# Fixed typo: this was `y_tain`, which left the loop below reading whatever
# `y_train` an earlier cell had defined (NameError on a fresh kernel).
y_train = torch.FloatTensor([[1], [2], [3]])
# Trainable weight and bias, both starting at zero.
W = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
optimizer = optim.SGD([W, b], lr=0.01)

nb_epochs = 1000
# range(nb_epochs + 1) so that the final epoch (1000) is also logged.
for epoch in range(nb_epochs + 1):
    # Forward pass: H(x) = x * W + b
    hypothesis = x_train * W + b

    # Mean squared error between predictions and targets.
    cost = torch.mean((hypothesis - y_train) ** 2)

    # Gradient-descent update: reset gradients, backpropagate, step.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    # Log every 100 epochs. `cost` is the value from before this epoch's step,
    # while W and b are the values after it.
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} W: {:.3f}, b: {:.3f} Cost: {:.6f}'.format(
            epoch, nb_epochs, W.item(), b.item(), cost.item()
        ))

Epoch    0/1000 W: 0.093, b: 0.040 Cost: 4.666667
Epoch  100/1000 W: 0.873, b: 0.289 Cost: 0.012043
Epoch  200/1000 W: 0.900, b: 0.227 Cost: 0.007442
Epoch  300/1000 W: 0.921, b: 0.179 Cost: 0.004598
Epoch  400/1000 W: 0.938, b: 0.140 Cost: 0.002842
Epoch  500/1000 W: 0.951, b: 0.110 Cost: 0.001756
Epoch  600/1000 W: 0.962, b: 0.087 Cost: 0.001085
Epoch  700/1000 W: 0.970, b: 0.068 Cost: 0.000670
Epoch  800/1000 W: 0.976, b: 0.054 Cost: 0.000414
Epoch  900/1000 W: 0.981, b: 0.042 Cost: 0.000256
Epoch 1000/1000 W: 0.985, b: 0.033 Cost: 0.000158


In [21]:
# Re-create the toy dataset for the nn.Module version: three single-feature
# samples whose targets equal the inputs, shaped as (3, 1) column tensors.
x_train = torch.FloatTensor([1, 2, 3]).reshape(-1, 1)
y_train = torch.FloatTensor([1, 2, 3]).reshape(-1, 1)

Next, we define a linear regression model. In PyTorch, models are created by subclassing the provided `nn.Module` class.


In [26]:
class LinearRegressionModel(nn.Module):
    """Linear regression with one input feature and one output: H(x) = W * x + b."""

    def __init__(self):
        """Create the single linear layer that holds the weight and bias."""
        super(LinearRegressionModel, self).__init__()
        self.linear = nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Compute the model's prediction for ``x``.

        Args:
            x: Input tensor whose last dimension has size 1, matching the
                layer's single input feature.

        Returns:
            The output of the linear layer applied to ``x``.
        """
        prediction = self.linear(x)
        return prediction

The model's `__init__` defines the layers to be used. Since we are building a linear regression model here, we use `nn.Linear`. The `forward` method specifies how the model computes its output from the input.

In [27]:
# Create an instance of the linear regression model.
model = LinearRegressionModel()

Now let's create a model and compute the predicted values H(x).

In [28]:
# Calling the model on the inputs runs its forward() and yields the predictions H(x).
hypothesis = model(x_train)

In [29]:
# Inspect the model's predictions for x_train.
print(hypothesis)

tensor([[0.4784],
        [1.2280],
        [1.9776]], grad_fn=<AddmmBackward0>)


Now let's compute the cost as the mean squared error (MSE). PyTorch provides MSE out of the box as `F.mse_loss`.

In [30]:
# Show predictions and targets back to back for comparison, one per line.
print(hypothesis, y_train, sep='\n')

tensor([[0.4784],
        [1.2280],
        [1.9776]], grad_fn=<AddmmBackward0>)
tensor([[1.],
        [2.],
        [3.]])


Next, reduce the cost by updating W and b of H(x) based on the cost just computed. For this, you can use one of the optimizers from PyTorch's `torch.optim`.

In [31]:
# Mean squared error between the model's predictions and the targets.
cost = F.mse_loss(input=hypothesis, target=y_train)

In [32]:
# Inspect the MSE between the model's predictions and the targets.
print(cost)

tensor(0.6378, grad_fn=<MseLossBackward0>)


In [33]:
# SGD over every parameter registered on the model, learning rate 0.01.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

In [34]:
# One gradient-descent update. The order of these three calls matters.
# 1) Reset gradients so the upcoming backward pass does not accumulate onto old values.
optimizer.zero_grad()
# 2) Backpropagate from the cost to compute gradients for the tensors it depends on.
cost.backward()
# 3) Update the parameters registered with the optimizer using those gradients.
optimizer.step()

Now that we understand the Linear Regression code, let's actually run the code and fit it.

In [35]:
# data
x_train = torch.FloatTensor([[1], [2], [3]])
# Fixed typo: this was `y_tain`, which left the loop below reading whatever
# `y_train` an earlier cell had defined (NameError on a fresh kernel).
y_train = torch.FloatTensor([[1], [2], [3]])
# model initialization
model = LinearRegressionModel()
# optimizer settings
optimizer = optim.SGD(model.parameters(), lr=0.01)

nb_epochs = 1000
# range(nb_epochs + 1) so that the final epoch (1000) is also logged.
for epoch in range(nb_epochs + 1):
    # compute the hypothesis H(x)
    prediction = model(x_train)
    # compute the cost (mean squared error)
    cost = F.mse_loss(prediction, y_train)
    # improve H(x) using the cost: reset gradients, backpropagate, step
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    # log every 100 epochs
    if epoch % 100 == 0:
        # The model registers exactly two single-element parameters, in order:
        # the linear layer's weight, then its bias.
        params = list(model.parameters())
        W = params[0].item()
        b = params[1].item()
        print('Epoch {:4d}/{} W: {:.3f}, b: {:.3f} Cost: {:.6}'.format(
            epoch, nb_epochs, W, b, cost.item()
        ))


Epoch    0/1000 W: 0.859, b: 0.747 Cost: 0.273696
Epoch  100/1000 W: 0.766, b: 0.533 Cost: 0.0408869
Epoch  200/1000 W: 0.816, b: 0.419 Cost: 0.0252656
Epoch  300/1000 W: 0.855, b: 0.329 Cost: 0.0156126
Epoch  400/1000 W: 0.886, b: 0.259 Cost: 0.00964765
Epoch  500/1000 W: 0.911, b: 0.203 Cost: 0.00596166
Epoch  600/1000 W: 0.930, b: 0.160 Cost: 0.00368394
Epoch  700/1000 W: 0.945, b: 0.126 Cost: 0.00227645
Epoch  800/1000 W: 0.957, b: 0.099 Cost: 0.00140671
Epoch  900/1000 W: 0.966, b: 0.078 Cost: 0.000869259
Epoch 1000/1000 W: 0.973, b: 0.061 Cost: 0.000537152
