## Gradient

In [1]:
import torch

In [2]:
# Leaf tensor of shape (3, 4) sampled from a standard normal distribution;
# requires_grad=True asks autograd to track operations on it.
x = torch.randn((3, 4), requires_grad=True)
x

tensor([[-0.7447,  0.7443, -0.3827,  1.8956],
        [-0.6521, -1.6380, -0.2594,  0.3422],
        [ 0.4002,  0.5529, -0.3605,  1.6759]], requires_grad=True)

In [3]:
# Second (3, 4) leaf tensor tracked by autograd; its gradient is inspected after backward().
b = torch.randn((3, 4), requires_grad=True)
b

tensor([[ 0.9897,  1.2783,  1.1202,  0.8872],
        [-0.8363,  2.3575, -3.9925, -1.2781],
        [-1.4624, -0.4510, -0.8345,  0.7075]], requires_grad=True)

In [4]:
# Element-wise sum of the two tracked tensors; t becomes a non-leaf node in the graph.
t = torch.add(x, b)

In [5]:
# Reduce t to a scalar so backward() can be called without an explicit gradient argument.
y = torch.sum(t)
y

tensor(0.0591, grad_fn=<SumBackward0>)

In [6]:
# Backpropagate from the scalar y; the results accumulate into the .grad
# attribute of the leaf tensors that require grad (x and b).
y.backward()

In [7]:
# y = sum(x + b), so dy/db is 1 for every element of b.
b.grad

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [8]:
# t was computed from tensors that require grad, so it requires grad as well.
t.requires_grad

True

### Calculation

In [9]:
# x is plain input data (not tracked); b and w are the leaves that receive gradients.
x = torch.rand(1)
b = torch.rand(1, requires_grad=True)
w = torch.rand(1, requires_grad=True)

# Build z = w * x + b in two steps so the intermediate node y stays inspectable.
y = torch.mul(w, x)
z = torch.add(y, b)

In [10]:
# x, w and b were created directly, so they are leaves; y and z are results of
# operations on tensors that require grad, so they are not.
x.is_leaf, w.is_leaf, b.is_leaf, y.is_leaf, z.is_leaf 

(True, True, True, False, False)

In [11]:
# Backpropagate from z. retain_graph=True keeps the graph alive so backward()
# can be called on it again; note that each call adds to the existing .grad values.
z.backward(retain_graph=True)

In [12]:
# z = w * x + b, so dz/dw equals the value of x.
w.grad

tensor([0.1849])

In [13]:
# z = w * x + b, so dz/db is 1.
b.grad

tensor([1.])

### Regression

#### x, y training data

In [14]:
import numpy as np

# Inputs 0..10 as a float32 column vector: 11 samples with one feature each.
x_values = list(range(11))
x_train = np.asarray(x_values, dtype=np.float32).reshape(-1, 1)
x_train.shape

(11, 1)

In [15]:
# Targets lie exactly on the line y = 4x + 5, stored as a float32 column vector
# with one target per input sample.
y_values = [4 * value + 5 for value in x_values]
y_train = np.asarray(y_values, dtype=np.float32).reshape(-1, 1)
y_train.shape

(11, 1)

#### model

In [16]:
import torch.nn as nn

In [17]:
class LinearRgressionModel(nn.Module):
    """Linear regression as a single fully connected layer.

    Args:
        input_dim: number of input features per sample.
        output_dim: number of values predicted per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The only learnable component: an affine map (weight and bias).
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to `x` and return its output."""
        return self.linear(x)

In [18]:
# One input feature and one predicted value per sample.
input_dim, output_dim = 1, 1
model = LinearRgressionModel(input_dim=input_dim, output_dim=output_dim)

In [19]:
# Display the module; its repr lists the registered sub-layers.
model

LinearRgressionModel(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)

#### parameters and loss function

In [20]:
# Training hyperparameters.
epochs, learning_rate = 1000, 0.01

# Mean squared error between predictions and targets.
criterion = nn.MSELoss()

# Plain stochastic gradient descent over every parameter of the model.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

#### train

In [21]:
# The training set never changes, so wrap the NumPy arrays as tensors once
# instead of rebuilding them on every epoch.
inputs = torch.from_numpy(x_train)
labels = torch.from_numpy(y_train)

# Count epochs from 1 so the progress report lands on 50, 100, ..., epochs.
for epoch in range(1, epochs + 1):
    # Gradients accumulate across backward() calls, so reset them first.
    optimizer.zero_grad()

    # Forward pass over the full batch.
    outputs = model(inputs)

    # Loss between predictions and targets.
    loss = criterion(outputs, labels)

    # Backward pass: fill .grad of every model parameter.
    loss.backward()

    # Apply one optimizer update to the parameters.
    optimizer.step()

    if epoch % 50 == 0:
        print('epoch {},loss {}'.format(epoch,loss.item()))

epoch 50,loss 3.5899481773376465
epoch 100,loss 2.0475733280181885
epoch 150,loss 1.167859673500061
epoch 200,loss 0.6661033630371094
epoch 250,loss 0.37992003560066223
epoch 300,loss 0.2166920155286789
epoch 350,loss 0.12359335273504257
epoch 400,loss 0.07049278914928436
epoch 450,loss 0.04020634666085243
epoch 500,loss 0.022931911051273346
epoch 550,loss 0.01307953055948019
epoch 600,loss 0.007460181601345539
epoch 650,loss 0.0042549786157906055
epoch 700,loss 0.002426860388368368
epoch 750,loss 0.001384245348162949
epoch 800,loss 0.0007895108428783715
epoch 850,loss 0.00045033020433038473
epoch 900,loss 0.00025687110610306263
epoch 950,loss 0.0001465037785237655
epoch 1000,loss 8.355110185220838e-05


In [22]:
# Inference only: run the forward pass with autograd disabled so no graph is
# recorded and the output can be converted straight to NumPy. This replaces the
# discouraged `.data` access and the unnecessary `requires_grad_()` on the input.
with torch.no_grad():
    predicted = model(torch.from_numpy(x_train)).numpy()
predicted

array([[ 4.982996],
       [ 8.985445],
       [12.987893],
       [16.990341],
       [20.99279 ],
       [24.99524 ],
       [28.997686],
       [33.000137],
       [37.002586],
       [41.005035],
       [45.007484]], dtype=float32)

### Save model

In [23]:
# Persist only the parameters (the state dict), then read the file back into
# the same model to confirm that the saved keys match.
checkpoint_path = 'model.pkl'
torch.save(model.state_dict(), checkpoint_path)
model.load_state_dict(torch.load(checkpoint_path))

<All keys matched successfully>

### GPU

In [24]:
# NOTE(review): same definition as the class in the earlier "model" section;
# presumably repeated so this cell can be run on its own.
class LinearRgressionModel(nn.Module):
    """Linear regression as a single fully connected layer.

    Args:
        input_dim: number of input features per sample.
        output_dim: number of values predicted per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The only learnable component: an affine map (weight and bias).
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to `x` and return its output."""
        return self.linear(x)
    
input_dim = 1
output_dim = 1
model = LinearRgressionModel(input_dim, output_dim)

# Use the first CUDA device when one is available, otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Move the model to the device before constructing the optimizer.
model.to(device)

epochs = 1000
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

# The training set is fixed: convert it and transfer it to the device once,
# rather than repeating the host-to-device copy on every epoch.
inputs = torch.from_numpy(x_train).to(device)
labels = torch.from_numpy(y_train).to(device)

# Count epochs from 1 so the progress report lands on 50, 100, ..., epochs.
for epoch in range(1, epochs + 1):
    # Gradients accumulate across backward() calls, so reset them first.
    optimizer.zero_grad()

    # Forward pass over the full batch (model and data live on the same device).
    outputs = model(inputs)

    # Loss between predictions and targets.
    loss = criterion(outputs, labels)

    # Backward pass: fill .grad of every model parameter.
    loss.backward()

    # Apply one optimizer update to the parameters.
    optimizer.step()

    if epoch % 50 == 0:
        print('epoch {},loss {}'.format(epoch,loss.item()))


epoch 50,loss 4.3861870765686035
epoch 100,loss 2.5017170906066895
epoch 150,loss 1.4268853664398193
epoch 200,loss 0.8138422966003418
epoch 250,loss 0.46418485045433044
epoch 300,loss 0.26475435495376587
epoch 350,loss 0.1510057896375656
epoch 400,loss 0.08612798899412155
epoch 450,loss 0.0491241030395031
epoch 500,loss 0.028018712997436523
epoch 550,loss 0.01598086953163147
epoch 600,loss 0.00911486055701971
epoch 650,loss 0.005198811646550894
epoch 700,loss 0.0029651224613189697
epoch 750,loss 0.001691219164058566
epoch 800,loss 0.0009645908139646053
epoch 850,loss 0.0005501704290509224
epoch 900,loss 0.000313791970256716
epoch 950,loss 0.00017895617929752916
epoch 1000,loss 0.00010205437138210982
