## Gradient

In [1]:
import torch

In [3]:
# 3x4 tensor of standard-normal samples. requires_grad=True asks autograd to
# record operations on it so that gradients can be computed later.
x = torch.randn(3, 4, requires_grad=True)
x

tensor([[ 1.2066, -0.8176, -0.5722, -0.2192],
        [-1.3776, -1.5579,  1.0111,  0.6343],
        [-0.0969,  1.2330,  1.7345, -0.3557]], requires_grad=True)

In [4]:
# Second 3x4 random tensor, also tracked by autograd.
b = torch.randn(3, 4, requires_grad=True)
b

tensor([[ 1.5999,  0.7062,  0.8313, -1.3118],
        [-0.3060,  2.3464,  0.2191, -0.6782],
        [-0.0836, -1.3554,  1.2559,  0.4027]], requires_grad=True)

In [5]:
# Element-wise sum of the two tracked tensors; t becomes a node in the graph.
t = x + b

In [6]:
# Reduce t to a scalar so that backward() can be called without arguments.
y = torch.sum(t)
y

tensor(4.4492, grad_fn=<SumBackward0>)

In [7]:
# Backpropagate from the scalar y; this fills .grad on the leaf tensors
# x and b, which were created with requires_grad=True.
y.backward()

In [9]:
# dy/db for y = sum(x + b): every element contributes with coefficient 1.
b.grad

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [10]:
# t was computed from tensors that require grad, so it requires grad as well.
t.requires_grad

True

### Calculating gradients on a small graph

In [11]:
# Build the tiny graph z = w * x + b.
# x is plain input data (no gradient tracking); w and b are tracked.
x = torch.rand(1)
b = torch.rand(1, requires_grad=True)
w = torch.rand(1, requires_grad=True)

y = w * x  # intermediate node
z = y + b  # final output

In [12]:
# Leaf status for (x, w, b, y, z): tensors created directly are leaves,
# results of operations on tracked tensors are not.
tuple(tensor.is_leaf for tensor in (x, w, b, y, z))

(True, True, True, False, False)

In [13]:
# Backpropagate from z. retain_graph=True keeps the autograd graph alive so
# backward() could be called on it again; note that repeated calls accumulate
# into .grad rather than overwrite it.
z.backward(retain_graph=True)

In [14]:
# dz/dw for z = w * x + b, i.e. the value of x.
w.grad

tensor([0.4197])

In [15]:
# dz/db for z = w * x + b, which is 1.
b.grad

tensor([1.])

### Regression

#### x, y training data

In [18]:
import numpy as np

# Inputs 0..10 as a float32 column vector of shape (11, 1).
x_values = list(range(11))
x_train = np.array(x_values, dtype=np.float32).reshape(-1, 1)
x_train.shape

(11, 1)

In [20]:
# Targets follow the exact line y = 4x + 5, stored as a float32 column vector.
y_values = [4 * value + 5 for value in x_values]
y_train = np.array(y_values, dtype=np.float32).reshape(-1, 1)
y_train.shape

(11, 1)

#### model

In [21]:
import torch.nn as nn

In [29]:
class LinearRgressionModel(nn.Module):
    """Linear regression expressed as a single ``nn.Linear`` layer.

    The class name's spelling is kept unchanged because later cells refer to it.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output values per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The only learnable component: one affine layer (weight and bias).
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)

In [30]:
# One input feature and one output value: a scalar-to-scalar regression.
input_dim = output_dim = 1
model = LinearRgressionModel(input_dim, output_dim)

In [31]:
# Show the module's repr to confirm the layer configuration.
model

LinearRgressionModel(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)

#### parameters and loss function

In [35]:
# Training hyper-parameters.
epochs = 1000
learning_rate = 0.01

# Mean-squared-error objective, minimised with plain SGD over the model's
# parameters.
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

#### train

In [39]:
# NOTE(review): the recorded output below starts at a loss of ~4e-5, which
# suggests this cell was re-run on an already trained model. Re-create the
# model and optimizer first to get a from-scratch loss curve -- TODO confirm.

# The training data never changes, so convert it to tensors once instead of
# on every epoch.
inputs = torch.from_numpy(x_train)
labels = torch.from_numpy(y_train)

# Count epochs from 1 so the progress report needs no manual increment.
for epoch in range(1, epochs + 1):
    # Gradients accumulate across backward() calls, so reset them first.
    optimizer.zero_grad()

    # Forward pass and loss.
    outputs = model(inputs)
    loss = criterion(outputs, labels)

    # Backward pass, then one SGD update of the parameters.
    loss.backward()
    optimizer.step()

    # Report progress every 50 epochs.
    if epoch % 50 == 0:
        print('epoch {},loss {}'.format(epoch, loss.item()))

epoch 50,loss 3.8444079109467566e-05
epoch 100,loss 2.193249383708462e-05
epoch 150,loss 1.251150661119027e-05
epoch 200,loss 7.138766250136541e-06
epoch 250,loss 4.071950570505578e-06
epoch 300,loss 2.3234279069583863e-06
epoch 350,loss 1.3248012464828207e-06
epoch 400,loss 7.564365773760073e-07
epoch 450,loss 4.325591476117552e-07
epoch 500,loss 2.464127248913428e-07
epoch 550,loss 1.40716693408649e-07
epoch 600,loss 8.038711740709914e-08
epoch 650,loss 4.567294453750037e-08
epoch 700,loss 2.588225633815e-08
epoch 750,loss 1.4807648440751109e-08
epoch 800,loss 8.676000362584091e-09
epoch 850,loss 4.6947290499588235e-09
epoch 900,loss 3.0761797553680026e-09
epoch 950,loss 1.8266167467473338e-09
epoch 1000,loss 8.660249739556036e-10


In [42]:
# Inference only: disable gradient tracking instead of enabling it on the
# input and then stripping it from the output via the discouraged `.data`.
with torch.no_grad():
    predicted = model(torch.from_numpy(x_train)).numpy()
predicted

array([[ 4.9999447],
       [ 8.999952 ],
       [12.999961 ],
       [16.99997  ],
       [20.999977 ],
       [24.999985 ],
       [28.999994 ],
       [33.       ],
       [37.000008 ],
       [41.000015 ],
       [45.000023 ]], dtype=float32)

### Save model

In [43]:
# Persist only the learned parameters (the state dict), then load them back
# into the same model to demonstrate the round trip.
checkpoint_path = 'model.pkl'
torch.save(model.state_dict(), checkpoint_path)
model.load_state_dict(torch.load(checkpoint_path))

<All keys matched successfully>

### GPU

In [45]:
class LinearRgressionModel(nn.Module):
    """Linear regression expressed as a single ``nn.Linear`` layer.

    Same definition as in the "model" section, repeated here so that the GPU
    cell can be run on its own.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output values per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The only learnable component: one affine layer (weight and bias).
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)
    
# Fresh, untrained model for the device-aware run.
input_dim = 1
output_dim = 1
model = LinearRgressionModel(input_dim, output_dim)

# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Move the parameters to the device before the optimizer is built from them.
model.to(device)

epochs = 1000
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

# The training data is constant, so convert it and transfer it to the device
# once rather than repeating the copy on every epoch.
inputs = torch.from_numpy(x_train).to(device)
labels = torch.from_numpy(y_train).to(device)

# Count epochs from 1 so the progress report needs no manual increment.
for epoch in range(1, epochs + 1):
    # Gradients accumulate across backward() calls, so reset them first.
    optimizer.zero_grad()

    # Forward pass and loss.
    outputs = model(inputs)
    loss = criterion(outputs, labels)

    # Backward pass, then one SGD update of the parameters.
    loss.backward()
    optimizer.step()

    # Report progress every 50 epochs.
    if epoch % 50 == 0:
        print('epoch {},loss {}'.format(epoch, loss.item()))


epoch 50,loss 3.946622848510742
epoch 100,loss 2.2510063648223877
epoch 150,loss 1.2838897705078125
epoch 200,loss 0.7322835922241211
epoch 250,loss 0.4176669120788574
epoch 300,loss 0.23822173476219177
epoch 350,loss 0.1358734667301178
epoch 400,loss 0.0774967223405838
epoch 450,loss 0.04420126602053642
epoch 500,loss 0.025211090222001076
epoch 550,loss 0.014379395171999931
epoch 600,loss 0.008201493881642818
epoch 650,loss 0.004677999764680862
epoch 700,loss 0.0026682179886847734
epoch 750,loss 0.0015218466287478805
epoch 800,loss 0.0008680180762894452
epoch 850,loss 0.0004950548755005002
epoch 900,loss 0.000282366294413805
epoch 950,loss 0.00016105506801977754
epoch 1000,loss 9.18474979698658e-05
