# A self-written PyTorch tutorial

In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import seaborn as sns
import torch
import torch.nn as nn

  from .autonotebook import tqdm as notebook_tqdm


## Save all overwritten tensors in a list

Overwritten or deleted tensors are still kept in PyTorch's autograd graph, so backpropagation can still be performed. But because their names are lost from the Python namespace, it is hard to retrieve those tensors' gradients. One solution is to save every tensor in a list.

In [3]:
# Baseline scenario: every intermediate tensor keeps its own Python name
# (b1, b2), so each one's gradient can be read back after backward().
a = torch.tensor([2.0, 1.0], requires_grad=True)

b1 = a.pow(3)
b1.retain_grad()  # b1 is not a leaf, so ask autograd to keep its .grad

b2 = b1.pow(2)
b2.retain_grad()  # same for b2

c = torch.sum(5 * b2)
c.backward()

# One print per tensor: label on the first line, gradient on the second.
print('grad for a', a.grad, sep='\n')
print('grad for b1', b1.grad, sep='\n')
print('grad for b2', b2.grad, sep='\n')

grad for a
tensor([960.,  30.])
grad for b1
tensor([80., 10.])
grad for b2
tensor([5., 5.])


In [4]:
# Overwrite scenario: the single name `b` is bound first to a**3 and then
# rebound to (a**3)**2, so only the second tensor stays reachable by name.
a = torch.tensor([2.0, 1.0], requires_grad=True)

b = a.pow(3)
b = b.pow(2)  # rebinding: `b` now refers to the squared tensor
b.retain_grad()  # only this last tensor has its .grad retained

c = torch.sum(5 * b)
c.backward()

print('grad for a', a.grad, sep='\n')
print('grad for b', b.grad, sep='\n')
print("only the last b's grad is accessible")

grad for a
tensor([960.,  30.])
grad for b
tensor([5., 5.])
only the last b's grad is accessible


In [5]:
# Keep every version of `b` in a list before the name gets rebound, so each
# intermediate gradient can still be looked up after backward().
a = torch.tensor([2.0, 1.0], requires_grad=True)

b = a.pow(3)
b.retain_grad()
b_list = [b]  # first version of b (a**3)

# Rebinding `b` creates a new tensor; the one already stored in b_list is
# left untouched and still takes part in the graph.
b = b.pow(2)
b.retain_grad()
b_list.append(b)  # second version of b ((a**3)**2)

c = torch.sum(5 * b)
c.backward()

print('grad for a', a.grad, sep='\n')
print('grad for b1', b_list[0].grad, sep='\n')
print('grad for b2', b_list[1].grad, sep='\n')

grad for a
tensor([960.,  30.])
grad for b1
tensor([80., 10.])
grad for b2
tensor([5., 5.])


## Stacking a list into a tensor won't block backpropagation


In [7]:
# Two independent leaf tensors; each is cubed, and the results are collected
# in a list that is then stacked into one tensor.
a1 = torch.tensor([2.0, 1.0], requires_grad=True)
a2 = torch.tensor([1.0, 2.0], requires_grad=True)

b_list = []

b = a1.pow(3)
b.retain_grad()
b_list.append(b)

b = a2.pow(3)  # `b` is rebound; the first cube stays available via b_list
b.retain_grad()
b_list.append(b)

# Stack the list into a single tensor and keep its gradient as well.
b_tensor = torch.stack(b_list)
b_tensor.retain_grad()

d = b_tensor.pow(2)
d.retain_grad()

c = torch.sum(5 * d)
c.backward()

# Gradients reach both leaves through the stacked tensor.
print('grad for a1', a1.grad, sep='\n')
print('grad for a2', a2.grad, sep='\n')
print('grad for b_tensor', b_tensor.grad, sep='\n')

grad for a1
tensor([960.,  30.])
grad for a2
tensor([ 30., 960.])
grad for b_tensor
tensor([[80., 10.],
        [10., 80.]])


## In-place operations

An in-place operation is an operation that directly changes the content of a given tensor without making a copy. In-place operations in PyTorch are always suffixed with an underscore (`_`), like `.add_()` or `.scatter_()`. Python operators like `+=` or `*=` are also in-place operations.

In-place operations save some memory, but can be problematic when computing derivatives because of an immediate loss of history. Hence, their use is discouraged.

Changing elements of a tensor is also an in-place operation, but it seems to work fine with autograd.