# Deep RL hands-on by Maxim Lapan
## Chapter 3 Deep Learning with PyTorch
* Run `conda activate gym` first
  - this environment works with torch 1.1 and tensorflow 2.0 on CUDA 10
* this notebook uses torch and the TensorBoard of tf 2.0

In [1]:
import torch as th
import numpy as np

In [2]:
# Legacy constructor called with sizes only: it allocates a 3x2 float tensor
# without initialising it, so the values displayed are arbitrary.
a = th.FloatTensor(3,2)
a

tensor([[2.0283e-19, 6.9772e+22],
        [1.8943e+23, 1.1432e-32],
        [1.3563e-19, 1.8888e+31]])

In [3]:
a.zero_() # in-place operation: sets every element of `a` to zero

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])

In [4]:
th.FloatTensor([[1,2,3], [3,2,1]]) # build a tensor directly from a nested Python list

tensor([[1., 2., 3.],
        [3., 2., 1.]])

In [5]:
# 3x2 NumPy array filled with zeros (no dtype given, so NumPy's default applies).
n = np.zeros((3, 2))
n

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [6]:
# th.tensor() copies the NumPy data; the dtype carries over (float64 in the output).
b = th.tensor(n)
b # torch copy of the np.array

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]], dtype=torch.float64)

In [7]:
# Choosing a narrow dtype (here half precision) keeps the NumPy array small in memory.
n = np.zeros((3, 2), dtype=np.float16)
# Passing dtype= converts the data while it is copied into the torch tensor.
c = th.tensor(n, dtype=th.int8)
c

tensor([[0, 0],
        [0, 0],
        [0, 0]], dtype=torch.int8)

In [8]:
a = th.tensor([1,2,3])
s = th.sum(a)  # the reduction yields a zero-dimensional (scalar) tensor
print(s)
print(s.item())  # .item() unwraps a scalar tensor into a plain Python number
print(th.tensor(1))  # a scalar tensor is printed without surrounding []

tensor(6)
6
tensor(1)


In [9]:
a = th.FloatTensor([2,3])  # float tensor built from a Python list (the output shows no device tag)
a

tensor([2., 3.])

In [10]:
ca = a.cuda()  # GPU counterpart of `a` (reported as cuda:0 in the output); needs a CUDA device
ca

tensor([2., 3.], device='cuda:0')

In [11]:
a + 2  # arithmetic on the original tensor; the result carries no device tag

tensor([4., 5.])

In [12]:
ca + 1  # same kind of arithmetic on the GPU tensor; the result stays on cuda:0

tensor([3., 4.], device='cuda:0')

In [13]:
ca.device  # reports which device holds the tensor

device(type='cuda', index=0)

In [14]:
c1a = ca.to('cuda:1')  # copy onto the device named 'cuda:1'; needs a second CUDA device
c1a

tensor([2., 3.], device='cuda:1')

In [15]:
ca  # still on cuda:0: .to('cuda:1') produced a separate tensor instead of moving `ca`

tensor([2., 3.], device='cuda:0')

In [16]:
c1a +3  # the result stays on cuda:1

tensor([5., 6.], device='cuda:1')

## gradient

In [17]:
# Only v1 is tracked by autograd; v2 is created without requires_grad.
v1 = th.tensor([1.0, 1.0], requires_grad=True)
v2 = th.tensor([2.0, 2.0])
v_sum = th.add(v1, v2)
# Double every element, then reduce to a single scalar.
v_res = th.sum(v_sum * 2)
v_res

tensor(12., grad_fn=<SumBackward0>)

In [18]:
v1.is_leaf, v2.is_leaf  # both were created directly, not produced by an operation

(True, True)

In [19]:
v_sum.is_leaf, v_res.is_leaf  # both are results of operations, so neither is a leaf

(False, False)

In [20]:
v1.requires_grad, v2.requires_grad  # only v1 was created with requires_grad = True

(True, False)

In [21]:
v_sum.requires_grad, v_res.requires_grad  # both are computed from v1, which requires grad

(True, True)

In [22]:
v_res.backward()  # backpropagate from the scalar v_res

In [23]:
v1.grad # d(v_res)/d(v1) = 2 for each element, since v_res = sum(2 * (v1 + v2))

tensor([2., 2.])

In [24]:
v2.grad  # nothing is displayed: v2 has requires_grad=False, so no gradient was stored

## Building neural network layers

In [35]:
import torch.nn as nn

In [36]:
# Linear layer mapping 2 inputs to 5 outputs. No seed is set, so the initial
# weights (and therefore the numbers shown) differ between kernel sessions.
l  = nn.Linear(2,5)
v = th.FloatTensor([1,2])
l(v)  # forward pass on a single input vector

tensor([-0.2467, -0.7217, -0.0682, -0.4887,  0.0658], grad_fn=<AddBackward0>)

In [37]:
print(l.weight)
# detach() gives a tensor without autograd tracking, which can then be converted to NumPy
np_weight= l.weight.detach().numpy()

Parameter containing:
tensor([[ 0.4690, -0.3233],
        [-0.0531,  0.0175],
        [ 0.5759, -0.5187],
        [ 0.0730, -0.4583],
        [ 0.6206, -0.3256]], requires_grad=True)


In [38]:
np_weight  # the layer's weight matrix as a NumPy array

array([[ 0.46903664, -0.32328588],
       [-0.05310351,  0.01753706],
       [ 0.5759322 , -0.51870704],
       [ 0.07304054, -0.45830405],
       [ 0.62061924, -0.32557967]], dtype=float32)

In [39]:
print(l.bias)
# Same conversion as for the weights: drop autograd tracking, then convert to NumPy
np_bias = l.bias.detach().numpy()
np_bias

Parameter containing:
tensor([-0.0691, -0.7036,  0.3933,  0.3549,  0.0963], requires_grad=True)


array([-0.06913096, -0.70363176,  0.3932529 ,  0.35486287,  0.09634274],
      dtype=float32)

In [40]:
# Recompute the layer by hand as W @ x + b; the result should agree with l(v).
np_weight @ v.numpy() + np_bias

array([-0.24666607, -0.72166115, -0.06822896, -0.48870468,  0.06580263],
      dtype=float32)

In [44]:
# NOTE: `parameters` is a method; without parentheses this prints the bound
# method object rather than the parameters. Use l.parameters() to iterate over them.
print(l.parameters)

<bound method Module.parameters of Linear(in_features=2, out_features=5, bias=True)>


In [50]:
l.to('cuda:0') # move all parameters of the layer to gpu:0

Linear(in_features=2, out_features=5, bias=True)

In [56]:
state_dict = l.state_dict() # returns a dictionary with all parameters, keyed by name
print(state_dict['weight'])
print(state_dict['bias'])

tensor([[ 0.4690, -0.3233],
        [-0.0531,  0.0175],
        [ 0.5759, -0.5187],
        [ 0.0730, -0.4583],
        [ 0.6206, -0.3256]], device='cuda:0')
tensor([-0.0691, -0.7036,  0.3933,  0.3549,  0.0963], device='cuda:0')


In [57]:
# Feed-forward stack 2 -> 5 -> 20 -> 10, finished by dropout and a softmax.
s = nn.Sequential(*[
    nn.Linear(2, 5),
    nn.ReLU(),
    nn.Linear(5, 20),
    nn.ReLU(),
    nn.Linear(20, 10),
    nn.Dropout(p=0.3),
    nn.Softmax(dim=1),  # normalises along dim 1 of the input
])
s

Sequential(
  (0): Linear(in_features=2, out_features=5, bias=True)
  (1): ReLU()
  (2): Linear(in_features=5, out_features=20, bias=True)
  (3): ReLU()
  (4): Linear(in_features=20, out_features=10, bias=True)
  (5): Dropout(p=0.3)
  (6): Softmax()
)

In [58]:
# The input is a batch holding one sample (nested list), so Softmax(dim=1) has a dim 1 to work on.
# NOTE(review): the repeated 0.0936 entries in the output look like logits zeroed by Dropout,
# which would still be active because s.eval() was never called — confirm.
s(th.FloatTensor([[1,2]]))

tensor([[0.0678, 0.0936, 0.1483, 0.0965, 0.0936, 0.1165, 0.0936, 0.0936, 0.0936,
         0.1031]], grad_fn=<SoftmaxBackward>)

In [59]:
class OurModule(nn.Module):
    """Small fully connected classifier: num_inputs -> 5 -> 20 -> num_classes.

    The last linear layer is followed by dropout and a softmax over dim 1,
    so the input passed to ``forward`` needs a dim 1 (e.g. a batch of rows).

    Args:
        num_inputs: size of each input sample.
        num_classes: number of output values per sample.
        dropout_prob: probability handed to the ``nn.Dropout`` layer.
    """

    def __init__(self, num_inputs, num_classes, dropout_prob=0.3):
        super().__init__()
        # Layers are listed in execution order; Sequential numbers them 0..6.
        layers = [
            nn.Linear(num_inputs, 5),
            nn.ReLU(),
            nn.Linear(5, 20),
            nn.ReLU(),
            nn.Linear(20, num_classes),
            nn.Dropout(p=dropout_prob),
            nn.Softmax(dim=1),
        ]
        self.pipe = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the whole pipeline and return its output."""
        output = self.pipe(x)
        return output


In [63]:
# Build the custom module and run one sample through it.
net = OurModule(num_inputs=2, num_classes=3)
print(net)
v = th.FloatTensor([[2, 3]])
out = net(v)
print(out)
# Query CUDA once and reuse the answer for both the message and the branch.
cuda_ready = th.cuda.is_available()
print("Cuda's availability is %s" % cuda_ready)
if cuda_ready:
    print("Data from cuda: %s" % out.to('cuda'))

OurModule(
  (pipe): Sequential(
    (0): Linear(in_features=2, out_features=5, bias=True)
    (1): ReLU()
    (2): Linear(in_features=5, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
    (5): Dropout(p=0.3)
    (6): Softmax()
  )
)
tensor([[0.3711, 0.2856, 0.3433]], grad_fn=<SoftmaxBackward>)
Cuda's availability is True
Data from cuda: tensor([[0.3711, 0.2856, 0.3433]], device='cuda:0', grad_fn=<CopyBackwards>)


### Using Tensorboard

In [94]:
import math
import tensorflow as tf
import datetime

In [101]:
# A timestamp in the path gives every run its own log directory.
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
LOGDIR = './tmp/py_tensorboard/{}/tri'.format(current_time)

In [102]:
# tf 2.0 changed the TensorBoard modules; here the writer comes from tf.summary.create_file_writer
writer = tf.summary.create_file_writer(LOGDIR)

In [103]:
# Scalar name -> trigonometric function to log under that name.
funcs = dict(sin=math.sin, cos=math.cos, tan=math.tan)

In [104]:
# Log sin/cos/tan for every whole degree in [-360, 360), using the angle as the step.
# The default-writer context is entered once around the loops instead of once per
# scalar, and the writer is flushed afterwards so buffered events are written to LOGDIR.
with writer.as_default():
    for angle in range(-360, 360):
        angle_rad = angle * math.pi / 180  # degrees -> radians
        for name, fun in funcs.items():
            tf.summary.scalar(name, fun(angle_rad), step=angle)
writer.flush()

### GAN on Atari images, skipped