## Learning pytorch

In [1]:
import torch

In [2]:
import torchvision

In [3]:
torch.cuda.is_available()

False

In [5]:
## Torch Tensors

a = torch.tensor([2,2,1])
print(a)

tensor([2, 2, 1])


In [10]:
## 2-D Tensor

b = torch.tensor([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])
print(b)

tensor([[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9],
        [10, 11, 12]])


In [11]:
## size of tensor
print(a.shape)
print(b.shape)
print(a.size())
print(b.size())

torch.Size([3])
torch.Size([4, 3])
torch.Size([3])
torch.Size([4, 3])


In [12]:
## Get the height/number of rows of b

print(b.shape[0])

4


In [15]:
## Define Float Tensor
c = torch.FloatTensor([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])
print(c)

## or we can do 
c = torch.tensor([[1,2,3],[4,5,6],[7,8,9],[10,11,12]],dtype=torch.float)

tensor([[ 1.,  2.,  3.],
        [ 4.,  5.,  6.],
        [ 7.,  8.,  9.],
        [10., 11., 12.]])


In [16]:
print(c)

tensor([[ 1.,  2.,  3.],
        [ 4.,  5.,  6.],
        [ 7.,  8.,  9.],
        [10., 11., 12.]])


In [17]:
## Define Double tensor
d = torch.DoubleTensor([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])
print(d)

tensor([[ 1.,  2.,  3.],
        [ 4.,  5.,  6.],
        [ 7.,  8.,  9.],
        [10., 11., 12.]], dtype=torch.float64)


In [18]:
d = torch.tensor([[1,2,3],[4,5,6],[7,8,9],[10,11,12]],dtype=torch.double)
print(d)

tensor([[ 1.,  2.,  3.],
        [ 4.,  5.,  6.],
        [ 7.,  8.,  9.],
        [10., 11., 12.]], dtype=torch.float64)


In [19]:
print(c.mean())

tensor(6.5000)


In [20]:
print(d.mean())

tensor(6.5000, dtype=torch.float64)


In [21]:
print(c.std())

tensor(3.6056)


In [22]:
print(d.std())

tensor(3.6056, dtype=torch.float64)


In [32]:
## Reshape b
print(b)
##Note: if one of the dimension is -1 , its size can be inferred
## with -1 , torch automatically identifies the shape of the dataset
print(b.view(-1,1))
print(b.view(12))
print("Exactly Same")
print(b.view(-1,4))
print(b.view(3,4))
## assign b a new shape

b = b.view(1,-1)
print(b)
print(b.shape)
## we can even reshape 3D tensors
print('\n Reshape 3D Tensor')
## Create 3D tensor with 2 channels, 3 rows and 4 columns (channels,rows,columns)
three_dim = torch.randn(2,3,4)
print("\n")
print(three_dim)
print(three_dim.view(2,12)) ## reshape to 2 rows and 12 columns
print(three_dim.view(2,-1))


tensor([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12]])
tensor([[ 1],
        [ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 7],
        [ 8],
        [ 9],
        [10],
        [11],
        [12]])
tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])
Exactly Same
tensor([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]])
tensor([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]])
tensor([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12]])
torch.Size([1, 12])

 Reshape 3D Tensor


tensor([[[-0.2488,  0.3214,  1.0353,  2.0964],
         [-0.1158, -0.3109,  0.9340, -1.2685],
         [ 0.9289, -0.5995, -0.0384,  0.4313]],

        [[ 2.1955, -1.1190, -2.1035, -0.7137],
         [-0.3835, -0.7317, -0.1828,  0.0279],
         [-0.0803, -0.8355, -0.5537,  0.3270]]])
tensor([[-0.2488,  0.3214,  1.0353,  2.0964, -0.1158, -0.3109,  0.9340, -1.2685,
          0.9289, -0.5995, -0.0384,  0.4313],
        [ 2.1955, -1.1190, -2.

In [31]:
## create matrix with random numbers between 0 and 1
r = torch.rand(4,4)
print(r)

tensor([[0.0985, 0.6153, 0.8002, 0.8301],
        [0.7281, 0.4216, 0.7838, 0.4438],
        [0.7288, 0.4235, 0.6825, 0.2432],
        [0.8579, 0.7832, 0.6422, 0.3423]])


In [33]:
### create a matrix with random numbers taken from normal distribution with mean zero and variance =1
r2 = torch.randn(4,4)
print(r2)
print(r2.dtype)

tensor([[ 1.1514, -0.5789, -0.2085,  0.1305],
        [-0.5260,  0.2188, -0.4218,  1.7036],
        [-1.6577,  0.1456, -0.8763, -0.7859],
        [-1.8275, -0.3673,  0.8895,  1.3448]])
torch.float32


In [34]:
## create an array of 5 random integers from values between 6 and 9 (exclusive of 10)
in_array = torch.randint(6,10,(5,))
print(in_array)
print(in_array.dtype)

tensor([7, 8, 7, 9, 6])
torch.int64


In [35]:
## create an array of 5 random integers from values between 6 and 9 (exclusive of 10)
in_array2 = torch.randint(6,10,(3,3))
print(in_array2)
# print(in_array.dtype)

tensor([[8, 9, 8],
        [9, 6, 8],
        [7, 9, 6]])


In [37]:
## get the number of elements in in_array
print(torch.numel(in_array))

## get the number of elements in in_array2
print(torch.numel(in_array2))

5
9


In [45]:
## Construct a 3X3 matrix of zeroes and dtype long:

z = torch.zeros(3,3,dtype=torch.long)
print(z)
print(z.dtype)
print("\n")
## construct a 3X3 matrix of ones

o = torch.ones(3,3)
print(o)
print(o.dtype)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])
torch.int64


tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
torch.float32


In [46]:
r2_like = torch.randn_like(r2,dtype=torch.double)
print(r2_like)

tensor([[ 0.4651,  0.4341,  0.1351, -0.4641],
        [-0.8804,  0.0610, -0.9686, -0.4326],
        [-0.2348,  0.1646, -1.2201,  0.9408],
        [-0.4633,  0.0672, -2.0406, -0.5006]], dtype=torch.float64)


In [48]:
## Add two tensors , make sure they are of same size and data type
add_result = torch.add(r,r2)
print(r)
print(r2)
print(add_result)

tensor([[0.0985, 0.6153, 0.8002, 0.8301],
        [0.7281, 0.4216, 0.7838, 0.4438],
        [0.7288, 0.4235, 0.6825, 0.2432],
        [0.8579, 0.7832, 0.6422, 0.3423]])
tensor([[ 1.1514, -0.5789, -0.2085,  0.1305],
        [-0.5260,  0.2188, -0.4218,  1.7036],
        [-1.6577,  0.1456, -0.8763, -0.7859],
        [-1.8275, -0.3673,  0.8895,  1.3448]])
tensor([[ 1.2498,  0.0364,  0.5918,  0.9606],
        [ 0.2021,  0.6404,  0.3620,  2.1474],
        [-0.9288,  0.5691, -0.1938, -0.5427],
        [-0.9696,  0.4159,  1.5317,  1.6871]])


In [55]:
## inplace addition change the value of r2
r2.add_(r)
print(r2)

tensor([[ 1.2498,  0.0364,  0.5918,  0.9606],
        [ 0.2021,  0.6404,  0.3620,  2.1474],
        [-0.9288,  0.5691, -0.1938, -0.5427],
        [-0.9696,  0.4159,  1.5317,  1.6871]])


In [56]:
print(r2)
print(r2[:,1]) ## all the rows , 1st column
print(r2[:,:2]) ## all the rows from column 1 until 2
print(r2[:3,:]) ## all the rows until 3 and all columns

tensor([[ 1.2498,  0.0364,  0.5918,  0.9606],
        [ 0.2021,  0.6404,  0.3620,  2.1474],
        [-0.9288,  0.5691, -0.1938, -0.5427],
        [-0.9696,  0.4159,  1.5317,  1.6871]])
tensor([0.0364, 0.6404, 0.5691, 0.4159])
tensor([[ 1.2498,  0.0364],
        [ 0.2021,  0.6404],
        [-0.9288,  0.5691],
        [-0.9696,  0.4159]])
tensor([[ 1.2498,  0.0364,  0.5918,  0.9606],
        [ 0.2021,  0.6404,  0.3620,  2.1474],
        [-0.9288,  0.5691, -0.1938, -0.5427]])


In [59]:
print(r2)
num_ten = r2[2,3]
print(num_ten)
print(num_ten.item())
print(r2[2,:])

tensor([[ 1.2498,  0.0364,  0.5918,  0.9606],
        [ 0.2021,  0.6404,  0.3620,  2.1474],
        [-0.9288,  0.5691, -0.1938, -0.5427],
        [-0.9696,  0.4159,  1.5317,  1.6871]])
tensor(-0.5427)
-0.5426946878433228
tensor([-0.9288,  0.5691, -0.1938, -0.5427])


### Numpy Bridge

In [62]:
import numpy as np

## converting torch tensors into numpy arrays

a = torch.ones(5)
print(a)
b = a.numpy()
print(b)
## See how numpy array change their values
a.add_(1)
print(a)
print(b)

## Numpy array is also changed by torch tensor , this is called numpy bridge

tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]
tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [63]:
## Opposite is also true
## Converting numpy array to torch tensor
## see how changing the numpy array changes the torch tensor

a = np.ones(5)
b = torch.from_numpy(a)
np.add(a,1,out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


In [66]:
# # ## Move the tensor to the GPU
# # r2 = r2.cuda()
# # print(r2)

# ## Provide easy switching between cpu and gpu

# CUDA= torch.cuda.is_available()
# print(CUDA)
# if CUDA:
#     add_result = add_result.cuda()
#     print(add_result)

In [68]:
## converting a list to tensor
a = [1,2,3,4]
print(a)
to_list = torch.tensor(a)
print(to_list,to_list.dtype)

[1, 2, 3, 4]
tensor([1, 2, 3, 4]) torch.int64


In [72]:
data = [[1.,2.],[3.,4.],
       [5.,6.],[7.,8.]]

T = torch.tensor(data)
print(T,T.dtype)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.],
        [7., 8.]]) torch.float32


In [73]:
## Tensor Concatenation
first_1 = torch.randn(2,5)
print(first_1)
second_1 = torch.randn(3,5)
print(second_1)

## Concatenate along the 0 dimension (concatenate rows)
con_1 = torch.cat([first_1,second_1])
print('\n')
print(con_1)
print('\n')

first_2 = torch.randn(2,3)
print(first_2)
second_2 = torch.randn(2,5)
print(second_2)

## Concatenate along the 1 dimension ( concatenate columns)
con_2 = torch.cat([first_2,second_2],1)
print('\n')
print(con_2)
print('\n')

tensor([[ 1.2032, -0.9032,  0.6506,  0.8014, -0.6698],
        [-0.9130, -0.6001,  1.3284, -0.6865,  0.5031]])
tensor([[-0.8854, -0.1278, -2.0463, -2.0775, -0.7252],
        [ 1.3985, -0.7678,  0.0188,  0.0072, -0.8908],
        [ 0.5041,  1.2041, -0.7236, -0.1657,  0.4814]])


tensor([[ 1.2032, -0.9032,  0.6506,  0.8014, -0.6698],
        [-0.9130, -0.6001,  1.3284, -0.6865,  0.5031],
        [-0.8854, -0.1278, -2.0463, -2.0775, -0.7252],
        [ 1.3985, -0.7678,  0.0188,  0.0072, -0.8908],
        [ 0.5041,  1.2041, -0.7236, -0.1657,  0.4814]])


tensor([[-0.8726, -0.6881, -0.2312],
        [ 1.0632,  0.3979,  0.8560]])
tensor([[ 0.0127,  2.0350,  1.5394, -0.3546,  1.0619],
        [-0.2970,  0.5001,  0.1746,  0.2811,  1.2592]])


tensor([[-0.8726, -0.6881, -0.2312,  0.0127,  2.0350,  1.5394, -0.3546,  1.0619],
        [ 1.0632,  0.3979,  0.8560, -0.2970,  0.5001,  0.1746,  0.2811,  1.2592]])




In [84]:
# Addition dimensions to tensor in specifies index

tensor_1 = torch.tensor([1,2,3,4])
tensor_a = torch.unsqueeze(tensor_1,0) ## adds 1 dimention in 0th index
print("Tensor A")
print(tensor_1)
print(tensor_a)
print(tensor_a.shape)

print('\nTensor B')
tensor_b = torch.unsqueeze(tensor_1,1) ## adds 1 dimension in 1st index
print(tensor_b)
print(tensor_b.shape)
print('\nTensor2')

tensor_2 = torch.randn(2,3,4)
print(tensor_2)
print('\n')
tensor_c = tensor_2[:,:2]
print(tensor_c)
print(tensor_c.shape)

print("\n Tensor D")
tensor_d = torch.unsqueeze(tensor_c,2) ## adds 1 dimension in 2nd index
print(tensor_d)
print(tensor_d.shape)

Tensor A
tensor([1, 2, 3, 4])
tensor([[1, 2, 3, 4]])
torch.Size([1, 4])

Tensor B
tensor([[1],
        [2],
        [3],
        [4]])
torch.Size([4, 1])

Tensor2
tensor([[[-0.1719, -0.8990, -0.2325, -2.5292],
         [ 0.9739,  0.0042, -1.2130,  0.7109],
         [-0.6044, -0.4443,  0.3477,  0.2535]],

        [[-0.7062,  0.1720,  0.4081, -0.2830],
         [-1.5178, -0.3286,  0.4157, -1.5549],
         [ 1.7189,  2.8030, -0.8125,  1.0766]]])


tensor([[[-0.1719, -0.8990, -0.2325, -2.5292],
         [ 0.9739,  0.0042, -1.2130,  0.7109]],

        [[-0.7062,  0.1720,  0.4081, -0.2830],
         [-1.5178, -0.3286,  0.4157, -1.5549]]])
torch.Size([2, 2, 4])

 Tensor D
tensor([[[[-0.1719, -0.8990, -0.2325, -2.5292]],

         [[ 0.9739,  0.0042, -1.2130,  0.7109]]],


        [[[-0.7062,  0.1720,  0.4081, -0.2830]],

         [[-1.5178, -0.3286,  0.4157, -1.5549]]]])
torch.Size([2, 2, 1, 4])


### Automatic Differentiation

In [92]:
## Remember it requires_grad=True , the tensor object keeps track of how it was created
x = torch.tensor([1.,2.,3.],requires_grad=True)
y = torch.tensor([4.,5.,6.],requires_grad=True)

## notice that both x and y have requires grad = true , therefore we can compute gradient with respect to them
z = x + y
print(z)

## z knows that it was created as a result of addition of x and y.It knows that it wasnt read in from a file

print(z.grad_fn)

# If we go further on this
s= z.sum()
print(s)
print(s.grad_fn)

tensor([5., 7., 9.], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x7f8ae4e12280>
tensor(21., grad_fn=<SumBackward0>)
<SumBackward0 object at 0x7f8b02f13550>


In [93]:
## Now if we backpropograte on x, we can find gradient of s with respect to x
## s.backward all the gradient backwards
s.backward()
print(x.grad)

tensor([1., 1., 1.])


In [95]:
## By default tensors have requires_grad = False
x = torch.randn(2,2)
y = torch.randn(2,2)

print(x.requires_grad,y.requires_grad)
z= x+y
## So you cant backpropograte through z
print(z.grad_fn)
## Another way to set the requires_grad=True is 
x.requires_grad_()
y.requires_grad_()
## z contains enough information to compute gradients, as we saw above

z = x + y
print(z.grad_fn)

## If any input to the operation has `requires_gradient=True` , so will the output
print(z.requires_grad)

## now z has computation history that relates to x and y
new_z = z.detach()
print(new_z.grad_fn)

## z.detach() returns a tensor , that shares the same storage as z, but with the computation history forgotten
## It doesnt know how it was computed,In other words, we have broken tensor away from its past history

## we can also stop autograd from tracking history on tensors. This is useful in transfer learning
print(x.requires_grad)
print((x+10).requires_grad)

with torch.no_grad():
    print((x+10).requires_grad)


False False
None
<AddBackward0 object at 0x7f8ae4bb10a0>
True
None
True
True
False


In [98]:
## Last example
x = torch.ones(2,2,requires_grad=True)
print(x)
y = x + 2
print(y)
print(y.grad_fn)
z = y * y * 3
out = z.mean()
print(z,out)
out.backward()
print(x.grad)


tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x7f8ae4be0160>
tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)
tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


### Loss Functions

In [99]:
import torch
import torch.nn as nn


In [100]:
## prediction vs actual loss

prediction = torch.randn(4,5)


In [101]:
label = torch.randn(4,5)

In [109]:
mse = nn.MSELoss(reduction = 'none')

In [110]:
loss = mse(prediction,label)

In [111]:
loss

tensor([[3.7036e+00, 1.2059e+00, 1.4207e-02, 4.2852e-02, 7.4942e-02],
        [4.6090e-01, 4.3910e-01, 2.3703e-01, 3.3099e-01, 4.6521e-01],
        [1.4150e+00, 1.1588e-01, 1.2859e+00, 1.3314e+01, 4.2292e+00],
        [1.2486e-02, 3.5775e-01, 1.1613e+00, 2.3983e-01, 5.2713e+00]])

In [114]:
mse = nn.MSELoss(reduction = 'mean')
loss = mse(prediction,label)
loss

tensor(1.7189)

In [115]:
((prediction-label)**2).mean()

tensor(1.7189)

In [116]:
## Binary cross entropy loss

labels = torch.zeros(4,5).random_(0,2)
print(labels)



tensor([[1., 0., 0., 0., 1.],
        [1., 1., 0., 1., 0.],
        [1., 1., 0., 0., 0.],
        [0., 1., 0., 0., 1.]])


In [117]:
sigmoid = nn.Sigmoid()
bce = nn.BCELoss(reduction='mean')

In [118]:
bce(sigmoid(prediction),labels)

tensor(0.7058)

In [119]:
## Logit loss
bces = bce = nn.BCEWithLogitsLoss(reduction='mean')

In [120]:
bces(sigmoid(prediction),labels)

tensor(0.7366)

In [122]:
import numpy as np
x = prediction.numpy()
y = label.numpy()
x

array([[ 2.0100904 , -1.700849  ,  0.4112881 , -1.3221304 ,  0.18271105],
       [ 0.04108731,  0.0753277 ,  0.6498562 , -0.02868167, -0.0298144 ],
       [-0.19119899, -0.6911693 ,  0.31792992,  1.837236  ,  1.3925639 ],
       [ 0.4016186 ,  1.5867542 , -1.5235881 , -1.0291256 ,  0.98241454]],
      dtype=float32)

In [126]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [128]:
x = sigmoid(x)

In [129]:
loss_values = []
for i in range(len(y)):
    batch_loss = []
    for j in range(len(y[0])):
        if y[i][j] == 1:
            loss = -np.log(x[i][j])
        else:
            loss = -np.log(1-x[i][j])
        batch_loss.append(loss)
    loss_values.append(batch_loss)    

In [130]:
loss_values

[[2.135820994874937,
  0.1676549295874038,
  0.9197886913143684,
  0.23630994310155407,
  0.788669849655287],
 [0.7139019111052805,
  0.7315201505794195,
  1.0699609001453843,
  0.6789091009383619,
  0.67835101767548],
 [0.6021103758010408,
  0.4061248510851498,
  0.8646942522721727,
  1.985015722200497,
  1.6144567394224514],
 [0.9139846665770995,
  1.7728919218868449,
  0.19715152036495354,
  0.3055116237966636,
  1.3004361443071186]]

In [131]:
np.mean(loss_values)

0.9041632653345737

### Weight initialization in pytorch

In [132]:
import torch
import torch.nn as nn


In [133]:
layer = nn.Linear(5,5)

In [134]:
layer.weight

Parameter containing:
tensor([[ 0.0644,  0.0366, -0.2197, -0.1067, -0.1918],
        [ 0.1494, -0.1288, -0.2059, -0.0437, -0.4194],
        [-0.0296, -0.2501,  0.1305,  0.2677,  0.3243],
        [-0.0565, -0.2729, -0.3631, -0.2353,  0.1042],
        [-0.2542, -0.2287,  0.0077,  0.3979,  0.1597]], requires_grad=True)

In [135]:
layer.weight.data

tensor([[ 0.0644,  0.0366, -0.2197, -0.1067, -0.1918],
        [ 0.1494, -0.1288, -0.2059, -0.0437, -0.4194],
        [-0.0296, -0.2501,  0.1305,  0.2677,  0.3243],
        [-0.0565, -0.2729, -0.3631, -0.2353,  0.1042],
        [-0.2542, -0.2287,  0.0077,  0.3979,  0.1597]])

In [138]:
## uniform distribution
nn.init.uniform_(layer.weight,a=0.0,b=3)

Parameter containing:
tensor([[2.6472, 2.7981, 1.3748, 1.0565, 0.8343],
        [2.5088, 2.4886, 1.7272, 1.8985, 2.0908],
        [1.0839, 2.8738, 0.9745, 2.5382, 0.2847],
        [2.6467, 1.0689, 2.9875, 0.4700, 1.5448],
        [0.7291, 0.7609, 2.3468, 1.9101, 0.7293]], requires_grad=True)

In [140]:
## normal distribution

nn.init.normal_(layer.weight,mean=0.0,std=0.2)

Parameter containing:
tensor([[ 0.0709,  0.1763, -0.2022,  0.3661,  0.0902],
        [ 0.0201, -0.0560, -0.0389,  0.2272, -0.0984],
        [ 0.0634, -0.1508,  0.0430,  0.0422, -0.0132],
        [ 0.2018,  0.4202,  0.0434, -0.1099, -0.0116],
        [ 0.0076, -0.1469,  0.0715, -0.0126,  0.0374]], requires_grad=True)

In [143]:
nn.init.constant_(layer.bias,0)

Parameter containing:
tensor([0., 0., 0., 0., 0.], requires_grad=True)

In [145]:
nn.init.zeros_(layer.bias)

Parameter containing:
tensor([0., 0., 0., 0., 0.], requires_grad=True)

In [146]:
nn.init.xavier_uniform_(layer.weight,gain=1.0)

Parameter containing:
tensor([[ 0.0570,  0.1387,  0.4449,  0.5295,  0.5248],
        [ 0.6510, -0.6217, -0.6725,  0.2622, -0.5753],
        [-0.2888,  0.1086, -0.7437,  0.2947,  0.0413],
        [-0.1053,  0.5097, -0.4755,  0.5997, -0.4702],
        [ 0.2749, -0.3720, -0.4941, -0.5165,  0.6128]], requires_grad=True)

In [147]:
nn.init.xavier_normal_(layer.weight,gain=1.0)

Parameter containing:
tensor([[ 0.3563, -0.2745,  0.0134, -0.1733,  0.0254],
        [-1.0710,  0.5014, -0.1427, -0.3529, -0.5949],
        [ 0.4529,  0.7473,  0.3265, -0.6666, -0.0133],
        [ 0.1799, -0.4078, -0.0697,  0.1520, -0.2919],
        [-0.3970, -0.2188,  0.1238, -0.4857,  0.7377]], requires_grad=True)