In [1]:
import torch

In [2]:
x = torch.empty(5,3)
print(x)

tensor([[9.2755e-39, 1.0561e-38, 1.0286e-38],
        [8.4490e-39, 1.0102e-38, 9.0919e-39],
        [1.0102e-38, 8.9082e-39, 8.4489e-39],
        [9.6429e-39, 8.4490e-39, 9.6429e-39],
        [9.2755e-39, 1.0286e-38, 9.0919e-39]])


In [3]:
#randomly initialized matrix
x = torch.rand(5,3)
print(x)

tensor([[0.6721, 0.2574, 0.0955],
        [0.7201, 0.4864, 0.9038],
        [0.0390, 0.4256, 0.7368],
        [0.5942, 0.2011, 0.9855],
        [0.2543, 0.2080, 0.1762]])


In [4]:
#construct a matrix filled with zeros and of dtype long
x = torch.zeros(5,3, dtype=torch.long)
print(x)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])


In [5]:
#construct a tensor directly from data
x = torch.tensor([5.5,3])
print(x)

tensor([5.5000, 3.0000])


In [6]:
#create tensor based on an existing tensor. These methods will reuse properties of the input tensor
# e.g dtype, unless new values are provided by user
x = x.new_ones(5,3,dtype = torch.double)  #new_* methods take in sizes
print(x)
x = torch.randn_like(x, dtype=torch.float)     #override dtype!
print(x)                                       # result has the same size

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
tensor([[-0.3752,  0.0270, -1.7324],
        [-1.1739, -1.5896,  0.1725],
        [-1.3745, -1.7261, -0.7349],
        [ 1.6789,  1.3476, -0.3052],
        [-0.0278,  1.1385, -0.3483]])


In [7]:
print(x.size())

torch.Size([5, 3])



# Operations

In [8]:
y = torch.rand(5,3)
print(x+y)

tensor([[ 0.2088,  0.0325, -1.5197],
        [-0.5401, -1.0705,  0.7391],
        [-0.6993, -0.9224, -0.7262],
        [ 1.7583,  1.5456,  0.1934],
        [-0.0117,  1.5581,  0.2610]])


In [9]:
print(torch.add(x,y))

tensor([[ 0.2088,  0.0325, -1.5197],
        [-0.5401, -1.0705,  0.7391],
        [-0.6993, -0.9224, -0.7262],
        [ 1.7583,  1.5456,  0.1934],
        [-0.0117,  1.5581,  0.2610]])


In [13]:
result = torch.empty(5,3)
torch.add(x,y, out=result)
#print(result)   

tensor([[ 0.2088,  0.0325, -1.5197],
        [-0.5401, -1.0705,  0.7391],
        [-0.6993, -0.9224, -0.7262],
        [ 1.7583,  1.5456,  0.1934],
        [-0.0117,  1.5581,  0.2610]])

In [14]:
#add x to y
y.add_(x)
print(y)

tensor([[ 0.2088,  0.0325, -1.5197],
        [-0.5401, -1.0705,  0.7391],
        [-0.6993, -0.9224, -0.7262],
        [ 1.7583,  1.5456,  0.1934],
        [-0.0117,  1.5581,  0.2610]])


'''Any operation that mutates a tensor in-place is post-fixed with an _. For example: x.copy_(y), x.t_(), will change x.'''

In [15]:
x


tensor([[-0.3752,  0.0270, -1.7324],
        [-1.1739, -1.5896,  0.1725],
        [-1.3745, -1.7261, -0.7349],
        [ 1.6789,  1.3476, -0.3052],
        [-0.0278,  1.1385, -0.3483]])

In [16]:
print(x[:,1])
#print(x[1,:])



tensor([ 0.0270, -1.5896, -1.7261,  1.3476,  1.1385])


# torch.view
If you don't know how many rows you want but are sure of the number of columns, you can specify the unknown dimension with a -1. (Note that you can extend this to tensors with more dimensions. Only one of the axis values can be -1.) This is a way of telling the library: "give me a tensor that has this many columns, and you compute the appropriate number of rows necessary to make this happen".

In [17]:
#Resizing/Reshaping the tensor
x = torch.randn(4,4)
print(x)
y = x.view(16)
z = x.view(-1,8)
y.size(),z

tensor([[ 0.1662,  0.7501,  1.4537, -0.2377],
        [-0.1699,  0.3264, -1.6596, -0.1646],
        [ 0.0321,  1.4180, -0.6246, -0.7299],
        [-0.6686, -0.5971, -0.3324, -1.4054]])


(torch.Size([16]),
 tensor([[ 0.1662,  0.7501,  1.4537, -0.2377, -0.1699,  0.3264, -1.6596, -0.1646],
         [ 0.0321,  1.4180, -0.6246, -0.7299, -0.6686, -0.5971, -0.3324, -1.4054]]))

In [18]:
#if you have a one-element tensor, use .item() to get the value as a Python scalar
x = torch.randn(1)
print(x)
x.item()

tensor([0.1029])


0.1028614342212677

# converting a torch tensor to numpy

In [19]:
a = torch.ones(5)
print(a)


tensor([1., 1., 1., 1., 1.])


In [20]:
b = a.numpy()
print(b)

[1. 1. 1. 1. 1.]


In [21]:
a.add_(1)
print(a)
print(b)

tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [22]:
#Converting numpy array to torch tensor
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
print(a)
print(b)
np.add(a,1,out=a)

[1. 1. 1. 1. 1.]
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)


array([2., 2., 2., 2., 2.])

In [23]:
# Add two tensors on the GPU when CUDA is present; otherwise just report that
# no GPU is available.
if not torch.cuda.is_available():
    print('NOPE')
else:
    device = torch.device("cuda")          # handle for the CUDA device
    y = torch.ones_like(x, device=device)  # allocate the new tensor on the GPU directly
    x = x.to(device)                       # move x over; the string form .to("cuda") also works
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))       # .to can switch device and dtype in a single call

NOPE


In [24]:
print('Aamir')

Aamir


# AUTOGRAD: AUTOMATIC DIFFERENTIATION
The autograd package provides automatic differentiation for all operations on Tensors. It is a define-by-run framework, which means that your backprop is defined by how your code is run, and that every single iteration can be different.

#Tensor
------
torch.Tensor is the central class of the package.
If you set its attribute **.requires_grad** to True, it starts to track all operations on it. When you finish your computation you can call .backward() and have all the gradients computed automatically. The gradient for this tensor will be accumulated into the .grad attribute.

To stop a tensor from tracking history, you can call **detach()** to detach it from the computation history,
and to prevent future computation from being tracked.

To prevent tracking history (and using memory), you can also wrap the code block in **with torch.no_grad():**. This can be particularly helpful when evaluating a model, because the model may have trainable parameters with requires_grad=True for which we don't need the gradients.

There is one more class which is very important for the autograd implementation: a Function.

Tensor and Function are interconnected and build up an acyclic graph that encodes a complete history of computation. Each tensor has a **.grad_fn** attribute that references the Function that created the Tensor (except for Tensors created by the user, whose grad_fn is None).

If you want to compute the derivatives, you can call **.backward()** on a Tensor. If the Tensor is a scalar (i.e. it holds a single element), you don't need to specify any arguments to backward(); however, if it has more elements, you need to specify a gradient argument that is a tensor of matching shape.

In [25]:
x = torch.ones(2,2, requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [26]:
y = x+2
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


In [27]:
print(x.grad_fn)
print(y.grad_fn)

None
<AddBackward0 object at 0x000001C2872C8348>


In [28]:
z = y*y*3
out = z.mean()
print(z,out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)


In [29]:
a = torch.randn(2,2)
a = ((a*3)/(a-1))
print(a.requires_grad)
a.requires_grad_(True)
print(a.requires_grad)
b = (a*a).sum()
print(b.grad_fn)

False
True
<SumBackward0 object at 0x000001C2872C3D08>


**Gradients**

Let’s backprop now. Because **out** contains a single scalar, **out.backward()** is equivalent to **out.backward(torch.tensor(1.))**.

In [30]:
out.backward()

**Print gradients d(out)/dx**

In [31]:
print(x.grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [32]:
x = torch.randn(3,requires_grad=True)
y = x * 2
# Keep doubling y until its L2 norm reaches 1000. The norm is read from a
# detached tensor so the loop condition itself is not recorded by autograd;
# .detach() is used instead of the legacy .data attribute, which bypasses
# autograd's correctness checks.
while y.detach().norm() < 1000:
    y = y * 2
print(y)

tensor([ -340.9655,  -371.3545, -1325.0345], grad_fn=<MulBackward0>)


You can also stop autograd from tracking history on Tensors with **.requires_grad=True** either
by wrapping the code block in **with torch.no_grad()**:

In [33]:
print(x.requires_grad)
print((x**2).requires_grad)
with torch.no_grad():
    print((x**2).requires_grad)

True
True
False


Or by using **.detach()** to get a new Tensor with the same content but
that does not require gradients:

In [34]:
print(x.requires_grad)
y = x.detach()
print(y.requires_grad)
print(x.eq(y).all())

True
False
tensor(True)


In [35]:
import numpy as np

In [36]:
'''N is batch size; D_in is input dimension
H is hidden dimension; D_out is output dimension'''
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and targets
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Random initial weights for the two linear layers
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6

for t in range(500):
    # Forward pass: linear -> ReLU -> linear
    h = np.dot(x, w1)
    h_relu = np.maximum(h, 0)
    y_pred = np.dot(h_relu, w2)

    # Sum-of-squares loss between prediction and target
    err = y_pred - y
    loss = np.square(err).sum()
    print(t,loss)

    # Backward pass: gradients of the loss with respect to w2, then w1
    grad_y_pred = 2.0 * err
    grad_w2 = np.dot(h_relu.T, grad_y_pred)
    grad_h_relu = np.dot(grad_y_pred, w2.T)
    # ReLU passes no gradient where its input was negative
    grad_h = np.where(h < 0, 0.0, grad_h_relu)
    grad_w1 = np.dot(x.T, grad_h)

    # Gradient-descent step, applied in place
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
    

0 28908520.949068658
1 23467604.360672787
2 24180917.356898397
3 27002666.35619718
4 28550498.96604411
5 25857656.98739277
6 19237609.258243036
7 11752405.832338084
8 6338364.31508195
9 3297303.9644351555
10 1821877.940887975
11 1122950.040700867
12 778875.7445060022
13 592255.93558374
14 478107.95307557227
15 399650.0609622641
16 340853.999475639
17 294273.50134341273
18 256055.60325623833
19 224053.8768121822
20 196932.41476286325
21 173753.88637135288
22 153835.66916082596
23 136607.1900396084
24 121657.15584547547
25 108619.8864984327
26 97210.87079362725
27 87206.31840261898
28 78402.41101166014
29 70627.45804304184
30 63747.25632857827
31 57638.514810671135
32 52204.18377443318
33 47363.419667075956
34 43037.30964485081
35 39165.51339254706
36 35694.79114108816
37 32572.775689395705
38 29762.548943030066
39 27229.1400082502
40 24942.82337302801
41 22872.852175284796
42 20996.880012215217
43 19294.605288538925
44 17748.64421685002
45 16341.976654786355
46 15061.15350928661
47 1389

390 0.00043783973665931545
391 0.00041900722799718254
392 0.00040098974735705876
393 0.0003837534015179692
394 0.0003672635550018819
395 0.0003514830412470171
396 0.0003363848693194841
397 0.00032193895590575735
398 0.0003081169395572833
399 0.00029489161575986045
400 0.0002822372642342181
401 0.00027012862822442453
402 0.00025854464950753167
403 0.0002474583728037605
404 0.00023685003471529453
405 0.0002266990829990971
406 0.00021698552300931245
407 0.0002076900656534485
408 0.00019879483482071605
409 0.00019028275795681613
410 0.00018213856623982495
411 0.00017434348005724716
412 0.00016688396507548675
413 0.00015974567033727704
414 0.0001529138939548298
415 0.00014637561630650276
416 0.00014011870794858477
417 0.00013413013894725257
418 0.0001283997824518895
419 0.00012291496195410414
420 0.0001176652815190489
421 0.0001126411408454205
422 0.00010783253720918066
423 0.00010323018417235174
424 9.882513644238665e-05
425 9.460902871447625e-05
426 9.057431480129766e-05
427 8.67121060134

In [37]:
t = torch.Tensor()

In [38]:
print(t.device)
print(t.dtype)
print(t.layout)

cpu
torch.float32
torch.strided


In [39]:
device = torch.device('cuda:0')
device

device(type='cuda', index=0)

In [40]:
data = np.array([1,2,3])
type(data)

numpy.ndarray

**Tensor** is the class constructor

In [41]:
torch.Tensor(data)

tensor([1., 2., 3.])

`torch.tensor` is a factory function that builds tensor objects: a factory function accepts parameters as input and returns a particular type of object. Factory functions are an OOP concept that lets you create objects without calling the constructor directly.

In [42]:
torch.tensor(data)

tensor([1, 2, 3], dtype=torch.int32)

Returns the identity matrix

In [43]:
torch.eye(2)

tensor([[1., 0.],
        [0., 1.]])

In [44]:
t1 = torch.Tensor(data) #constructor
t2 = torch.tensor(data) #factory func
t3 = torch.as_tensor(data)#factory functions
t4 = torch.from_numpy(data)#factory func

In [45]:
 print(t1)
print(t2)
print(t3)
print(t4)

tensor([1., 2., 3.])
tensor([1, 2, 3], dtype=torch.int32)
tensor([1, 2, 3], dtype=torch.int32)
tensor([1, 2, 3], dtype=torch.int32)


In [46]:
 print(t1.dtype)
print(t2.dtype)
print(t3.dtype)
print(t4.dtype)

torch.float32
torch.int32
torch.int32
torch.int32


In [47]:
#torch.Tensor() uses the default dtype, while the factory functions infer the dtype from the incoming data

torch.get_default_dtype()

torch.float32

In [48]:
data = np.array([1,2,3])

In [49]:
# Four ways to build a tensor from the same NumPy array. The following cells
# zero out `data` and print each tensor to show which ones copied the data and
# which ones still share it.
t1 = torch.tensor(data)  # factory function; keeps its own copy of the data
t2 = torch.Tensor(data)  # class constructor; keeps its own copy, stored as float32
t3 = torch.as_tensor(data)  # takes any array-like; shares memory with `data` here
t4 = torch.from_numpy(data)  # only takes a NumPy array; shares memory with `data`

In [50]:
data[0]=0
data[1]=0
data[2]=0

In [51]:
print(t1)
print(t2)

tensor([1, 2, 3], dtype=torch.int32)
tensor([1., 2., 3.])


In [52]:
print(t3)
print(t4)

tensor([0, 0, 0], dtype=torch.int32)
tensor([0, 0, 0], dtype=torch.int32)


**t1** and **t2** each create an additional copy of the data, whereas
**t3** and **t4** share the data in memory with the original NumPy array.

# Tensor Operations
1. Reshaping operations
2. Element wise operation
3. Reduction operation
4. Access operations

In [53]:
t = torch.tensor([
    [1,1,1,1],
    [2,2,2,2],
    [3,3,3,3]
], dtype=torch.float32)

In [54]:
torch.tensor(t.shape).prod()

tensor(12)

In [55]:
t.numel()

12

In [56]:
t.reshape(1,12)


tensor([[1., 1., 1., 1., 2., 2., 2., 2., 3., 3., 3., 3.]])

In [57]:
t.reshape(2,6)

tensor([[1., 1., 1., 1., 2., 2.],
        [2., 2., 3., 3., 3., 3.]])

In [58]:
t.reshape(3,4)

tensor([[1., 1., 1., 1.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.]])

In [59]:
t.reshape(4,3)

tensor([[1., 1., 1.],
        [1., 2., 2.],
        [2., 2., 3.],
        [3., 3., 3.]])

In [60]:
t.reshape(6,2)

tensor([[1., 1.],
        [1., 1.],
        [2., 2.],
        [2., 2.],
        [3., 3.],
        [3., 3.]])

In [61]:
t.reshape(12)

tensor([1., 1., 1., 1., 2., 2., 2., 2., 3., 3., 3., 3.])

Another way to change the shape is by **squeezing** and **unsqueezing** tensors.

**Squeezing** a tensor removes all the axes that have a length of one, while **unsqueezing** a tensor adds a dimension with a length of one.

In [62]:
print(t.reshape(1,12).squeeze())

tensor([1., 1., 1., 1., 2., 2., 2., 2., 3., 3., 3., 3.])


In [63]:
print(t.reshape(1,12).squeeze().shape)

torch.Size([12])


In [64]:
print(t.reshape(1,12).squeeze().unsqueeze(dim=0))

tensor([[1., 1., 1., 1., 2., 2., 2., 2., 3., 3., 3., 3.]])


In [65]:
print(t.reshape(1,12).squeeze().unsqueeze(dim=0).shape)

torch.Size([1, 12])


Since the input tensor can be of any shape, we pass **-1** as the second argument to the reshape function. The -1 tells reshape to figure out what that value should be based on the other value and the number of elements contained within the tensor.

In [65]:
def flatten(t):
    """Flatten a tensor of any shape into a 1-D tensor.

    The tensor is first reshaped to a single row of shape (1, numel), then the
    leading length-1 axis is squeezed away.

    Args:
        t: a torch tensor of any shape containing at least one element.

    Returns:
        A 1-D tensor of shape (t.numel(),) with the elements in row-major order.
    """
    # -1 lets reshape infer the row length from the number of elements.
    t = t.reshape(1, -1)
    # Squeeze only axis 0. A bare squeeze() would also drop the second axis
    # when the tensor holds a single element, yielding a 0-d tensor.
    t = t.squeeze(dim=0)
    return t

After squeezing the first axis is removed

In [66]:
flatten(t)

tensor([1., 1., 1., 1., 2., 2., 2., 2., 3., 3., 3., 3.])

## CNN Flatten operation Visualized - Tensor batch processing for Deep Learning

![](Pic.png)

In [69]:
t1 = torch.tensor([
    [1,1,1,1],
    [1,1,1,1],
    [1,1,1,1],
    [1,1,1,1]

])

In [70]:
t2 = torch.tensor([
    [2,2,2,2],
    [2,2,2,2],
    [2,2,2,2],
    [2,2,2,2]
])

In [71]:
t3 = torch.tensor([
    [3,3,3,3],
    [3,3,3,3],
    [3,3,3,3],
    [3,3,3,3]
])

In [75]:
t = torch.stack((t1,t2,t3))
t.shape

torch.Size([3, 4, 4])

In [76]:
t  = t.reshape(3,1,4,4)

In [77]:
t

tensor([[[[1, 1, 1, 1],
          [1, 1, 1, 1],
          [1, 1, 1, 1],
          [1, 1, 1, 1]]],


        [[[2, 2, 2, 2],
          [2, 2, 2, 2],
          [2, 2, 2, 2],
          [2, 2, 2, 2]]],


        [[[3, 3, 3, 3],
          [3, 3, 3, 3],
          [3, 3, 3, 3],
          [3, 3, 3, 3]]]])

In [78]:
t.reshape(1,-1)[0]

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [79]:
t.reshape(-1)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [80]:
t.view(t.numel())

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [81]:
t.flatten()

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [85]:
t.flatten(start_dim=1).shape

torch.Size([3, 16])

In [86]:
t.flatten(start_dim =1)

tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]])

In [89]:
t.reshape(3,-1)

tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]])

# Broadcasting

In [90]:
t = torch.tensor([
    [1,2],
    [3,4]
])

In [93]:
# Show what the scalar 2 looks like once broadcast to the shape of t
np.broadcast_to(2, t.shape)

array([[2, 2],
       [2, 2]])

In [95]:
t + 2

tensor([[3, 4],
        [5, 6]])

In [97]:
t + torch.tensor(
    np.broadcast_to(2,t.shape), dtype=torch.float32)

tensor([[3., 4.],
        [5., 6.]])

In [103]:
t1 = torch.tensor([
    [1,2],
    [2,3]

    ],dtype=torch.float32
)
t2 = torch.tensor([2,4], dtype=torch.float32)

In [105]:
# Element-wise addition: t2 (shape [2]) is broadcast across the rows of t1 (shape [2, 2])
t1 + t2

In [106]:
t1.shape

torch.Size([2, 2])

In [107]:
t2.shape

torch.Size([2])

Even though they have different dimensions, the element-wise operation
is possible, and broadcasting is what makes it possible.
The lower-rank tensor **t2** is expanded to the shape of the higher-rank tensor **t1** via broadcasting.

In [None]:
# np.broadcast_to requires the target shape as its second argument.
# Show t2 as it looks once broadcast to the shape of t1.
np.broadcast_to(t2.numpy(), t1.shape)