In [None]:
import torch

In [None]:
# Build a 2x3 integer tensor from nested Python lists.
rows = [[1, 2, 3], [4, 5, 6]]
tensor = torch.tensor(rows)
print(tensor)

tensor([[1, 2, 3],
        [4, 5, 6]])


In [None]:
# size of the tensor along each dimension
tensor.shape

torch.Size([2, 3])

In [None]:
# number of dimensions (rank) of the tensor
tensor.ndim

2

In [None]:
# transpose: swap the two dimensions (rows become columns)
tensor.T

tensor([[1, 4],
        [2, 5],
        [3, 6]])

In [None]:
# 3x4 tensor filled with floating-point zeros
z = torch.zeros((3, 4))
z

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [None]:
# numpy to tensor
import numpy as np

array = np.random.randn(3, 4)
tensor = torch.from_numpy(array)  # wraps the ndarray as a torch.Tensor
for obj in (tensor, array):
  print(type(obj))

<class 'torch.Tensor'>
<class 'numpy.ndarray'>


In [None]:
# tensor to numpy
n = tensor.numpy()
for obj in (tensor, n):
  print(type(obj))

<class 'torch.Tensor'>
<class 'numpy.ndarray'>


In [None]:
# reshape with view: same elements, new shape (2, 3) -> (3, 2)
def _show(t):
  """Print the shape, number of dimensions and contents of ``t``."""
  print(t.shape)
  print(t.ndim)
  print(t)

x = torch.tensor([[1, 2, 3], [5, 6, 7]])
_show(x)
print('*' * 50)
x = x.view(3, 2)
_show(x)

torch.Size([2, 3])
2
tensor([[1, 2, 3],
        [5, 6, 7]])
**************************************************
torch.Size([3, 2])
2
tensor([[1, 2],
        [3, 5],
        [6, 7]])


<h1>Autograd</h1>

In [None]:
# requires_grad=True asks autograd to record the operations applied to `a`
values = [[1.0, 2.0], [3.0, 4.0]]
a = torch.tensor(values, requires_grad=True)
print(a)

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)


In [None]:
# scalar objective: sum of the cubes of every element of `a`
y = (a ** 3).sum()
print(y)

tensor(100., grad_fn=<SumBackward0>)


In [None]:
y.backward() # compute the gradient of y w.r.t. a (y must be a scalar value)
print(a.grad) # dy/da_ij; read .grad directly, the legacy .data accessor is not needed

# Worked example
# --------------
# a = [[1.0, 2.0],
#      [3.0, 4.0]]
#
# y = sum(a_ij**3) = 1**3 + 2**3 + 3**3 + 4**3 = 100
#
# Autograd: requires_grad=True makes PyTorch record the operations applied
# to `a`, so backward() can differentiate y with respect to it:
#
# dy/da_ij = 3 * a_ij**2
# a.grad   = [[ 3., 12.],
#             [27., 48.]]

tensor([[ 3., 12.],
        [27., 48.]])


<h1>Simple Neural Network Feedforward:</h1>

In [None]:
torch.manual_seed(5) # fix the RNG seed so the random values are the same on every run

<torch._C.Generator at 0x7fddc2e08b10>

In [None]:
x = torch.randn(3,5) # inputs: 3x5 values drawn from a standard normal distribution
w = torch.rand_like(x) # weights: uniform random values in [0, 1) with the same shape as x
b = torch.randn(1,1) # bias: a single standard-normal value of shape (1, 1)

In [None]:
def sigmoid(x):
  """Apply the logistic sigmoid element-wise: 1 / (1 + exp(-x)).

  Args:
    x: a torch tensor of any shape.

  Returns:
    A tensor of the same shape as ``x`` with every value in (0, 1).
  """
  # The parentheses are essential: `1/1+torch.exp(-x)` parses as
  # `(1/1) + exp(-x)`, which is not the sigmoid.
  return 1 / (1 + torch.exp(-x))

In [None]:
# w and x have the same shape, so x must be transposed before a matrix product with w
w.shape,x.shape

(torch.Size([3, 5]), torch.Size([3, 5]))

In [None]:
# Linear layer: h = w @ x^T + b
# torch.dot cannot be used here because it only accepts 1-D tensors.
# w is (3, 5) and x.T is (5, 3), so the product is (3, 3); b (1, 1) broadcasts over it.
h = w @ x.T + b
print(h)

tensor([[-0.2833, -1.2427, -1.6905],
        [-0.0105, -1.4191, -2.0373],
        [ 0.2523, -0.4974, -1.4267]])


<h1>PyTorch Broadcasting</h1>

In [None]:
# Broadcasting: b has shape (2, 1), so its single column is stretched across
# the 3 columns of a (shape (2, 3)) before the element-wise addition.
a = torch.tensor([[1,2,3],
                  [4,5,6]])
b = torch.tensor([[1],
                  [2]])
z = a+b
print(z)
print(a)
print(b)

# [1]   +  [1,2,3]
# [2]      [4,5,6]
#
# =
# [[1+1,2+1,3+1],
#  [4+2,5+2,6+2]]

tensor([[2, 3, 4],
        [6, 7, 8]])
tensor([[1, 2, 3],
        [4, 5, 6]])
tensor([[1],
        [2]])


<h1>PyTorch Dataset and DataLoaders:</h1>
We ideally want our dataset preparation code to be decoupled from our model training code for better readability and modularity.

PyTorch provides two data primitives that help us do this with ease:

torch.utils.data.DataLoader
torch.utils.data.Dataset

What is Dataset?
Dataset stores the samples and their corresponding labels (optionally)

What is DataLoader?
DataLoader wraps an iterable around the Dataset to enable easy access to the samples.

In [4]:
import torch
from torch.utils.data import Dataset, DataLoader, TensorDataset

# 12 half-precision values 0..11 used as a toy dataset
x = torch.arange(0, 12, dtype=torch.float16)
x

tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.],
       dtype=torch.float16)

In [5]:
# Default settings made explicit: samples come back in order, one per batch.
dataLoder = DataLoader(x, batch_size=1)
for batch in dataLoder:
  print(batch)

tensor([0.], dtype=torch.float16)
tensor([1.], dtype=torch.float16)
tensor([2.], dtype=torch.float16)
tensor([3.], dtype=torch.float16)
tensor([4.], dtype=torch.float16)
tensor([5.], dtype=torch.float16)
tensor([6.], dtype=torch.float16)
tensor([7.], dtype=torch.float16)
tensor([8.], dtype=torch.float16)
tensor([9.], dtype=torch.float16)
tensor([10.], dtype=torch.float16)
tensor([11.], dtype=torch.float16)


In [6]:
# Shuffle the samples and group them into mini-batches of 3.
dataLoder = DataLoader(x, shuffle=True, batch_size=3)
for batch in dataLoder:
  print(batch)

tensor([ 6., 10.,  0.], dtype=torch.float16)
tensor([1., 8., 7.], dtype=torch.float16)
tensor([2., 9., 4.], dtype=torch.float16)
tensor([11.,  5.,  3.], dtype=torch.float16)


<h2>Data with input &amp; output</h2>

<h3>TensorDataset</h3>

In [7]:
from sklearn.datasets import make_classification
# synthetic classification data: feature matrix X and label vector y for 10 samples
# NOTE(review): no random_state is passed, so the data differ on every run
X,y = make_classification(n_samples=10)

In [9]:
# X is (n_samples, n_features); y holds one label per sample
X.shape,y.shape

((10, 20), (10,))

In [13]:
# Pair features and labels so each dataset item is an (X[i], y[i]) tuple,
# then iterate over shuffled mini-batches of 5.
features = torch.tensor(X)
labels = torch.tensor(y)
dataSet = TensorDataset(features, labels)
dataLoder = DataLoader(dataSet, shuffle=True, batch_size=5)
for i, batch in enumerate(dataLoder):
    print(f'Batch: {i} \n X: {batch[0]} , \n y: {batch[1]}')

Batch: 0 
 X: tensor([[-0.3206,  0.9559, -1.2441, -0.3008, -0.2937,  0.9566, -0.3012, -0.0625,
         -0.7609,  0.1129, -0.7771, -1.5993,  0.8045, -1.8477, -1.1654, -1.3536,
         -0.7211, -0.2667,  0.1346, -0.5812],
        [-0.5241,  1.1643,  0.5421, -0.2973,  0.3638,  1.3514,  2.0819, -1.5246,
          0.9299,  0.3867, -1.2962,  0.7661, -1.8070, -0.4166, -0.4724,  1.8937,
         -0.9993,  1.0828, -0.3907, -0.3249],
        [-0.0210,  0.3220, -1.3694,  0.3390,  0.0448, -0.4181, -0.2536, -0.6808,
         -0.5340,  0.3453,  1.3144,  0.8984, -0.5238, -0.9109, -0.0569, -0.4063,
          0.5320, -2.2672, -0.0203, -1.7225],
        [-1.2022, -0.1860, -1.8094,  0.0212,  0.4221, -0.2995, -0.0577,  0.8133,
         -0.1584,  0.8549, -0.3459, -0.3330,  1.1024,  0.1859,  0.4311,  1.2836,
         -0.7034, -1.0254, -0.3305, -0.2994],
        [-0.2517,  0.0665, -0.4296,  0.9057,  0.3089, -1.3778, -0.1890,  0.2238,
          2.2169,  1.8454,  0.7807,  2.6279, -1.3051, -1.0137, -0.2341, -

<h3>Custom Dataset class</h3>
Reasons you may want to use a custom Dataset class:

Some special handling is needed before you can get a data sample.

The data must be read from a database or from disk, and you only want to keep a few samples in memory rather than prefetching everything.

You want to apply augmentation, which is common in image tasks.

In PyTorch, DataLoader expects its first argument to support len() and integer indexing.

In [14]:
class CustomDataSet(Dataset):
  """Map-style dataset over paired samples ``x`` and targets ``y``.

  Both inputs are copied into ``float32`` tensors when the dataset is built.

  Args:
    x: array-like of samples; the first dimension indexes the samples.
    y: array-like of targets with the same first-dimension length as ``x``.

  Raises:
    ValueError: if ``x`` and ``y`` do not contain the same number of samples.
  """

  def __init__(self,x,y):
    self.x = torch.tensor(x,dtype=torch.float32)
    self.y = torch.tensor(y,dtype=torch.float32)
    # Fail fast: a length mismatch would otherwise surface as an IndexError
    # in the middle of iteration, or silently ignore the extra targets.
    if len(self.x) != len(self.y):
      raise ValueError(
        f'x and y must have the same number of samples, '
        f'got {len(self.x)} and {len(self.y)}'
      )

  def __len__(self):
    """Return the number of samples."""
    return len(self.x)

  def __getitem__(self,idx):
    """Return the ``(sample, target)`` pair stored at position ``idx``."""
    return self.x[idx],self.y[idx]

# Wrap the arrays in the custom dataset and iterate over shuffled mini-batches of 5.
dataSet = CustomDataSet(X, y)
dataLoder = DataLoader(dataSet, shuffle=True, batch_size=5)
for i, batch in enumerate(dataLoder):
    print(f'Batch: {i} \n X: {batch[0]} , \n y: {batch[1]}')

Batch: 0 
 X: tensor([[ 1.2314,  1.0264,  0.0340,  0.7531, -0.9304, -0.5050,  0.3749,  1.1868,
         -1.8398, -1.3585,  0.0414, -0.1378, -1.7293,  0.1489,  1.7329, -0.1951,
         -0.8739, -0.3879,  1.0289, -1.0641],
        [-0.5241,  1.1643,  0.5421, -0.2973,  0.3638,  1.3514,  2.0819, -1.5246,
          0.9299,  0.3867, -1.2962,  0.7661, -1.8070, -0.4166, -0.4724,  1.8937,
         -0.9993,  1.0828, -0.3907, -0.3249],
        [-1.3354, -0.4567,  1.0041, -0.2143, -1.6104, -0.8952, -0.8069,  0.1572,
          0.8461,  0.0779, -1.0106,  1.4679, -0.1644, -0.8772,  0.6624, -0.2067,
          1.9886,  0.6433, -0.2374, -0.1384],
        [-0.4301, -0.1878,  1.5748,  0.7601,  1.8946, -0.0427, -1.8831, -1.2712,
         -0.3124, -1.0721,  3.3963,  0.5880,  0.5979,  0.0075, -0.1491, -0.9672,
          0.4846, -0.2990,  0.5375,  0.4796],
        [ 0.4698, -0.7860,  1.6354, -0.9471,  0.8362, -0.2867,  0.5988,  0.7819,
         -0.8196, -1.4741,  0.1926,  0.0320, -0.0149,  1.8127,  0.7833, -