In [1]:
import torch 
import numpy as np

### Tensors in pytorch

In [2]:
torch.cuda.is_available()

False

In [4]:
some_data = [[2,3],[4,5]]

In [5]:
type(some_data)

list

In [6]:
np.array(some_data)

array([[2, 3],
       [4, 5]])

In [7]:
np.asarray(some_data)

array([[2, 3],
       [4, 5]])

In [8]:
some_data = torch.tensor(some_data)
some_data

tensor([[2, 3],
        [4, 5]])

In [9]:
type(some_data)

torch.Tensor

In [10]:
some_data.dtype

torch.int64

In [14]:
numpy_arr = np.random.rand(3,4)

In [15]:
numpy_arr

array([[0.41996386, 0.52845479, 0.26980113, 0.04505521],
       [0.14900383, 0.99298271, 0.82227998, 0.58211668],
       [0.08985049, 0.5556848 , 0.76893802, 0.56238482]])

In [16]:
torch.from_numpy(numpy_arr)

tensor([[0.4200, 0.5285, 0.2698, 0.0451],
        [0.1490, 0.9930, 0.8223, 0.5821],
        [0.0899, 0.5557, 0.7689, 0.5624]], dtype=torch.float64)

In [17]:
torch.tensor(numpy_arr)

tensor([[0.4200, 0.5285, 0.2698, 0.0451],
        [0.1490, 0.9930, 0.8223, 0.5821],
        [0.0899, 0.5557, 0.7689, 0.5624]], dtype=torch.float64)

In [18]:
torch.ones(3,4)

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [19]:
torch.zeros(3,4)

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [20]:
my_tensor = torch.rand(3,4)

In [21]:
# a tensor lives on either the CPU or a GPU; computation runs on the device that holds the tensor
my_tensor.device

device(type='cpu')

In [23]:
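# my_tensor.to("cuda") # won't work here since this machine has no GPU
# on a machine with a GPU: my_tensor = my_tensor.to("cuda")  (.to returns a new tensor, so keep the result)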

In [24]:
my_tensor

tensor([[0.4795, 0.3378, 0.4855, 0.3059],
        [0.2478, 0.9531, 0.4088, 0.0489],
        [0.6590, 0.9333, 0.7587, 0.9620]])

In [25]:
my_tensor[:,1:3]

tensor([[0.3378, 0.4855],
        [0.9531, 0.4088],
        [0.9333, 0.7587]])

In [26]:
my_tensor.mul(my_tensor) # element wise multiplication

tensor([[0.2299, 0.1141, 0.2357, 0.0936],
        [0.0614, 0.9084, 0.1671, 0.0024],
        [0.4343, 0.8711, 0.5757, 0.9254]])

In [27]:
my_tensor * my_tensor # element wise multiplication

tensor([[0.2299, 0.1141, 0.2357, 0.0936],
        [0.0614, 0.9084, 0.1671, 0.0024],
        [0.4343, 0.8711, 0.5757, 0.9254]])

In [28]:
my_tensor.matmul(my_tensor.T) # matrix multiplication

tensor([[0.6734, 0.6543, 1.2939],
        [0.6543, 1.1393, 1.4101],
        [1.2939, 1.4101, 2.8064]])

In [29]:
torch.matmul(my_tensor,my_tensor.T) # another way to do matrix multiplication

tensor([[0.6734, 0.6543, 1.2939],
        [0.6543, 1.1393, 1.4101],
        [1.2939, 1.4101, 2.8064]])

In [30]:
my_tensor.sum(axis=0) # sum of each column

tensor([1.3863, 2.2243, 1.6531, 1.3168])

In [31]:
my_tensor.sum(axis=1) # sum of each row

tensor([1.6087, 1.6586, 3.3130])

In [32]:
torch.max(my_tensor)

tensor(0.9620)

In [33]:
torch.min(my_tensor)

tensor(0.0489)

In [34]:
torch.cat([my_tensor,my_tensor],dim=1)

tensor([[0.4795, 0.3378, 0.4855, 0.3059, 0.4795, 0.3378, 0.4855, 0.3059],
        [0.2478, 0.9531, 0.4088, 0.0489, 0.2478, 0.9531, 0.4088, 0.0489],
        [0.6590, 0.9333, 0.7587, 0.9620, 0.6590, 0.9333, 0.7587, 0.9620]])

In [35]:
x =torch.tensor([[2,3],[4,9]], dtype=torch.int8)

In [36]:
x.sum(axis=0) # along columns

tensor([ 6, 12])

In [37]:
x.sum(axis=1) # along rows

tensor([ 5, 13])

In [38]:
x.sum(dim=0) # along columns

tensor([ 6, 12])

In [39]:
x.sum(dim=1) # along rows

tensor([ 5, 13])

In [42]:
y = torch.stack([x,x],dim=0) # stack inserts a NEW leading dimension: (2, 2) -> (2, 2, 2), and y[i] == x

In [43]:
print(y,y.shape)

tensor([[[2, 3],
         [4, 9]],

        [[2, 3],
         [4, 9]]], dtype=torch.int8) torch.Size([2, 2, 2])


In [44]:
z = torch.stack([x,x],dim=1) # new dimension inserted at position 1: (2, 2) -> (2, 2, 2), and z[:, i] == x
print(z,z.shape)

tensor([[[2, 3],
         [2, 3]],

        [[4, 9],
         [4, 9]]], dtype=torch.int8) torch.Size([2, 2, 2])


In [45]:
my_tensor

tensor([[0.4795, 0.3378, 0.4855, 0.3059],
        [0.2478, 0.9531, 0.4088, 0.0489],
        [0.6590, 0.9333, 0.7587, 0.9620]])

In [46]:
torch.nn.functional.softmax(my_tensor,dim=1) # along rows take softmax

tensor([[0.2692, 0.2336, 0.2708, 0.2263],
        [0.1992, 0.4034, 0.2341, 0.1633],
        [0.2095, 0.2756, 0.2314, 0.2836]])

In [47]:
my_tensor.shape 

torch.Size([3, 4])

In [51]:
my_tensor.size()

torch.Size([3, 4])

In [52]:
my_tensor.size(0)

3

In [53]:
torch.rand(10,3,128,128) # batch size, channels, height, width

tensor([[[[0.1553, 0.0618, 0.7889,  ..., 0.3665, 0.3679, 0.5554],
          [0.7732, 0.1350, 0.9570,  ..., 0.3114, 0.8158, 0.5015],
          [0.2236, 0.7300, 0.8065,  ..., 0.0616, 0.6281, 0.8064],
          ...,
          [0.9102, 0.2992, 0.6020,  ..., 0.9822, 0.6906, 0.3926],
          [0.5598, 0.6864, 0.4845,  ..., 0.3341, 0.9633, 0.1551],
          [0.6749, 0.5319, 0.8764,  ..., 0.5804, 0.8119, 0.0344]],

         [[0.9201, 0.9207, 0.7241,  ..., 0.8181, 0.3830, 0.0667],
          [0.9342, 0.5942, 0.6564,  ..., 0.0670, 0.7618, 0.3860],
          [0.7946, 0.2718, 0.3918,  ..., 0.1158, 0.8076, 0.0183],
          ...,
          [0.0279, 0.6458, 0.4937,  ..., 0.2697, 0.5673, 0.1509],
          [0.4887, 0.4984, 0.0437,  ..., 0.4916, 0.6920, 0.1009],
          [0.2746, 0.3278, 0.9769,  ..., 0.8822, 0.8621, 0.2401]],

         [[0.1055, 0.7951, 0.7398,  ..., 0.9106, 0.4798, 0.4283],
          [0.9223, 0.4609, 0.4871,  ..., 0.4942, 0.5835, 0.2203],
          [0.4882, 0.9838, 0.5442,  ..., 0

In [54]:
my_tensor

tensor([[0.4795, 0.3378, 0.4855, 0.3059],
        [0.2478, 0.9531, 0.4088, 0.0489],
        [0.6590, 0.9333, 0.7587, 0.9620]])

In [55]:
my_tensor.clip(0.2,0.8) # all values below 0.2 will be set to 0.2 and all values above 0.8 will be set to 0.8

tensor([[0.4795, 0.3378, 0.4855, 0.3059],
        [0.2478, 0.8000, 0.4088, 0.2000],
        [0.6590, 0.8000, 0.7587, 0.8000]])

In [56]:
my_tensor.cpu().detach().numpy() # bring tensor to cpu and convert to numpy array

array([[0.47948724, 0.33783215, 0.48553532, 0.30589283],
       [0.24775344, 0.9531116 , 0.40883827, 0.04890925],
       [0.65901744, 0.9333352 , 0.7587221 , 0.9619527 ]], dtype=float32)

### Autograd

In [57]:
 # autograd: automatic differentiation engine

In [58]:
a = torch.tensor([5.], requires_grad=True)
b = torch.tensor([6.], requires_grad=True)

In [59]:
a

tensor([5.], requires_grad=True)

In [61]:
y = a**3 - b**2 
y

tensor([89.], grad_fn=<SubBackward0>)

In [62]:
# dy/da = 3a^2  # 3 * 5^2 = 75
# dy/db = -2b   # -2 * 6 = -12

In [64]:
print(a.grad)

None


In [65]:
print(b.grad)

None


In [66]:
y.backward()

In [67]:
print(a.grad)

tensor([75.])


In [68]:
print(b.grad)

tensor([-12.])


In [69]:
W = torch.randn(10,1, requires_grad=True)

In [74]:
b = torch.randn(1,requires_grad=True)

In [76]:
W

tensor([[ 1.0045],
        [-2.5019],
        [-0.9910],
        [ 1.1298],
        [-0.7724],
        [-1.3997],
        [-0.0159],
        [ 0.4500],
        [ 0.6930],
        [-0.3522]], requires_grad=True)

In [77]:
print(b,b.shape)

tensor([-0.8840], requires_grad=True) torch.Size([1])


In [78]:
x = torch.rand(1,10) # 1 sample with 10 features
x

tensor([[0.3848, 0.0714, 0.2591, 0.8440, 0.9483, 0.7928, 0.6574, 0.1923, 0.1564,
         0.2222]])

In [79]:
output = torch.matmul(x,W) + b
print(output)

tensor([[-1.7153]], grad_fn=<AddBackward0>)


In [80]:
loss = 1-output # demo loss function
loss

tensor([[2.7153]], grad_fn=<RsubBackward1>)

In [81]:
# both output and loss have a grad_fn, i.e. both are nodes of the autograd graph
# backward() walks that graph from loss and fills W.grad and b.grad
loss.backward()

In [82]:
W.grad

tensor([[-0.3848],
        [-0.0714],
        [-0.2591],
        [-0.8440],
        [-0.9483],
        [-0.7928],
        [-0.6574],
        [-0.1923],
        [-0.1564],
        [-0.2222]])

In [83]:
b.grad

tensor([-1.])

In [84]:
learning_rate = 0.01
# One manual gradient-descent step on W.
# NOTE: `W = W - ...` does not modify W in place; it rebinds the name W to a new tensor.
# Because that tensor is created under no_grad it does not require grad (the repr in the
# next cell has no requires_grad=True), so a later backward() would not populate W.grad.
# To keep training the same leaf tensor, the usual form is the in-place `W -= learning_rate * W.grad`.
with torch.no_grad(): # this temporarily disables gradient calculation for all variables in this context
  W = W - learning_rate * W.grad.data 

In [85]:
W

tensor([[ 1.0083],
        [-2.5012],
        [-0.9884],
        [ 1.1382],
        [-0.7629],
        [-1.3917],
        [-0.0093],
        [ 0.4520],
        [ 0.6946],
        [-0.3500]])

### Dataset class in Pytorch

In [89]:
from sklearn.datasets import  make_classification

In [86]:
# class CustomDataset(torch.utils.data.Dataset): # inherits from Dataset class from pytorch

In [87]:
class CustomDataset:
  """Map-style dataset over a feature array and a parallel target array.

  Every item is a dict of tensors: ``"sample"`` holds the feature row as
  float32 and ``"target"`` holds the label as int64.

  Args:
    data: features, indexed as ``data[index, :]`` (assumed to be a numpy array).
    targets: labels, indexed as ``targets[index]``.
  """

  def __init__(self, data, targets):
    self.data, self.targets = data, targets

  def __len__(self):
    # number of rows == number of samples (assuming data is numpy array)
    n_samples = self.data.shape[0]
    return n_samples

  def __getitem__(self, index):
    # negative indices are rejected instead of wrapping around
    if not (0 <= index < self.data.shape[0]):
      raise IndexError("Index out of range")
    features = self.data[index, :]
    label = self.targets[index]
    sample = torch.tensor(features, dtype=torch.float)
    target = torch.tensor(label, dtype=torch.long)
    return {"sample": sample, "target": target}

In [90]:
?make_classification

[0;31mSignature:[0m
[0mmake_classification[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mn_samples[0m[0;34m=[0m[0;36m100[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn_features[0m[0;34m=[0m[0;36m20[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn_informative[0m[0;34m=[0m[0;36m2[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn_redundant[0m[0;34m=[0m[0;36m2[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn_repeated[0m[0;34m=[0m[0;36m0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn_classes[0m[0;34m=[0m[0;36m2[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn_clusters_per_class[0m[0;34m=[0m[0;36m2[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mweights[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mflip_y[0m[0;34m=[0m[0;36m0.01[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mclass_sep[0m[0;34m=[0m[0;36m1.0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mhypercube[0m[0;34m=[0m[0;32m

In [95]:
data, targets = make_classification(n_samples=1000)

In [96]:
data

array([[ 0.02790105, -0.52432248,  1.15248637, ...,  0.45009305,
         0.56704123, -1.3710777 ],
       [ 0.50790132, -2.13456256, -0.16669463, ...,  1.19902107,
        -1.57414366,  1.93129148],
       [-0.47060167,  0.22475079,  0.62785342, ...,  0.07825731,
         1.13545144, -0.78057703],
       ...,
       [-2.63100236, -0.02015831,  0.61231505, ...,  0.19952068,
         0.53569975, -0.70356895],
       [-0.77504255,  0.81828413, -1.8603931 , ..., -0.36425529,
        -0.23972905, -0.53888846],
       [-0.22830364,  1.2098013 , -0.49653036, ..., -0.31174962,
         0.18577874,  0.78874329]])

In [97]:
targets

array([0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1,
       1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
       0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0,
       0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,

In [98]:
data.shape

(1000, 20)

In [99]:
targets.shape

(1000,)

In [100]:
custom_dataset = CustomDataset(data = data, targets = targets)

In [101]:
len(custom_dataset)

1000

In [103]:
custom_dataset[0]

{'sample': tensor([ 0.0279, -0.5243,  1.1525,  1.4186, -0.0330,  1.5555, -1.0803,  1.1962,
          1.4822,  0.5500, -1.0762,  1.0808, -0.1887, -1.4103, -1.0189, -1.1896,
          1.4552,  0.4501,  0.5670, -1.3711]),
 'target': tensor(0)}

In [104]:
custom_dataset[0]["sample"]

tensor([ 0.0279, -0.5243,  1.1525,  1.4186, -0.0330,  1.5555, -1.0803,  1.1962,
         1.4822,  0.5500, -1.0762,  1.0808, -0.1887, -1.4103, -1.0189, -1.1896,
         1.4552,  0.4501,  0.5670, -1.3711])

In [105]:
custom_dataset[0]["target"]

tensor(0)

### Data set for simple NLP problem

In [106]:
import torch

In [1]:
# classification/reg problems 
class CustomDataset:
  """Template dataset for text classification / regression.

  Args:
    data: indexable collection of raw texts.
    targets: indexable collection of labels aligned with ``data``.
    tokenizer: callable mapping one text to its token ids.

  Each item is a dict with ``"text"`` (token ids as an int64 tensor) and
  ``"target"`` (the label as a tensor).
  """

  def __init__(self, data, targets, tokenizer):
    self.data, self.targets, self.tokenizer = data, targets, tokenizer

  def __len__(self):
    return len(self.data)

  def __getitem__(self, index):
    token_ids = self.tokenizer(self.data[index])
    # token_ids can have any length; pad them here if fixed-size batches are needed
    label = self.targets[index]
    item = {"text": torch.tensor(token_ids, dtype=torch.long)}
    # item["attention_mask"] = torch.tensor(attention_mask, dtype=torch.float)
    item["target"] = torch.tensor(label)
    return item
    
# modify the code as per the requirements

### Dataset for simple image/vision problems

In [4]:
import torch 
import numpy as np 
import cv2

In [5]:
class CustomDataset:
  """Map-style dataset that reads images from disk with OpenCV.

  Args:
    image_paths: sequence of image file paths, one per sample.
    targets: sequence of labels aligned with ``image_paths``.
    augmentations: callable invoked as ``augmentations(image=image)`` that
      returns a mapping with an ``"image"`` entry (looks like the
      albumentations convention -- confirm against the pipeline in use).
      Pass ``None`` to skip augmentation.

  Each item is a dict with ``"image"`` (float32 tensor in channel x height x
  width order, RGB) and ``"target"`` (the label as a tensor).

  Raises:
    ValueError: if ``image_paths`` and ``targets`` differ in length.
  """

  def __init__(self,image_paths, targets, augmentations):
    # fail early: a length mismatch would otherwise only surface mid-epoch
    if len(image_paths) != len(targets):
      raise ValueError(
        f"image_paths ({len(image_paths)}) and targets ({len(targets)}) must have the same length"
      )
    self.image_paths = image_paths
    self.targets = targets
    self.augmentations = augmentations

  def __len__(self):
    # TODO: what to do for multiclass problem, when data is in each class folder
    return len(self.image_paths)

  def __getitem__(self, index):
    """Load, augment and convert the sample at ``index``.

    Raises:
      IndexError: if ``index`` is negative or past the last sample.
      FileNotFoundError: if the image file cannot be read.
    """
    # same length source as __len__, so both always agree
    if index < 0 or index >= len(self.image_paths):
      raise IndexError("Index out of range")
    target = self.targets[index]
    image_path = self.image_paths[index]
    image = cv2.imread(image_path) # cv2 reads image in BGR format
    if image is None:
      # cv2.imread reports a missing/unreadable file by returning None, not by raising
      raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # For segmentation: read the mask here, pass mask=mask below and take
    # augmented["mask"] from the result. No mask is loaded in this version.
    # TODO: is it good practise to do augmentations in get item?
    if self.augmentations is not None:
      augmented = self.augmentations(image=image)
      image = augmented["image"]

    # pytorch expects the image to be in channel X height X width format
    # if image is grayscale (2d array), add a channel dimension: tensor.unsqueeze(0)
    # (2, 0, 1): channel axis first, then height, then width
    image = np.transpose(image, (2, 0, 1)).astype(np.float32)
    return {
      "image": torch.tensor(image),
      "target": torch.tensor(target)
    }

### Dataloader in pytorch

In [6]:
# DataLoader: serves the dataset in batches that can be passed to an nn model

In [7]:
import torch 
from sklearn.datasets import make_classification

In [8]:
class CustomDataset:
  """Map-style dataset over a feature array and a parallel target array.

  Every item is a dict of tensors: ``"x"`` holds the feature row as float32
  and ``"y"`` holds the label as int64.

  Args:
    data: features, indexed as ``data[index, :]`` (assumed to be a numpy array).
    targets: labels, indexed as ``targets[index]``.
  """

  def __init__(self, data, targets):
    self.data, self.targets = data, targets

  def __len__(self):
    # number of rows == number of samples (assuming data is numpy array)
    n_samples = self.data.shape[0]
    return n_samples

  def __getitem__(self, index):
    # negative indices are rejected instead of wrapping around
    if not (0 <= index < self.data.shape[0]):
      raise IndexError("Index out of range")
    features = self.data[index, :]
    label = self.targets[index]
    x = torch.tensor(features, dtype=torch.float)
    y = torch.tensor(label, dtype=torch.long)
    return {"x": x, "y": y}

In [9]:
data, targets = make_classification(n_samples=1000)
dataset = CustomDataset(data, targets)

In [10]:
len(dataset)

1000

In [11]:
?torch.utils.data.DataLoader

[0;31mInit signature:[0m
[0mtorch[0m[0;34m.[0m[0mutils[0m[0;34m.[0m[0mdata[0m[0;34m.[0m[0mDataLoader[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mdataset[0m[0;34m:[0m [0mtorch[0m[0;34m.[0m[0mutils[0m[0;34m.[0m[0mdata[0m[0;34m.[0m[0mdataset[0m[0;34m.[0m[0mDataset[0m[0;34m[[0m[0;34m+[0m[0mT_co[0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbatch_size[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mint[0m[0;34m][0m [0;34m=[0m [0;36m1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mshuffle[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mbool[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msampler[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mtorch[0m[0;34m.[0m[0mutils[0m[0;34m.[0m[0mdata[0m[0;34m.[0m[0msampler[0m[0;34m.[0m[0mSampler[0m[0;34m,[0m [0mIterable[0m[0;34m,[0m [0mNoneType[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbatch

In [12]:
train_loader = torch.utils.data.DataLoader(dataset, batch_size = 4, num_workers=2) # num_workers = number of worker subprocesses that load batches (0 = load in the main process)
# TODO: check on collate_fn, sampler

In [13]:
for data in train_loader:
  print(data["x"].shape)
  print(data["y"].shape)
  break

torch.Size([4, 20])
torch.Size([4])


In [14]:
for epoch in range(10):
  for data in train_loader:
    x = data["x"]
    y = data["y"]
    # outputs = model(x,y) 
    # loss = ....
    # loss.backward() 
    # ..

### Linear regression in pytorch

In [15]:
import torch 
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [16]:
class CustomDataset:
  """Dataset wrapper pairing each feature row with its label.

  Items come back as ``{"x": float32 tensor, "y": int64 tensor}``.

  Args:
    data: features, indexed as ``data[index, :]`` (assumed to be a numpy array).
    targets: labels, indexed as ``targets[index]``.
  """

  def __init__(self, data, targets):
    self.data, self.targets = data, targets

  def __len__(self):
    # one sample per row (assuming data is numpy array)
    n_rows = self.data.shape[0]
    return n_rows

  def __getitem__(self, index):
    # only 0 .. len-1 is accepted; negative indices do not wrap
    if not (0 <= index < self.data.shape[0]):
      raise IndexError("Index out of range")
    row = self.data[index, :]
    label = self.targets[index]
    return {
      "x": torch.tensor(row, dtype=torch.float),
      "y": torch.tensor(label, dtype=torch.long),
    }

In [17]:
data, targets = make_classification(n_samples=1000)

In [18]:
train_data, test_data, train_targets, test_targets = train_test_split(data, targets, test_size=0.2, stratify=targets)
# stratify=targets keeps the class proportions of targets the same in the train and test splits

In [19]:
train_dataset = CustomDataset(train_data, train_targets)
test_dataset = CustomDataset(test_data, test_targets)

In [20]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = 4, num_workers=2) 
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size = 4, num_workers=2) 

In [21]:
train_data.shape

(800, 20)

In [44]:
# 20 features, 800 samples
W = torch.randn(20,1, requires_grad=True)
b = torch.randn(1, requires_grad=True)
model = lambda x, W, b: torch.matmul(x, W )+b 

In [45]:
# # for debugging purpose
# for data in train_loader:
#   ytrain = data["y"]
#   xtrain = data["x"]
#   print(ytrain.shape)
#   print(ytrain)
#   print("\n\n")
#   print(ytrain.view(-1))
#   print("\n\n")
#   output = model(xtrain, W, b)
#   print(output.shape)
#   print(output)
#   print("\n\n")
#   print(output.view(-1))
#   break

#### before training

In [46]:
# Baseline: score the test set with the untrained model, collecting one
# prediction tensor and one label tensor per batch for the ROC AUC below.
outputs = []
labels = []
with torch.no_grad(): # inference only, no graph needs to be recorded
  for data in test_loader:
    xtest = data["x"]
    ytest = data["y"]
    output = model(xtest, W, b) # W and b are still their random initial values here
    outputs.append(output)
    labels.append(ytest)

In [47]:
from sklearn.metrics import roc_auc_score
roc_auc_score(torch.cat(labels).view(-1),torch.cat(outputs).view(-1))

np.float64(0.6666)

#### after training

In [48]:
# Plain mini-batch gradient descent on the linear model `model(x, W, b)`,
# minimising the mean squared error between predictions and the 0/1 labels.
# Prints the mean batch loss of every epoch.
learning_rate = 1e-3
for epoch in range(10):
  epoch_loss = 0
  counter = 0
  for data in train_loader:
    xtrain = data["x"]
    ytrain = data["y"]

    # drop gradients left over from the previous step (backward() accumulates into .grad)
    W.grad = None
    b.grad = None

    output = model(xtrain, W, b)
    # flatten both sides: (batch, 1) predictions would otherwise broadcast against (batch,) labels
    loss = torch.mean((output.view(-1)-ytrain.view(-1))**2)
    epoch_loss += loss.item()
    loss.backward()

    with torch.no_grad():
      # in-place update: W and b stay the same leaf tensors with requires_grad=True,
      # so they never have to be re-created or re-flagged after a step
      W -= learning_rate * W.grad
      b -= learning_rate * b.grad

    counter += 1

  # max(..., 1) avoids a ZeroDivisionError if the loader yields no batches
  print(epoch, epoch_loss/max(counter, 1))
    

0 6.2243585909157995
1 2.733622312322259
2 1.2703756904229522
3 0.6319175428152084
4 0.34590297657065094
5 0.2155058334954083
6 0.15531855349894613
7 0.1272759734466672
8 0.11410790178226307
9 0.1078808254795149


In [49]:
# Score the test set again, now with the trained parameters, collecting one
# prediction tensor and one label tensor per batch for the ROC AUC below.
outputs = []
labels = []
with torch.no_grad(): # inference only, no graph needs to be recorded
  for data in test_loader:
    xtest = data["x"]
    ytest = data["y"]
    output = model(xtest, W, b) # we already have updated W and b
    outputs.append(output)
    labels.append(ytest)

In [50]:
outputs

[tensor([[0.5618],
         [0.5673],
         [0.2364],
         [0.5202]]),
 tensor([[0.6996],
         [0.2214],
         [0.1111],
         [0.0438]]),
 tensor([[ 0.5301],
         [ 0.9552],
         [-0.2258],
         [ 0.1616]]),
 tensor([[0.5515],
         [0.2680],
         [0.6203],
         [0.3364]]),
 tensor([[0.7252],
         [0.4925],
         [1.1024],
         [0.1418]]),
 tensor([[0.3567],
         [0.9198],
         [0.3579],
         [0.8766]]),
 tensor([[0.6352],
         [0.1582],
         [0.6269],
         [0.0196]]),
 tensor([[0.4243],
         [0.1705],
         [0.1947],
         [0.0018]]),
 tensor([[0.4520],
         [0.5331],
         [0.5158],
         [1.1775]]),
 tensor([[ 1.1416],
         [-0.1528],
         [ 0.0923],
         [ 0.0014]]),
 tensor([[0.6262],
         [0.2142],
         [0.1590],
         [0.1705]]),
 tensor([[0.1231],
         [0.8428],
         [0.5341],
         [0.6478]]),
 tensor([[0.0874],
         [0.4911],
         [0.5980],

In [51]:
from sklearn.metrics import roc_auc_score
roc_auc_score(torch.cat(labels).view(-1),torch.cat(outputs).view(-1))

np.float64(0.9535)

### Training and validation loops in pytorch

In [52]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [53]:

def train_one_step(model, data, optimizer):
  """Run one optimisation step on a single batch and return its loss.

  Args:
    model: callable invoked as ``model(**data)``; its return value is used as the loss.
    data: dict of tensors; every value is moved to the module-level ``device``
      and the dict is updated in place.
    optimizer: optimizer whose gradients are reset before, and stepped after, the backward pass.

  Returns:
    The loss returned by the model for this batch.
  """
  optimizer.zero_grad()
  # move the whole batch onto the target device (same keys, same order)
  data.update({name: tensor.to(device) for name, tensor in data.items()})
  batch_loss = model(**data)
  # explicit equivalent: model(x=data["x"], y=data["y"])
  batch_loss.backward()
  optimizer.step()
  return batch_loss

In [54]:
def train_one_epoch(model, data_loader, optimizer, schedular):
  """Train ``model`` for one pass over ``data_loader`` and return the summed loss.

  Args:
    model: module passed on to ``train_one_step``; switched to training mode first.
    data_loader: iterable of batches.
    optimizer: optimizer passed on to ``train_one_step``.
    schedular: learning-rate scheduler, stepped once per batch; ``None`` disables scheduling.

  Returns:
    Sum of the per-batch losses (``0`` if the loader yields no batches).
  """
  model.train() # put model in training mode
  total_loss = 0
  for data in data_loader:
    loss = train_one_step(model, data, optimizer)
    if schedular is not None:
      schedular.step() # step the learning rate scheduler
    # detach so the running total does not keep each batch's autograd graph alive
    total_loss += loss.detach() # TODO: if average loss is required
  return total_loss
  

In [55]:
def validate_one_step(model, data):
  """Compute the loss of ``model`` on one batch without any optimisation.

  Args:
    model: callable invoked as ``model(**data)``; its return value is used as the loss.
    data: dict of tensors; every value is moved to the module-level ``device``
      and the dict is updated in place.

  Returns:
    The loss returned by the model for this batch.
  """
  # move the whole batch onto the target device (same keys, same order)
  data.update({name: tensor.to(device) for name, tensor in data.items()})
  return model(**data)

In [56]:
def validate_one_epoch(model, data_loader):
  """Evaluate ``model`` over ``data_loader`` and return the summed loss.

  Args:
    model: module passed on to ``validate_one_step``; switched to evaluation mode first.
    data_loader: iterable of batches.

  Returns:
    Sum of the per-batch losses (``0`` if the loader yields no batches).
  """
  model.eval() # evaluation mode
  running_loss = 0
  for batch in data_loader:
    # gradients are not needed for validation
    with torch.no_grad():
      batch_loss = validate_one_step(model, batch)
    running_loss += batch_loss
  return running_loss

### torch.nn 
docs: https://pytorch.org/docs/stable/nn.html

Containers:
* Module 
* Sequential (keep the layer in sequential order, automatically pass one by one)
* ModuleList (keep the layers in the list, you have to index them to use)
* ModuleDict (keep the layers in the dictionary)

Layers are:
* Conv1d
* Conv2d
* MaxPool1d
* MaxPool2d 
* AvgPool1d
* AvgPool2d
* BatchNorm1d
* BatchNorm2d
* LayerNorm
* LSTM
* GRU
* Linear 
* Dropout
* Embedding
* Transformer
* ReLU
* Sigmoid
* Flatten

Activations are: 
* ReLU
* LeakyReLU
* Sigmoid
* Tanh
* Softmax

Loss Functions: 
* MSELoss
* CrossEntropyLoss 
* NLLLoss (negative log likelihood)
* BCELoss 
* BCEWithLogitsLoss 



In [58]:
import torch 
import torch.nn as nn

In [62]:
class Model(nn.Module):
  """Three stacked linear layers: 128 -> 32 -> 16 -> 1 features (no activations in between)."""

  def __init__(self):
    super().__init__() # initialise nn.Module so the layers below get registered
    self.layer1 = nn.Linear(in_features=128, out_features=32)
    self.layer2 = nn.Linear(in_features=32, out_features=16)
    self.layer3 = nn.Linear(in_features=16, out_features=1)

  def forward(self, featurs):
    # shapes for a batch of 56 samples: (56, 128) -> (56, 32) -> (56, 16) -> (56, 1)
    hidden = featurs
    for layer in (self.layer1, self.layer2, self.layer3):
      hidden = layer(hidden)
    return hidden
    

In [63]:
model = Model()
features = torch.rand((500, 128)) # 500 samples, 128 features
output = model(features)
print(output.shape) # (500, 1) -> for each sample one output as defined in the model

torch.Size([500, 1])


In [64]:
features.device

device(type='cpu')

In [67]:
features = features.to(device)
model = Model()
model.to(device=device)
model(features).shape

torch.Size([500, 1])

In [68]:
# Use of sequential 

class Model(nn.Module):
  """Same 128 -> 32 -> 16 -> 1 linear stack, built as a single nn.Sequential."""

  def __init__(self):
    super().__init__() # initialise nn.Module so the container below gets registered
    layer_sizes = [(128, 32), (32, 16), (16, 1)] # (input features, output features) per layer
    # Sequential applies its layers one after another in the order given
    self.base = nn.Sequential(*[nn.Linear(n_in, n_out) for n_in, n_out in layer_sizes])

  def forward(self, featurs):
    return self.base(featurs)

In [69]:
features = features.to(device)
model = Model()
model.to(device=device)
model(features).shape

torch.Size([500, 1])

### general

In [None]:
import torch 
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as opt 
import torch.autograd as grad
import torch.utils.data
import torchvision 
import torchvision.datasets
import torchvision.transforms 
import matplotlib.pyplot as plt 
import numpy as np 

from tqdm import tqdm 
from torch.utils.data import DataLoader, Dataset
 

# Reproducibility: seed every random number generator this notebook draws from
# and make cuDNN pick deterministic algorithms.
random_seed = 64

np.random.seed(random_seed) # numpy's global RNG (np.random.rand etc.); torch seeds do not cover it

torch.manual_seed(random_seed) # seed for generating same random nos for the cpu

torch.cuda.manual_seed_all(random_seed) # same seed for every gpu, not only the current one; ignored when no gpu is present

torch.backends.cudnn.deterministic = True  # forces to use deterministic algorithm in cuDNN by nvidia library, some operations in cuDNN can be non-deterministic which this setting prevents

torch.backends.cudnn.benchmark = False # disable cuDNN's automatic optimization for finding the best algorithm, use same algo for consistency