In [1]:
import numpy as np
import torch
import torch.nn.functional as F

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset


In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

X, y = make_classification(n_samples=500, n_features=5, n_informative=5, n_redundant=0, n_classes=4)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, stratify=y)

X_train_t = torch.tensor(X_train, dtype=torch.float, device=device)
X_test_t = torch.tensor(X_train, dtype=torch.float, device=device)
y_train_t = torch.tensor(y_train, dtype=torch.long, device=device)
y_test_t = torch.tensor(y_train, dtype=torch.long, device=device)

train_ds = TensorDataset(X_train_t, y_train_t)
test_ds = TensorDataset(X_test_t, y_test_t)

train_loader = DataLoader(train_ds, batch_size=16)
test_loader = DataLoader(test_ds, batch_size=64)


In [10]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(5, 32)
        nn.init.normal_(self.fc1.weight, mean=0.0, std=1.0)
        self.fc2 = nn.Linear(32, 64)
        self.fc3 = nn.Linear(64, 24)
        self.fc4 = nn.Linear(24, 4)
        self.drop = nn.Dropout(p=0.2)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.drop(x)
        x = F.relu(self.fc3(x))
        x = self.drop(x)
        x = self.fc4(x)
        return x


model = Classifier()
model.to(device)
print(model)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)


Classifier(
  (fc1): Linear(in_features=5, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=24, bias=True)
  (fc4): Linear(in_features=24, out_features=4, bias=True)
  (drop): Dropout(p=0.2, inplace=False)
)


In [8]:
for name, params in model.named_parameters():
    print(name)
    print(params)


fc1.weight
Parameter containing:
tensor([[ 0.4345,  0.2599, -0.4428,  0.5273, -0.4238],
        [-1.1420,  1.5063, -0.1439,  0.4210,  0.7373],
        [ 2.6301,  0.0297, -0.2762, -0.1317,  0.4022],
        [-0.6813, -0.4834, -0.1795,  0.0861, -0.3332],
        [-0.0086,  0.5530, -0.3460, -0.6616, -0.6208],
        [ 0.2777, -0.1962,  0.5358, -1.0017,  0.4319],
        [ 1.3984,  0.7106,  1.0105, -0.3274, -0.1993],
        [ 0.9489,  0.8337, -0.0905,  0.0811,  1.2139],
        [-1.4157, -2.0593,  0.5694,  0.3177,  1.5895],
        [ 2.3232, -0.4252, -0.6683,  0.7729, -2.5709],
        [-1.0085, -1.8043,  1.1724, -0.7644, -2.0162],
        [-1.4614, -0.5808,  2.4717,  0.6765, -0.7668],
        [-0.0468, -0.4617, -1.4929,  0.9394,  0.0715],
        [-0.6144, -0.0286, -1.7006,  0.1303, -0.9175],
        [-0.0709, -0.2456, -1.4111, -1.5462,  0.5141],
        [ 0.5092, -0.5950,  0.0190, -0.5872,  0.5012],
        [ 0.1355, -0.1903, -0.5052, -2.3160, -0.2656],
        [-0.9068, -1.2798,  0.19

In [11]:
epochs = 10

min_loss = np.Inf
eps = 1e-2

for epoch in range(epochs):
    model.train()
    training_loss = 0
    trainnig_accuracy = 0
    for samples, labels in train_loader:
        optimizer.zero_grad()
        output = model.forward(samples)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        training_loss += loss.item() * len(samples)
        # Calculate accuracy
        y_hat = torch.argmax(output, dim=1)
        trainnig_accuracy += torch.sum(y_hat == labels).item()
    else:
        training_loss = training_loss / len(train_ds)
        trainnig_accuracy = trainnig_accuracy / len(train_ds)
    
    model.eval()
    with torch.no_grad():
        validation_loss = 0
        validation_accuracy = 0
        for samples, labels in test_loader:
            output = model.forward(samples)
            loss = criterion(output, labels)
            validation_loss += loss.item() * len(samples)
            # Calculate accuracy
            y_hat = torch.argmax(output, dim=1)
            validation_accuracy += torch.sum(y_hat == labels).item()
        else:
            validation_loss = validation_loss / len(test_ds)
            validation_accuracy = validation_accuracy / len(test_ds)
    
    print(f"Epoch {epoch}, Training loss: {training_loss:.6f}, \
            Validation loss: {validation_loss:.6f}")
    print(f"Epoch {epoch}, Training accuracy: {trainnig_accuracy:.6f}, \
            Validation accuracy: {validation_accuracy:.6f}")
    
    if abs(validation_loss - min_loss) > eps:
        min_loss = validation_loss
        torch.save(model.state_dict(), 'model.pt')
        print("Save the model --> model.pt")    


Epoch 0, Training loss: 1.410114,             Validation loss: 1.275274
Epoch 0, Training accuracy: 0.257500,             Validation accuracy: 0.510000
Save the model --> model.pt
Epoch 1, Training loss: 1.218260,             Validation loss: 1.098020
Epoch 1, Training accuracy: 0.542500,             Validation accuracy: 0.607500
Save the model --> model.pt
Epoch 2, Training loss: 1.079079,             Validation loss: 0.953942
Epoch 2, Training accuracy: 0.595000,             Validation accuracy: 0.660000
Save the model --> model.pt
Epoch 3, Training loss: 0.960562,             Validation loss: 0.874992
Epoch 3, Training accuracy: 0.665000,             Validation accuracy: 0.682500
Save the model --> model.pt
Epoch 4, Training loss: 0.933285,             Validation loss: 0.827884
Epoch 4, Training accuracy: 0.625000,             Validation accuracy: 0.705000
Save the model --> model.pt
Epoch 5, Training loss: 0.880729,             Validation loss: 0.794364
Epoch 5, Training accuracy: 

In [64]:
# Upload model
model.load_state_dict(torch.load('model.pt'))
model

Classifier(
  (fc1): Linear(in_features=5, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=24, bias=True)
  (fc4): Linear(in_features=24, out_features=4, bias=True)
  (drop): Dropout(p=0.2, inplace=False)
)

In [50]:
model.state_dict().keys()

odict_keys(['fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias', 'fc4.weight', 'fc4.bias'])

In [83]:
np.random.seed(1234)
torch.manual_seed(1234)

w = torch.randn(3, 4, requires_grad=True)
print(w)

x = w * torch.tensor([1, 1, 1, 1]) + 1
x.retain_grad()
print(x)

z = torch.mean(x)
print(z)

z.backward()

tensor([[ 0.0461,  0.4024, -1.0115,  0.2167],
        [-0.6123,  0.5036,  0.2310,  0.6931],
        [-0.2669,  2.1785,  0.1021, -0.2590]], requires_grad=True)
tensor([[ 1.0461,  1.4024, -0.0115,  1.2167],
        [ 0.3877,  1.5036,  1.2310,  1.6931],
        [ 0.7331,  3.1785,  1.1021,  0.7410]], grad_fn=<AddBackward0>)
tensor(1.1853, grad_fn=<MeanBackward0>)


In [84]:
x.grad

tensor([[0.0833, 0.0833, 0.0833, 0.0833],
        [0.0833, 0.0833, 0.0833, 0.0833],
        [0.0833, 0.0833, 0.0833, 0.0833]])

In [85]:
w.grad

tensor([[0.0833, 0.0833, 0.0833, 0.0833],
        [0.0833, 0.0833, 0.0833, 0.0833],
        [0.0833, 0.0833, 0.0833, 0.0833]])

In [86]:
z.grad

In [103]:
a = torch.zeros(5)
a.requires_grad = True

b = 2 * a

b.retain_grad()   # Since b is non-leaf and it's grad will be destroyed otherwise.

c = b.mean()

c.backward()

print(a.grad)

tensor([0.4000, 0.4000, 0.4000, 0.4000, 0.4000])


In [92]:
a

tensor([0., 0., 0., 0., 0.], requires_grad=True)

In [49]:
import torch

dtype = torch.float
device = torch.device("cpu")
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 2, 5, 2, 7

# Create random Tensors to hold input and outputs.
# Setting requires_grad=False indicates that we do not need to compute gradients
# with respect to these Tensors during the backward pass.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Create random Tensors for weights.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(2):
    # Forward pass: compute predicted y using operations on Tensors; these
    # are exactly the same operations we used to compute the forward pass using
    # Tensors, but we do not need to keep references to intermediate values since
    # we are not implementing the backward pass by hand.
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # Compute and print loss using operations on Tensors.
    # Now loss is a Tensor of shape (1,)
    # loss.item() gets the scalar value held in the loss.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass. This call will compute the
    # gradient of loss with respect to all Tensors with requires_grad=True.
    # After this call w1.grad and w2.grad will be Tensors holding the gradient
    # of the loss with respect to w1 and w2 respectively.
    loss.backward()

    # Manually update weights using gradient descent. Wrap in torch.no_grad()
    # because weights have requires_grad=True, but we don't need to track this
    # in autograd.
    # An alternative way is to operate on weight.data and weight.grad.data.
    # Recall that tensor.data gives a tensor that shares the storage with
    # tensor, but doesn't track history.
    # You can also use torch.optim.SGD to achieve this.
    with torch.no_grad():
        print(w1.grad)
        w1 -= learning_rate * w1.grad
        print(w2.grad)
        w2 -= learning_rate * w2.grad

        # Manually zero the gradients after updating weights
        w1.grad.zero_()
        w2.grad.zero_()

tensor([[ 80.6603,  -0.3962],
        [223.6336,   2.8058],
        [134.6960,  -0.4273],
        [158.6102,   2.9853],
        [106.7771,  -5.0731]])
tensor([[ 26.1258, -42.2493, -99.2625,  -8.8227, 160.3694, -28.2195,  48.8896],
        [  0.6235,   1.8275,   2.0791,   1.4693,   4.4247,   1.7089,  -1.6074]])
tensor([[ 80.6382,  -0.3962],
        [223.5714,   2.8058],
        [134.6590,  -0.4273],
        [158.5659,   2.9853],
        [106.7489,  -5.0732]])
tensor([[ 26.1188, -42.2330, -99.2254,  -8.8183, 160.3078, -28.2089,  48.8725],
        [  0.6235,   1.8277,   2.0794,   1.4693,   4.4241,   1.7090,  -1.6075]])


In [52]:
class MyLayer(nn.Module):
  def __init__(self, param):
    super().__init__()
    self.param = param 
  
  def forward(self, x):
    return x * self.param
  
myLayerObject = MyLayer(5)
output = myLayerObject(torch.Tensor([5, 4, 3]) )    #calling forward inexplicitly 
print(output)

tensor([25., 20., 15.])


In [53]:
class net2(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv = nn.Linear(10,5) 
    self.tens = nn.Parameter(torch.ones(3,4))                       # This will show up in a parameter list 
    
  def forward(self, x):
    return self.linear(x)

myNet = net2()
print(list(myNet.parameters()))

[Parameter containing:
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], requires_grad=True), Parameter containing:
tensor([[-1.8922e-01,  3.8575e-02, -5.8615e-02,  6.8145e-02, -7.5199e-02,
         -2.0701e-01,  4.1986e-02,  2.1418e-01, -1.8987e-01,  2.7720e-01],
        [ 7.7828e-02,  3.1181e-01, -2.8985e-01, -3.0270e-01,  1.0728e-01,
         -2.9501e-01, -2.5915e-01,  1.6080e-01,  1.5351e-01, -2.8815e-01],
        [ 2.2755e-01, -6.1489e-02,  2.3477e-01, -1.8558e-01, -3.0069e-01,
         -2.0963e-01, -4.9024e-02, -3.1582e-01, -2.1546e-02, -1.5782e-01],
        [-7.3603e-02, -4.4972e-05,  1.7748e-02, -3.0933e-01,  1.6266e-01,
         -1.9382e-01,  1.9272e-01,  2.8977e-01, -1.0546e-01,  3.5491e-02],
        [ 1.7436e-01, -2.5054e-02, -2.6460e-01,  1.4205e-01,  1.5044e-01,
         -1.7325e-01, -2.1292e-01, -5.5187e-02,  6.2001e-02, -1.5992e-01]],
       requires_grad=True), Parameter containing:
tensor([-0.0076, -0.1367, -0.1232, -0.0433, -0.1205], requi

In [56]:
layer_list = [nn.Conv2d(5,5,3), nn.BatchNorm2d(5), nn.Linear(5,2)]

class myNet(nn.Module):
  def __init__(self):
    super().__init__()
    self.layers = nn.ModuleList(layer_list)
  
  def forward(x):
    for layer in self.layers:
      x = layer(x)

net = myNet()

print(list(net.parameters()))


[Parameter containing:
tensor([[[[ 0.0867, -0.1081,  0.0585],
          [ 0.0793,  0.0411, -0.0754],
          [ 0.1451, -0.1044, -0.1212]],

         [[ 0.0483,  0.0630,  0.0113],
          [ 0.0056,  0.0625,  0.1055],
          [-0.0721,  0.0468, -0.0549]],

         [[-0.0771, -0.0774, -0.1109],
          [ 0.1258, -0.0505, -0.0419],
          [-0.0036, -0.0971,  0.0966]],

         [[-0.0464,  0.0792,  0.0433],
          [-0.1480, -0.1371, -0.1453],
          [ 0.1338,  0.1360,  0.0510]],

         [[-0.0620,  0.0490, -0.1126],
          [ 0.0242, -0.0204,  0.1021],
          [ 0.0094, -0.0007, -0.0132]]],


        [[[ 0.1431, -0.1043,  0.0676],
          [-0.1341,  0.0274, -0.1249],
          [ 0.0724,  0.1334, -0.0715]],

         [[ 0.1337, -0.1335,  0.0711],
          [ 0.0254, -0.0341, -0.0206],
          [ 0.0596, -0.0272, -0.1442]],

         [[-0.0538,  0.1265, -0.1284],
          [-0.0503,  0.0615,  0.1090],
          [-0.1121, -0.1032,  0.0899]],

         [[ 0.1014,  0.

In [122]:
x = torch.ones(1, 3)
print(x)

y = torch.ones(2, 3, 3)
print(y)

z = x + y
print(z)


tensor([[1., 1., 1.]])
tensor([[[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]],

        [[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]])
tensor([[[2., 2., 2.],
         [2., 2., 2.],
         [2., 2., 2.]],

        [[2., 2., 2.],
         [2., 2., 2.],
         [2., 2., 2.]]])


In [125]:
z.shape

torch.Size([2, 3, 3])

In [None]:
optimiser = torch.optim.SGD([{"params": Net.fc1.parameters(), 'lr' : 0.001, "momentum" : 0.99},
                             {"params": Net.fc2.parameters()}], lr = 0.01, momentum = 0.9)

In [11]:
help(torch.randn)

Help on built-in function randn:

randn(...)
    randn(*size, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor
    
    Returns a tensor filled with random numbers from a normal distribution
    with mean `0` and variance `1` (also called the standard normal
    distribution).
    
    .. math::
        \text{out}_{i} \sim \mathcal{N}(0, 1)
    
    The shape of the tensor is defined by the variable argument :attr:`size`.
    
    Args:
        size (int...): a sequence of integers defining the shape of the output tensor.
            Can be a variable number of arguments or a collection like a list or tuple.
        out (Tensor, optional): the output tensor.
        dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
            Default: if ``None``, uses a global default (see :func:`torch.set_default_tensor_type`).
        layout (:class:`torch.layout`, optional): the desired layout of returned Tensor.
            De