## Multilayer Perceptron (Non-linear problems)

* Every module in PyTorch subclasses `nn.Module`.

In [128]:
# Install into the environment of the running kernel: `sys.executable` is the
# interpreter backing this notebook, so `-m pip` targets that interpreter.
# NOTE(review): no visible cell imports tensorflow_datasets — confirm this install is still needed.
import sys
!{sys.executable} -m pip install -q tensorflow-datasets


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [4]:
# Import the necessary library
import numpy as np
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn # torch.nn namespace provides all the building blocks you need to build your own neural network.

from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [22]:
# Random stand-in for three 28x28 images
input_image = torch.rand((3, 28, 28))
print(input_image.shape, input_image, sep="\n")

torch.Size([3, 28, 28])
tensor([[[0.0504, 0.8947, 0.2288,  ..., 0.8567, 0.3168, 0.8990],
         [0.8838, 0.8528, 0.9799,  ..., 0.1934, 0.5887, 0.1273],
         [0.5529, 0.7707, 0.2857,  ..., 0.5629, 0.0726, 0.4640],
         ...,
         [0.5918, 0.5450, 0.4096,  ..., 0.6315, 0.7036, 0.3030],
         [0.7919, 0.1907, 0.5372,  ..., 0.2951, 0.9086, 0.3339],
         [0.8247, 0.3221, 0.6539,  ..., 0.6823, 0.2273, 0.1844]],

        [[0.8727, 0.3278, 0.0930,  ..., 0.0768, 0.9419, 0.7781],
         [0.2341, 0.2435, 0.1499,  ..., 0.7503, 0.6852, 0.4001],
         [0.4934, 0.4903, 0.0756,  ..., 0.0104, 0.3208, 0.1865],
         ...,
         [0.6090, 0.5347, 0.0583,  ..., 0.4486, 0.1968, 0.6290],
         [0.5957, 0.8413, 0.8027,  ..., 0.9878, 0.1033, 0.7163],
         [0.2771, 0.7055, 0.2958,  ..., 0.5621, 0.3019, 0.5191]],

        [[0.6832, 0.3939, 0.3172,  ..., 0.3044, 0.2071, 0.1793],
         [0.8646, 0.4800, 0.9653,  ..., 0.6829, 0.2499, 0.2173],
         [0.1331, 0.0341, 0.5350, 

In [23]:
# nn.Flatten keeps dim 0 and collapses the remaining dims: (3, 28, 28) -> (3, 784)
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape, flat_image, sep="\n")

torch.Size([3, 784])
tensor([[0.0504, 0.8947, 0.2288,  ..., 0.6823, 0.2273, 0.1844],
        [0.8727, 0.3278, 0.0930,  ..., 0.5621, 0.3019, 0.5191],
        [0.6832, 0.3939, 0.3172,  ..., 0.9514, 0.7134, 0.9562]])


In [25]:
## Each flattened row of 28*28 inputs is multiplied by the weights, summed and
## shifted by a bias: y = x A^T + b. The result is an unbounded real number per
## output feature — a linear layer does NOT restrict its output to [-1, 1].

layer1 = nn.Linear(in_features=28*28, out_features=20)  # 784 inputs -> 20 features per row
hidden1 = layer1(flat_image)
print(hidden1.size())
print(hidden1)

torch.Size([3, 20])
tensor([[ 0.2861, -0.3594,  0.5275, -0.4018, -0.1297,  0.1118, -0.0690, -0.2954,
          0.5146, -0.2720,  0.3401, -0.4383, -0.1011,  0.3031,  0.2028,  0.2604,
          0.1059, -0.4668,  0.5694, -0.4137],
        [ 0.1466,  0.2805,  0.7327, -0.2291,  0.1350,  0.3265,  0.1327, -0.6191,
          0.3225, -0.1646,  0.3650, -0.2924, -0.2761,  0.2765,  0.0956,  0.3204,
          0.0273,  0.0031,  0.2421, -0.2222],
        [ 0.1227,  0.1003,  0.3599, -0.5632,  0.0815,  0.1213, -0.3312, -0.4519,
          0.7450,  0.3061,  0.4536, -0.4046, -0.1204,  0.1463,  0.4700, -0.0416,
         -0.0021, -0.3618,  0.5015, -0.3423]], grad_fn=<AddmmBackward0>)


In [27]:
## A non-linear activation is applied after the linear transformation;
## ReLU replaces every negative entry with zero.

relu = nn.ReLU()
hidden1 = relu(hidden1)
print(f"After ReLU: {hidden1}")

After ReLU: tensor([[0.2861, 0.0000, 0.5275, 0.0000, 0.0000, 0.1118, 0.0000, 0.0000, 0.5146,
         0.0000, 0.3401, 0.0000, 0.0000, 0.3031, 0.2028, 0.2604, 0.1059, 0.0000,
         0.5694, 0.0000],
        [0.1466, 0.2805, 0.7327, 0.0000, 0.1350, 0.3265, 0.1327, 0.0000, 0.3225,
         0.0000, 0.3650, 0.0000, 0.0000, 0.2765, 0.0956, 0.3204, 0.0273, 0.0031,
         0.2421, 0.0000],
        [0.1227, 0.1003, 0.3599, 0.0000, 0.0815, 0.1213, 0.0000, 0.0000, 0.7450,
         0.3061, 0.4536, 0.0000, 0.0000, 0.1463, 0.4700, 0.0000, 0.0000, 0.0000,
         0.5015, 0.0000]], grad_fn=<ReluBackward0>)


In [36]:
# Softmax along dim 1 rescales every row into probabilities;
# argmax then picks the index of the largest probability in each row.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(hidden1)
output = pred_probab.argmax(dim=1)
print(pred_probab.size(), pred_probab, output, sep="\n")

torch.Size([3, 20])
tensor([[0.0555, 0.0417, 0.0707, 0.0417, 0.0417, 0.0466, 0.0417, 0.0417, 0.0698,
         0.0417, 0.0586, 0.0417, 0.0417, 0.0565, 0.0511, 0.0541, 0.0464, 0.0417,
         0.0737, 0.0417],
        [0.0479, 0.0548, 0.0861, 0.0414, 0.0474, 0.0574, 0.0473, 0.0414, 0.0572,
         0.0414, 0.0596, 0.0414, 0.0414, 0.0546, 0.0455, 0.0570, 0.0425, 0.0415,
         0.0527, 0.0414],
        [0.0465, 0.0454, 0.0589, 0.0411, 0.0446, 0.0464, 0.0411, 0.0411, 0.0866,
         0.0558, 0.0647, 0.0411, 0.0411, 0.0476, 0.0658, 0.0411, 0.0411, 0.0411,
         0.0679, 0.0411]], grad_fn=<SoftmaxBackward0>)
tensor([18,  2,  8])


### Other available layers

In [None]:
hidden2 = nn.Conv2d(in_channels=16,  # Number of channels in the input image
                    out_channels=33,  # Number of channels produced by the convolution
                    kernel_size=3,  # Size of the convolving kernel
                    stride=(2, 1),  # Step of the kernel along (height, width)
                    padding=(4, 2),  # Zero padding added on both sides of (height, width)
                    dilation=(3, 1),  # Spacing between kernel elements
                    groups=1,  # Must be a positive int (1 = ordinary convolution); None raises a TypeError
                    bias=True)  # Add a learnable bias to the output

In [58]:
# PyTorch convolutions take input laid out as (N, C, H, W): batch, channels, height, width.
# Per spatial dim: out = floor((in + 2*padding - dilation*(kernel - 1) - 1) / stride) + 1

# With square kernels and equal stride
m = nn.Conv2d(16, 33, 3, stride=2) # o/p = floor((50-3)/2) + 1 = 24 and floor((100-3)/2) + 1 = 49

# non-square kernels and unequal stride and with padding
m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2)) # o/p = floor((50+4+4-3)/2) + 1 = 28 and (100+2+2-5)/1 + 1 = 100
# non-square kernels and unequal stride and with padding and dilation
# (dilation 3 stretches the height kernel to 3*(3-1)+1 = 7: floor((50+4+4-7)/2) + 1 = 26; width stays 100)
m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
# Only the last `m` is applied below; the earlier two are shown for comparison.
input = torch.randn(20, 16, 50, 100)
output = m(input)
print(output.size())
m

torch.Size([20, 33, 26, 100])


Conv2d(16, 33, kernel_size=(3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))

In [69]:
## Dropout2d drops whole channels at random: every value of a dropped channel becomes zero.

m = nn.Dropout2d(p=0.5)
input = torch.randn((2, 3, 4, 4))
output = m(input)
print(output.shape)
output

torch.Size([2, 3, 4, 4])


tensor([[[[ 0.6838, -1.9966,  0.1360, -1.0349],
          [ 0.8146, -2.3455,  0.8414, -1.6261],
          [ 1.9926, -1.0358,  0.5337,  2.6652],
          [-0.3238, -1.5145, -1.8557,  2.0575]],

         [[ 0.0000, -0.0000, -0.0000, -0.0000],
          [-0.0000, -0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000, -0.0000],
          [ 0.0000, -0.0000,  0.0000,  0.0000]],

         [[-0.0000,  0.0000,  0.0000, -0.0000],
          [-0.0000,  0.0000, -0.0000,  0.0000],
          [-0.0000,  0.0000, -0.0000, -0.0000],
          [ 0.0000, -0.0000, -0.0000,  0.0000]]],


        [[[ 0.0000, -0.0000,  0.0000, -0.0000],
          [ 0.0000,  0.0000,  0.0000, -0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000, -0.0000, -0.0000,  0.0000]],

         [[-0.0000,  0.0000, -0.0000,  0.0000],
          [-0.0000, -0.0000, -0.0000, -0.0000],
          [ 0.0000, -0.0000, -0.0000, -0.0000],
          [-0.0000, -0.0000,  0.0000,  0.0000]],

         [[-0.0000,  0.0000,

In [83]:
# 2x2 average pooling moved one element at a time
# (when stride is omitted it defaults to the kernel size)
m = nn.AvgPool2d(kernel_size=(2, 2), stride=1, padding=0)
input = torch.randn((2, 3, 4, 4))
output = m(input)
print(output.shape, input, sep="\n")
output

torch.Size([2, 3, 3, 3])
tensor([[[[-2.1127, -0.0117, -0.9995, -0.7540],
          [-1.6996,  0.1008, -1.8554,  0.4864],
          [-1.2076,  0.5735, -1.3745,  1.0918],
          [-1.3275, -2.2212,  0.0363,  0.3398]],

         [[-1.1324, -1.0173,  0.7720, -1.0675],
          [-0.4832,  1.3978,  0.4220, -0.9486],
          [ 1.6168, -0.4436,  2.4575, -0.5579],
          [-1.7259, -0.5059,  0.7828, -1.2560]],

         [[ 1.2353, -1.3704, -0.8890,  0.1896],
          [-0.8747, -0.6719, -0.5197,  0.5169],
          [ 0.3571,  1.5120, -0.3358, -0.1510],
          [-0.7454,  0.5255, -0.0294,  0.2510]]],


        [[[ 0.6875,  0.9067,  0.9233,  0.0235],
          [ 0.2635,  0.8541,  1.2604, -0.6985],
          [ 0.2171,  0.8380, -0.0724, -1.6606],
          [ 2.0614,  0.1099, -0.2883, -0.5975]],

         [[ 0.1161,  0.8428,  0.6648, -0.0188],
          [-0.6637,  0.5589,  0.0941, -0.3291],
          [ 0.4265,  0.4298, -1.4245, -0.1953],
          [ 0.1856,  1.1143, -0.7100, -0.9153]],

   

tensor([[[[-0.9308, -0.6915, -0.7806],
          [-0.5582, -0.6389, -0.4129],
          [-1.0457, -0.7465,  0.0234]],

         [[-0.3088,  0.3936, -0.2055],
          [ 0.5219,  0.9584,  0.3432],
          [-0.2646,  0.5727,  0.3566]],

         [[-0.4204, -0.8627, -0.1755],
          [ 0.0806, -0.0038, -0.1224],
          [ 0.4123,  0.4181, -0.0663]]],


        [[[ 0.6779,  0.9861,  0.3772],
          [ 0.5432,  0.7200, -0.2928],
          [ 0.8066,  0.1468, -0.6547]],

         [[ 0.2135,  0.5401,  0.1028],
          [ 0.1879, -0.0855, -0.4637],
          [ 0.5390, -0.1476, -0.8113]],

         [[ 0.0938,  0.5958,  0.0714],
          [-0.1521, -0.1014, -0.4825],
          [ 0.1424, -0.6678, -0.8092]]]])

In [114]:
# BatchNorm2d over 3 channels, with learnable parameters (the default).
# Passing affine=False instead builds the layer without learnable parameters.
m = nn.BatchNorm2d(num_features=3)
input = torch.randn((2, 3, 64, 64))
output = m(input)
print(input, output.size(), sep="\n")
output


tensor([[[[ 5.4101e-01, -3.7756e-01, -3.0646e-01,  ...,  7.2783e-02,
           -1.2965e+00, -1.8411e-01],
          [ 9.4833e-01,  5.9965e-01,  5.7454e-01,  ..., -9.9109e-01,
            4.3715e-01,  5.1081e-01],
          [-5.0062e-01, -7.2004e-01, -4.2925e-01,  ...,  2.1094e-01,
            4.9922e-01,  3.5702e-01],
          ...,
          [-1.1852e+00,  7.2664e-01, -5.2805e-01,  ...,  7.4223e-03,
            6.9306e-01, -1.2317e+00],
          [-1.0776e+00, -6.3344e-01, -5.4436e-01,  ...,  1.5077e-01,
            2.4860e+00, -3.9882e-01],
          [ 9.8535e-01,  6.1906e-01,  1.2447e+00,  ...,  1.7745e+00,
           -1.2297e+00, -9.5257e-01]],

         [[-1.8146e+00, -2.9511e-01,  1.2121e+00,  ...,  9.0286e-01,
            7.9889e-01, -3.1636e-01],
          [ 1.3659e+00, -1.2297e+00, -7.8911e-01,  ...,  9.3384e-02,
           -4.1785e-01, -7.9154e-02],
          [ 1.9059e+00, -5.1964e-01,  3.2222e-01,  ..., -5.7145e-01,
            1.1025e+00,  1.4887e+00],
          ...,
     

tensor([[[[ 0.5516, -0.3642, -0.2933,  ...,  0.0848, -1.2803, -0.1713],
          [ 0.9577,  0.6101,  0.5850,  ..., -0.9758,  0.4481,  0.5215],
          [-0.4869, -0.7056, -0.4157,  ...,  0.2225,  0.5099,  0.3682],
          ...,
          [-1.1694,  0.7367, -0.5142,  ...,  0.0196,  0.7032, -1.2157],
          [-1.0621, -0.6193, -0.5305,  ...,  0.1625,  2.4907, -0.3854],
          [ 0.9946,  0.6294,  1.2532,  ...,  1.7814, -1.2138, -0.9374]],

         [[-1.8309, -0.3052,  1.2083,  ...,  0.8977,  0.7934, -0.3265],
          [ 1.3627, -1.2436, -0.8012,  ...,  0.0849, -0.4284, -0.0883],
          [ 1.9049, -0.5306,  0.3147,  ..., -0.5826,  1.0983,  1.4860],
          ...,
          [-1.5659,  2.0686, -0.0534,  ...,  0.0160, -0.7573, -0.8640],
          [-0.1328,  0.2140, -1.2772,  ..., -0.3652,  0.2617,  1.7573],
          [-0.5593,  0.6810, -0.3828,  ..., -0.0970, -1.1859, -0.2652]],

         [[ 0.6048,  1.4737, -1.7452,  ..., -1.0891, -0.0374,  0.7396],
          [ 1.5391,  0.2028,  

In [119]:
# Standard deviation taken over every element of the second sample in `input`
input[1].detach().std()

tensor(0.9960)

In [118]:
# NOTE(review): identical to the preceding cell — possibly meant to inspect `output[1]`; confirm
input[1].detach().std()

tensor(0.9960)

In [121]:
# Normalize over the last two dims (H, W). This instance is replaced by the
# assignment below and is shown only for comparison.
m = nn.LayerNorm(normalized_shape=(64,64))
# Normalize over (C, H, W) of every sample. LayerNorm has learnable weight and
# bias by default; pass elementwise_affine=False for a version without them.
m = nn.LayerNorm([3, 64, 64])
input = torch.randn(2, 3, 64, 64)
output = m(input)
print(input)
print(output.size())
output

tensor([[[[-0.1565,  0.7500, -0.3719,  ..., -0.9616,  0.4348,  0.4259],
          [-0.6004, -0.8510, -0.6980,  ...,  0.2051,  1.4555,  1.2885],
          [ 1.4530,  1.5289,  1.0650,  ...,  0.5348,  2.6172, -0.8863],
          ...,
          [-0.0803, -0.8151,  1.7993,  ...,  0.1407,  1.3291, -0.1148],
          [-1.2900,  0.5844,  0.1830,  ...,  0.3862, -0.3855, -0.3404],
          [-0.8001, -0.9073,  0.3587,  ...,  0.8436, -0.5880,  0.1786]],

         [[-0.7840,  0.8379,  0.7247,  ...,  0.3759,  0.5021,  0.0217],
          [ 0.6922, -0.9915,  1.3850,  ...,  0.5162, -0.6761, -0.9231],
          [ 1.3833,  0.2084,  2.2427,  ..., -0.7024,  0.5072, -0.1283],
          ...,
          [-0.0527, -1.6055,  0.2229,  ..., -0.4965,  0.0445, -0.1832],
          [-0.2423,  0.7202, -0.8157,  ...,  0.5042,  0.8021, -0.4585],
          [ 2.3486, -1.0346, -0.3242,  ...,  1.0822,  1.1736,  0.0142]],

         [[-1.4340,  0.3589, -0.4900,  ..., -0.5965,  1.9081,  2.0831],
          [ 0.1741,  0.5044,  

tensor([[[[-1.6897e-01,  7.4678e-01, -3.8666e-01,  ..., -9.8237e-01,
            4.2833e-01,  4.1941e-01],
          [-6.1751e-01, -8.7066e-01, -7.1608e-01,  ...,  1.9631e-01,
            1.4595e+00,  1.2908e+00],
          [ 1.4570e+00,  1.5337e+00,  1.0650e+00,  ...,  5.2935e-01,
            2.6331e+00, -9.0633e-01],
          ...,
          [-9.2037e-02, -8.3439e-01,  1.8068e+00,  ...,  1.3125e-01,
            1.3318e+00, -1.2684e-01],
          [-1.3141e+00,  5.7944e-01,  1.7394e-01,  ...,  3.7926e-01,
           -4.0040e-01, -3.5475e-01],
          [-8.1921e-01, -9.2752e-01,  3.5146e-01,  ...,  8.4137e-01,
           -6.0494e-01,  1.6954e-01]],

         [[-8.0300e-01,  8.3563e-01,  7.2127e-01,  ...,  3.6889e-01,
            4.9637e-01,  1.0979e-02],
          [ 6.8844e-01, -1.0126e+00,  1.3882e+00,  ...,  5.1063e-01,
           -6.9390e-01, -9.4351e-01],
          [ 1.3866e+00,  1.9968e-01,  2.2548e+00,  ..., -7.2052e-01,
            5.0145e-01, -1.4049e-01],
          ...,
     

In [125]:
# Lookup table of 10 rows (the dictionary size), each row a 3-value embedding vector
embedding = nn.Embedding(10, 3)
# Two samples, each holding four row indices
input = torch.tensor([[1, 2, 4, 9], [4, 3, 2, 9]], dtype=torch.long)

output = embedding(input)
print(input, output.size(), sep="\n")
output

tensor([[1, 2, 4, 9],
        [4, 3, 2, 9]])
torch.Size([2, 4, 3])


tensor([[[ 0.9467,  0.0607,  1.4769],
         [-0.1438,  0.0460, -1.1133],
         [ 0.0701,  1.3681,  0.7398],
         [-2.1502,  0.6903, -0.1385]],

        [[ 0.0701,  1.3681,  0.7398],
         [ 0.0580,  0.6396, -1.2366],
         [-0.1438,  0.0460, -1.1133],
         [-2.1502,  0.6903, -0.1385]]], grad_fn=<EmbeddingBackward0>)

In [87]:
# Non-overlapping 2x2 max pooling (when stride is omitted it defaults to the kernel size)
m = nn.MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1)
input = torch.randn((2, 3, 4, 4))
output = m(input)
print(output.shape)
output

torch.Size([2, 3, 2, 2])


tensor([[[[ 1.7932,  0.6299],
          [ 0.8435,  0.8923]],

         [[ 1.2835,  0.6578],
          [ 1.8061,  2.2923]],

         [[ 0.5800,  1.9448],
          [ 0.3711,  0.3666]]],


        [[[-0.3181,  0.8630],
          [ 1.5073,  0.5394]],

         [[ 0.4939,  1.0390],
          [ 0.5135,  1.3587]],

         [[ 0.5705,  2.5824],
          [ 1.3930,  0.9852]]]])

In [97]:
# Expand dim 1 (50 values) into the shape (2, 5, 5)
m = nn.Unflatten(dim=1, unflattened_size=(2, 5, 5))
input = torch.rand((5, 50))
output = m(input)
print(output.shape)
output

torch.Size([5, 2, 5, 5])


tensor([[[[0.4137, 0.4168, 0.6267, 0.5545, 0.1852],
          [0.4755, 0.8939, 0.7794, 0.4487, 0.4183],
          [0.4353, 0.1060, 0.7417, 0.5332, 0.9933],
          [0.5534, 0.8294, 0.9621, 0.3030, 0.6233],
          [0.7462, 0.3160, 0.1598, 0.2552, 0.4829]],

         [[0.6314, 0.1528, 0.6282, 0.6284, 0.9382],
          [0.3003, 0.6618, 0.0927, 0.5614, 0.6508],
          [0.9528, 0.7099, 0.8240, 0.6259, 0.2566],
          [0.5557, 0.3863, 0.2540, 0.2950, 0.3233],
          [0.9933, 0.7359, 0.6636, 0.0043, 0.4087]]],


        [[[0.4160, 0.0852, 0.8259, 0.1677, 0.5866],
          [0.4327, 0.5851, 0.9825, 0.6645, 0.1688],
          [0.5623, 0.6275, 0.6107, 0.7818, 0.8212],
          [0.0745, 0.9408, 0.1810, 0.6503, 0.4775],
          [0.9192, 0.9951, 0.2679, 0.0293, 0.7661]],

         [[0.9098, 0.9990, 0.3957, 0.6331, 0.3246],
          [0.2829, 0.2258, 0.0270, 0.9127, 0.0318],
          [0.5094, 0.8721, 0.1583, 0.9802, 0.6206],
          [0.2394, 0.5353, 0.2095, 0.5855, 0.4009],
    

In [88]:
# Element-wise sigmoid: 1 / (1 + e**(-x))
m = nn.Sigmoid()
input = torch.randn((2,))
output = m(input)
print(input, output, sep="\n")

tensor([-1.1958,  1.0164])
tensor([0.2322, 0.7343])


In [37]:
# The same pipeline chained in one container:
# flatten -> linear(784, 20) -> ReLU -> linear(20, 10)
seq_model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28 * 28, 20),
    nn.ReLU(),
    nn.Linear(in_features=20, out_features=10),
)
input_image = torch.rand((3, 28, 28))
# logits are the raw, unnormalized scores, anywhere in (-inf, inf)
logits = seq_model(input_image)
logits

tensor([[-0.0791,  0.0007, -0.3884, -0.0749,  0.3717,  0.2087, -0.0190,  0.0519,
         -0.0928, -0.2545],
        [-0.1441,  0.0767, -0.2908, -0.0611,  0.3775,  0.1745,  0.1058,  0.0007,
         -0.2172, -0.2198],
        [-0.2000,  0.0842, -0.2380, -0.1137,  0.3960,  0.1900,  0.0168, -0.0478,
         -0.2367, -0.2394]], grad_fn=<AddmmBackward0>)

In [38]:
# Turn every row of logits into probabilities by normalizing along dim 1
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)
pred_probab

tensor([[0.0930, 0.1007, 0.0683, 0.0934, 0.1460, 0.1240, 0.0988, 0.1060, 0.0917,
         0.0780],
        [0.0866, 0.1079, 0.0747, 0.0940, 0.1458, 0.1190, 0.1111, 0.1000, 0.0805,
         0.0802],
        [0.0833, 0.1107, 0.0802, 0.0908, 0.1512, 0.1230, 0.1035, 0.0970, 0.0803,
         0.0801]], grad_fn=<SoftmaxBackward0>)

In [21]:
print(f"Model structure: {seq_model}\n\n")

# Show every parameter's name, its shape and its first two entries along dim 0
for layer_name, weights in seq_model.named_parameters():
    print(f"Layer: {layer_name} | Size: {weights.size()} | Values : {weights[:2]} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0246,  0.0040, -0.0346,  ...,  0.0210,  0.0203, -0.0181],
        [-0.0066,  0.0261, -0.0342,  ..., -0.0343, -0.0258,  0.0323]],
       device='mps:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0339,  0.0023], device='mps:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([10, 512]) | Values : tensor([[-0.0208, -0.0348, -0.0147,  ..., -0.0129,  0.0115, -0.0020],
        [ 0.0379,  0.0059,  0.0047,  ..., -0.0382,  0.0369, -0.0121]],
       device='mps:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.bias | Size: torch.Size([10]) | Values

### Define MLP

In [39]:
# Seeded two-class dataset: 1000 two-feature points drawn around 2 centers with spread 3
X, y = make_blobs(n_samples=1000, n_features=2, centers=2,
                  cluster_std=3, random_state=23)

In [40]:
 
# Hold out 20% of the samples for testing (shuffled, seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=23, shuffle=True
)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((800, 2), (200, 2), (800,), (200,))

In [41]:
# Prefer CUDA, then MPS (Metal Performance Shaders, for Metal GPUs), and fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using mps device


In [42]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten -> Linear -> ReLU -> Linear.

    Args:
        in_features: number of values per sample once flattened (default 28*28).
        hidden_features: width of the hidden layer (default 512).
        num_classes: number of output logits (default 10).

    The defaults reproduce the original fixed 784 -> 512 -> 10 network, and the
    parameter names in ``state_dict()`` are unchanged.
    """

    def __init__(self, in_features=28*28, hidden_features=512, num_classes=10):
        super().__init__()
        # Keeps dim 0 (the batch) and collapses every remaining dim into one
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the raw (pre-softmax) logits, one row of ``num_classes`` scores per sample."""
        x = self.flatten(x)
        return self.linear_relu_stack(x)

In [43]:
# Build the network, then move its parameters to the selected device
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [45]:
# One random 28x28 input (a batch of 1), created directly on the selected device.
# Kept under its own name so the make_blobs feature matrix stored in `X` is not overwritten.
sample_image = torch.rand(1, 28, 28, device=device)

logits = model(sample_image)
# Softmax along dim 1 turns the 10 logits into class probabilities
pred_probab = nn.Softmax(dim=1)(logits)
pred_probab

tensor([[0.0993, 0.1093, 0.0907, 0.1076, 0.0965, 0.0882, 0.0996, 0.1163, 0.1002,
         0.0924]], device='mps:0', grad_fn=<SoftmaxBackward0>)

In [46]:
# The index of the largest probability in each row is the predicted class
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([7], device='mps:0')


In [47]:
## Save and load the whole model object using pickle
torch.save(model, "my_model.pickle")
# A fully pickled module is not a plain tensor/state_dict checkpoint, so it must
# be loaded with weights_only=False (the default is True from PyTorch 2.6 on).
# Unpickling can execute arbitrary code — only load files you created or trust.
model = torch.load("my_model.pickle", weights_only=False)

In [51]:
## Recommended way of saving a model: keep the model design in code and store only the weights.

torch.save(model.state_dict(), "my_model_weights.pickle")
# The weights must go back into the architecture they came from: NeuralNetwork
# names its parameters "linear_relu_stack.0.weight", ..., whereas a bare
# nn.Sequential uses "0.weight", ..., so loading into the latter fails with
# missing/unexpected keys on a fresh run.
state_dict = torch.load("my_model_weights.pickle", map_location=device, weights_only=True)
model = NeuralNetwork().to(device)
model.load_state_dict(state_dict)

<All keys matched successfully>