In [1]:
import os 
import torch  
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms


In [6]:
#Get Device for training
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else  "cpu"
print(f"Using {device} device")
print(torch.__version__)

Using cpu device
2.6.0+cpu


In [7]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [8]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)


In [14]:
X = torch.rand(1, 28, 28, device=device) # 1 image with 28x28 pixels
logits = model(X) # Pass the image to the model
print(logits)
pred_probab = nn.Softmax(dim=1)(logits) # nn.Softmax(dim=1) is a function that converts logits to probabilities
y_pred = pred_probab.argmax(1) # Get the predicted class of the image 
print(f"Predicted class: {y_pred}")

tensor([[0.0047, 0.0064, 0.0000, 0.0000, 0.0403, 0.0129, 0.0113, 0.0712, 0.0377,
         0.0000]], grad_fn=<ReluBackward0>)
Predicted class: tensor([7])


In [11]:
help('torch.rand')

Help on built-in function rand in torch:

torch.rand = rand(...)
    rand(*size, *, generator=None, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False, pin_memory=False) -> Tensor
    
    Returns a tensor filled with random numbers from a uniform distribution
    on the interval :math:`[0, 1)`
    
    The shape of the tensor is defined by the variable argument :attr:`size`.
    
    Args:
        size (int...): a sequence of integers defining the shape of the output tensor.
            Can be a variable number of arguments or a collection like a list or tuple.
    
    Keyword args:
        generator (:class:`torch.Generator`, optional): a pseudorandom number generator for sampling
        out (Tensor, optional): the output tensor.
        dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.
            Default: if ``None``, uses a global default (see :func:`torch.set_default_dtype`).
        layout (:class:`torch.layout`, option

In [13]:
print(torch.rand(1,2,2))

tensor([[[0.3498, 0.7708],
         [0.9219, 0.1663]]])


In [28]:
image_input = torch.rand(3, 28, 28)
print(image_input.shape)

torch.Size([3, 28, 28])


In [30]:
flatten = nn.Flatten()
flat_image = flatten(image_input)
print(flat_image.size())

torch.Size([3, 784])


In [31]:
layer1 = nn.Linear(in_features=28*28, out_features=20)
hidden1 = layer1(flat_image)
print(hidden1.size())

torch.Size([3, 20])


In [32]:
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-0.1677, -0.2056, -0.1430, -0.1840, -0.0558,  0.2176,  0.3065, -0.1982,
         -0.4467,  0.2053, -0.2286, -0.1961,  0.4031, -0.0521,  0.0547,  0.3832,
         -0.0581,  0.0461,  0.0473,  0.3992],
        [-0.5004,  0.0452, -0.5128, -0.0977,  0.0754,  0.1590, -0.2494, -0.3839,
         -0.3106,  0.0957, -0.2398,  0.3879,  0.1831,  0.0143,  0.1793,  0.4734,
         -0.1054,  0.4928,  0.1588,  0.4121],
        [-0.5606,  0.0332, -0.6507, -0.3006,  0.2806,  0.1505,  0.0781, -0.1502,
         -0.5643,  0.1053, -0.1817,  0.2531,  0.3976, -0.1308,  0.1596,  0.2778,
         -0.1429,  0.0565, -0.2723,  0.1296]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.2176, 0.3065, 0.0000, 0.0000,
         0.2053, 0.0000, 0.0000, 0.4031, 0.0000, 0.0547, 0.3832, 0.0000, 0.0461,
         0.0473, 0.3992],
        [0.0000, 0.0452, 0.0000, 0.0000, 0.0754, 0.1590, 0.0000, 0.0000, 0.0000,
         0.0957, 0.0000, 0.3879, 0.1831, 0.0143, 0.17

nn.Sequential est un conteneur ordonné de modules. Les données sont transmises à tous les modules dans le même ordre que celui défini. Vous pouvez utiliser des conteneurs séquentiels pour créer un réseau rapide comme seq_modules

In [33]:
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10)
)
input_image = torch.rand(3,28,28)
logits = seq_modules(input_image)

In [34]:
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

In [35]:
print(f"Model structure: {model}\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")


Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0224,  0.0349,  0.0165,  ...,  0.0138,  0.0355,  0.0107],
        [-0.0114,  0.0177, -0.0093,  ..., -0.0041, -0.0289,  0.0148]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([ 0.0296, -0.0179], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0405,  0.0096, -0.0232,  ..., -0.0367, -0.0076, -0.0423],
        [ 0.0278,  0.0121,  0.0098,  ..., -0.0402, -0.0219, -0.0107]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu

In [18]:
t = torch.tensor([[[1,2],[3,4],[5,6],[7,8]]])
print(t.shape)
print(t)

torch.Size([1, 4, 2])
tensor([[[1, 2],
         [3, 4],
         [5, 6],
         [7, 8]]])


In [19]:
torch.flatten(t)

tensor([1, 2, 3, 4, 5, 6, 7, 8])

In [23]:
t = torch.tensor([[[1,2],[3,4],[5,6],[7,8]]])
torch.flatten(t, start_dim = 0, end_dim=-1)

tensor([1, 2, 3, 4, 5, 6, 7, 8])

In [26]:
x = torch.randn(32, 64, 7, 7)
# Aplatir à partir de la dimension 1 jusqu'à la dernière
x_end_flat = torch.flatten(x, end_dim=1)
x_start_flat = torch.flatten(x, start_dim=1)
x_flat = torch.flatten(x, start_dim=1, end_dim=-1)
print(x_end_flat.shape)
print(x_start_flat.shape)
print(x_flat.shape)

torch.Size([2048, 7, 7])
torch.Size([32, 3136])
torch.Size([32, 3136])


In [41]:
#torch.autograd

x = torch.ones(5)  # input tensor
y = torch.zeros(3)  # expected output
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
z = torch.matmul(x, w)+b
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y) # loss function

In [42]:
print(f"Gradient function for z = {z.grad_fn}")
print(f"Gradient function for loss = {loss.grad_fn}")

Gradient function for z = <AddBackward0 object at 0x000002162659F220>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x0000021625F0F610>


In [43]:
# Optimizing the loss function : calculating the derivatives
loss.backward() # calculate the gradients
print(w.grad) # dloss/dw
print(b.grad) # dloss/db

tensor([[0.2709, 0.0931, 0.2362],
        [0.2709, 0.0931, 0.2362],
        [0.2709, 0.0931, 0.2362],
        [0.2709, 0.0931, 0.2362],
        [0.2709, 0.0931, 0.2362]])
tensor([0.2709, 0.0931, 0.2362])


In [44]:
z = torch.matmul(x, w)+b
print(z.requires_grad)

with torch.no_grad():
    z = torch.matmul(x, w)+b
print(z.requires_grad)

True
False


In [45]:
z = torch.matmul(x, w)+b
z_det = z.detach()
print(z_det.requires_grad)

False


There are reasons you might want to disable gradient tracking:

To mark some parameters in your neural network as frozen parameters.

To speed up computations when you are only doing forward pass, because computations on tensors that do not track gradients would be more efficient.