In [1]:
import torch
import torch.nn as nn

In [None]:
## each layer maps its inputs to a fixed number of outputs,
## and the output of one layer is the input of the next layer
## the bias is an additional parameter that is added to the output of each linear layer
## the activation functions are used to introduce non-linearity into the model
## ReLU is a commonly used activation function that sets all negative values to zero
## Softmax is used in the output layer to convert logits to probabilities

## nn.Sequential is a container that chains modules: each module's output feeds the next one
model = nn.Sequential(
    ## first linear layer: 3 input features -> 2 outputs (nn.Linear adds a bias by default)
    nn.Linear(3, 2),
    nn.ReLU(),  ## rectified linear unit: negative values become zero

    ## second linear layer: 2 -> 4
    nn.Linear(2, 4),
    nn.ReLU(),

    ## output layer: 4 -> 2 logits, turned into probabilities by a softmax over dim 1
    nn.Linear(4, 2),
    nn.Softmax(dim=1),
)

In [4]:
## a bare expression on the last line of a cell is displayed: this shows the model's layers
model

Sequential(
  (0): Linear(in_features=3, out_features=2, bias=True)
  (1): ReLU()
  (2): Linear(in_features=2, out_features=4, bias=True)
  (3): ReLU()
  (4): Linear(in_features=4, out_features=2, bias=True)
  (5): Softmax(dim=1)
)

In [6]:
## build a random sample batch: 2 samples (rows) with 3 features each (columns),
## matching the 3 input features expected by the first linear layer
sample_input = torch.randn((2, 3))
sample_input

tensor([[ 0.1930,  1.8500, -1.6389],
        [ 0.5727,  0.2204, -0.0729]])

In [7]:
## forward pass: run the sample batch through every layer of the model
output = model(sample_input)
## one row per sample; the final Softmax(dim=1) makes each row sum to 1
output

tensor([[0.4221, 0.5779],
        [0.4229, 0.5771]], grad_fn=<SoftmaxBackward0>)