# Layer initialization and transfer learning

In [6]:
import torch.nn as nn

# Build a linear layer and inspect the range of its default-initialized weights.
# With 64 input features the printed extremes stay within about +/-0.125 (= 1/sqrt(64)).
layer = nn.Linear(in_features=64, out_features=128)
print(layer.weight.min(), layer.weight.max(), sep="\n")

tensor(-0.1250, grad_fn=<MinBackward1>)
tensor(0.1249, grad_fn=<MaxBackward1>)


In [11]:
import torch.nn as nn

layer = nn.Linear(in_features=64, out_features=128)

# Layer initialization: overwrite the weights in place (trailing underscore)
# with samples from U(0, 1), the default bounds of nn.init.uniform_.
nn.init.uniform_(layer.weight, a=0.0, b=1.0)

smallest_weight = layer.weight.min()
largest_weight = layer.weight.max()
print(smallest_weight)
print(largest_weight)

tensor(0.0006, grad_fn=<MinBackward1>)
tensor(1.0000, grad_fn=<MaxBackward1>)


In [13]:
# transfer learning
import torch

# Round trip: persist the whole layer object to disk, then read it back
# as a separate object that carries the same weights.
checkpoint_path = "layer.pth"

layer = nn.Linear(64, 128)
torch.save(layer, checkpoint_path)

# weights_only=False unpickles arbitrary Python objects (needed here because the
# full nn.Linear module was saved); only load files from a trusted source this way.
new_layer = torch.load(checkpoint_path, weights_only=False)

In [None]:
# fine-tuning

import torch.nn as nn

model = nn.Sequential(
    nn.Linear(in_features=64, out_features=128),
    nn.Linear(in_features=128, out_features=256),
)

# Freeze the first layer's weight matrix so the optimizer leaves it untouched;
# every other parameter (including the first layer's bias) stays trainable.
names_to_freeze = {"0.weight"}
for name, param in model.named_parameters():
    if name in names_to_freeze:
        param.requires_grad_(False)

# Evaluating model performance
- Training set >> adjust model parameters (weight/bias)
- Validation set >> tune hyperparameters (learning rate/momentum)
- Test set >> evaluate model final performance

In [None]:
# One epoch of training followed by a validation pass.
# NOTE: `model`, `criterion`, `optimizer`, `trainloader` and `validationloader`
# are not created in this cell; they must already exist in the kernel.

# --- training ---
training_loss = 0.0
for inputs, labels in trainloader:
    # Forward pass on the current batch. This must be the loop variable `inputs`;
    # the bare name `input` would refer to Python's builtin function.
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    # Clear the gradients so they do not accumulate into the next batch.
    optimizer.zero_grad()

    training_loss += loss.item()
# Mean training loss per batch for this epoch.
epoch_loss = training_loss / len(trainloader)

# --- evaluation ---
validation_loss = 0.0
model.eval()  # put the model in evaluation mode

# No gradients are needed while measuring the validation loss.
with torch.no_grad():
    for inputs, labels in validationloader:
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        validation_loss += loss.item()
# Mean validation loss per batch.
evaluation_loss = validation_loss / len(validationloader)

model.train()  # back to training mode for the next epoch

# when the model starts overfitting, training loss keeps decreasing, but the validation loss starts to rise

In [None]:
# calculate model accuracy
import torchmetrics

# Multiclass accuracy over 3 classes, accumulated batch by batch.
metric = torchmetrics.Accuracy(task="multiclass", num_classes=3)

for features, labels in dataloader:
    outputs = model(features)
    # argmax over the last dimension converts the labels to class indices
    # (presumably the labels are one-hot encoded -- confirm against the dataset).
    class_targets = labels.argmax(dim=-1)
    metric.update(outputs, class_targets)

# At the end of each epoch, calculate the accuracy over all accumulated batches.
accuracy = metric.compute()

# Reset the metric state before the next epoch.
metric.reset()

# Fighting overfitting

In [None]:
# `optim` is not imported by any earlier cell, so import it here to keep this
# cell runnable on a fresh kernel.
import torch.optim as optim

# 1. dropout layer
model = nn.Sequential(
    nn.Linear(8, 4),
    nn.ReLU(),
    # p is the probability of each activation being set to zero (50% here).
    nn.Dropout(p=0.5),
)

# 2. weight decay
# weight_decay adds an L2 penalty on the parameters to every SGD update.
optimizer = optim.SGD(model.parameters(), lr=0.001, weight_decay=0.001)

# 3. data augmentation

# Improving model performance