In [1]:
from jupyter_utils import jupyter_setup
jupyter_setup()
import numpy as np
import pandas as pd
import torch
from torch import nn
from torchvision.models import resnet18, resnet50, ResNet18_Weights, ResNet50_Weights
import matplotlib.pyplot as plt
from torch.optim import Adam
from torch.optim.lr_scheduler import ExponentialLR
from torch.nn import InstanceNorm2d
from utils.utils import ResidualInstanceNorm2d
from torchinfo import summary

D:\Archiv\Studium\Master\6.-Semester\Masters_Thesis\Git\acoustic_covid_detection\python


# Set the learning rate for each layer
For transfer learning / fine-tuning it is advantageous if the model retains most of the learned parameters in the early layers, which encode general, low-level features such as edges and simple shapes. This can be achieved by setting the learning rate very small for the early layers and higher for the later layers. The extreme case would be to freeze some layers entirely (or all layers except the classification layer).

In [2]:
def get_parameter_groups(model, output_lr, input_lr, weight_decay=1e-4, verbose=True):
    """Build optimizer parameter groups with a layer-wise learning rate.

    Intended for fine-tuning a pretrained network (written with the ResNet
    architecture in mind). Parameters are visited from the output side to the
    input side. All parameters that share the same parent layer (the first
    component of their dotted name, e.g. ``layer4`` for
    ``layer4.0.conv1.weight``) receive the same learning rate. The parent layer
    closest to the output gets ``output_lr``, the one closest to the input gets
    ``input_lr``, and the ones in between are interpolated geometrically
    (each step multiplies the learning rate by a constant factor).

    For a ResNet-18 this yields one group per named parameter but only
    4 + 3 = 7 distinct learning rates (4 residual stages plus conv1, bn1, fc).

    Args:
        model: module whose ``named_parameters()`` are grouped.
        output_lr: learning rate for the parent layer nearest the output.
        input_lr: learning rate for the parent layer nearest the input.
        weight_decay: weight decay stored in every group.
        verbose: if True, print the learning rate whenever the parent layer changes.

    Returns:
        A list of dicts (``params``, ``lr``, ``weight_decay``), one per named
        parameter, ordered from output to input. Parameters with
        ``requires_grad == False`` produce a group with an empty ``params`` list.
        An empty list is returned for a model without parameters.
    """

    def parent_layer(name):
        return name.split(".")[0]

    # Materialise once (instead of re-scanning the model for every parameter)
    # and walk from the output layer back towards the input layer.
    named_params = list(model.named_parameters())
    named_params.reverse()
    if not named_params:
        return []

    # Count the parent layers exactly the way the loop below steps through
    # them (one step per change of parent name), so that the last parent layer
    # always ends up at input_lr.
    n_parent_layers = 1
    for (previous_name, _), (name, _) in zip(named_params, named_params[1:]):
        if parent_layer(previous_name) != parent_layer(name):
            n_parent_layers += 1

    # With a single parent layer there is nothing to interpolate; every group
    # keeps output_lr (the original expression divided by zero here).
    if n_parent_layers > 1:
        lr_mult = float(np.power(input_lr / output_lr, 1 / (n_parent_layers - 1)))
    else:
        lr_mult = 1.0

    lr = output_lr
    last_parent_layer = parent_layer(named_params[0][0])
    if verbose:
        print(f'0: lr = {lr:.6f}, {last_parent_layer}')

    parameters = []
    for idx, (name, param) in enumerate(named_params):
        current_parent_layer = parent_layer(name)
        if last_parent_layer != current_parent_layer:
            lr *= lr_mult
            if verbose:
                print(f'{idx}: lr = {lr:.6f}, {current_parent_layer}')
            last_parent_layer = current_parent_layer
        parameters.append({'params': [param] if param.requires_grad else [],
                           'lr':     lr,
                           'weight_decay': weight_decay})
    return parameters

In [3]:
# Load a ResNet-18 initialised with torchvision's default pretrained weights.
my_model = resnet18(
    weights=ResNet18_Weights.DEFAULT,
)

In [4]:
# One optimizer group per parameter; the learning rate decays from 1e-3 at the
# output layer down to 5e-5 at the input layer.
parameters = get_parameter_groups(
    my_model,
    output_lr=1e-3,
    input_lr=5e-5,
    verbose=True,
)

0: lr = 0.001000, fc
2: lr = 0.000607, layer4
17: lr = 0.000368, layer3
32: lr = 0.000224, layer2
47: lr = 0.000136, layer1
59: lr = 0.000082, bn1
61: lr = 0.000050, conv1


In [5]:
# Adam takes the per-group learning rates and weight decay from `parameters`.
optimizer = Adam(params=parameters)
# Multiply every group's learning rate by 0.5 on each scheduler step.
lr_scheduler = ExponentialLR(optimizer=optimizer, gamma=0.5)

# Change the number of input channels
The pretrained ResNet was trained on RGB images, so its first convolution has 3 input channels, one per color. My data has only 1 channel, so what should happen to the pretrained weights of that layer?

In [106]:
# Pretrained kernels of the first convolution. `weights` was previously not
# defined anywhere in the notebook, so this cell failed on a fresh kernel.
# Conv2d stores its weight as (out_channels, in_channels, kH, kW), so dim=1 is
# the input (colour) channel axis. detach().clone() gives an independent copy
# without autograd history.
weights = my_model.conv1.weight.detach().clone()
# create a single channel that has the mean of the 3 RGB channel weights
weights_single_channel = weights.mean(dim=1, keepdim=True)
# create a single channel that has the channel weights of channel 0
# (presumably the red channel, assuming RGB channel order - TODO confirm)
weights_single_color = weights[:, 0:1, :, :]

In [15]:
# Build a 1-channel replacement for the first convolution and load the
# averaged pretrained kernels into it before attaching it to the model.
# NOTE(review): parameter groups / optimizers created before this cell still
# reference the old conv1 weight and have to be rebuilt afterwards.
single_channel_conv = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
single_channel_conv.weight = nn.Parameter(weights_single_channel)
# Alternative: reuse only the kernels of input channel 0.
# single_channel_conv.weight = nn.Parameter(weights_single_color)
my_model.conv1 = single_channel_conv

# Add dropout after each parent layer in the ResNet (every time downsampling is applied)

In [17]:
# Start again from a fresh pretrained ResNet-18 so the dropout experiment
# below is independent of the modifications made to the previous model.
my_model = resnet18(
    weights=ResNet18_Weights.DEFAULT,
)

In [18]:
# Append channel-wise dropout to each residual stage, with the drop
# probability growing towards the output of the network.
for stage_name, drop_prob in (("layer1", 0.1), ("layer2", 0.2), ("layer3", 0.3), ("layer4", 0.4)):
    stage_blocks = getattr(my_model, stage_name)
    setattr(my_model, stage_name, nn.Sequential(*stage_blocks, nn.Dropout2d(p=drop_prob)))
# Element-wise dropout on the pooled features that feed the classifier.
my_model.avgpool = nn.Sequential(my_model.avgpool, nn.Dropout(p=0.5))
# my_model