In [53]:
from jupyter_utils import jupyter_setup
jupyter_setup()
import numpy as np
import pandas as pd
import torch
from torch import nn
from torchvision.models import resnet18, resnet50, ResNet18_Weights, ResNet50_Weights


D:\Archiv\Studium\Master\6.-Semester\Masters_Thesis\Git\acoustic_covid_detection\python


In [54]:
import matplotlib.pyplot as plt

In [3]:
def get_parameter_groups(model, output_lr, input_lr, verbose=True):
    # applies different learning rates for each (parent) layer in the model (for finetuning a pretrained network).
    # the inout layer gets the input_lr, the output layer the output_lr. All layers in between get linearly interpolated.

    # works for resnet architecture and assigns a learning rate for each parent layer and the input and output layers
    # in total there are (for a resnet 18) 61 parameter groups but only 4 parent layers and 3 layers as input/output layers
    # this means there are only  4+3  different learning rates.
    
    parent_layer = lambda name: name.split(".")[0]    
    layer_names = [name for name, _ in model.named_parameters()]
    layer_names.reverse()
    parent_layers = list(set([parent_layer(layer) for layer in layer_names]))
    n_parent_layers = len(parent_layers)
    lr=output_lr
    last_parent_layer = parent_layer(layer_names[0])
    if verbose:
        print(f'0: lr = {lr:.6f}, {last_parent_layer}')
    
    lr_mult = np.power(input_lr/output_lr, 1/(n_parent_layers-1))
    parameters = []
    for idx, layer in enumerate(layer_names):
        current_parent_layer = parent_layer(layer)
        if last_parent_layer != (current_parent_layer):
            lr *= lr_mult
            if verbose:
                print(f'{idx}: lr = {lr:.6f}, {current_parent_layer}')
            last_parent_layer = current_parent_layer
        parameters.append({'params': [p for n, p in model.named_parameters() if n == layer and p.requires_grad],
                           'lr':     lr})
    return parameters

In [4]:
my_model = resnet18(weights=ResNet18_Weights.DEFAULT)

In [None]:
parameters = get_parameter_groups(my_model, output_lr=1e-3, input_lr=5e-5, verbose=True)

In [22]:
my_model.conv1

Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

In [19]:
weights = my_model.conv1.weight

In [21]:
weights_single_channel = weights.mean(dim=1).unsqueeze(dim=1)
weights_single_channel.shape

torch.Size([64, 1, 7, 7])

In [23]:
in_channels = 1
out_channels = 64
my_model.conv1 = nn.Conv2d(in_channels, out_channels , kernel_size=7, stride=2, padding=3, bias=False)

In [28]:
my_model.conv1.weight = nn.Parameter(weights_single_channel)

In [50]:
inp = torch.randn((2, 1, 20, 20))
inp.shape

torch.Size([2, 1, 20, 20])

In [51]:
out = my_model(inp)
out

tensor([[-0.6966, -4.1082, -1.0576,  ..., -2.0853,  1.7552, -0.7383],
        [ 0.2955,  2.9958, -0.6703,  ..., -0.3887,  1.8373,  2.0532]],
       grad_fn=<AddmmBackward0>)

In [52]:
out.shape

torch.Size([2, 1000])

In [49]:
my_model

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  