In [5]:
import itertools

import ml_collections
import torch
import torchvision.datasets as datasets
from torch import nn
from tqdm import tqdm

# Download dataset
# root='./' puts the Flowers102 files under the notebook's working directory and
# download=True asks torchvision to download them.
# No split argument is passed, so `dataset` holds torchvision's default split.
dataset = datasets.Flowers102(root='./', download=True)

In [2]:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd

# NOTE(review): this starts a Chrome WebDriver session, but none of the cells
# visible here use `driver` or call driver.quit() -- confirm the cell is still needed.
driver = webdriver.Chrome()

In [18]:
class Encoder(nn.Module):
    """Pools the input to a fixed spatial size, then applies the encoder layers.

    Args:
        config: object exposing ``target_input_size``; the value is passed
            unchanged as the output size of adaptive average pooling.
            NOTE(review): the configs built by ``get_all_configs`` expose
            ``input_size`` rather than ``target_input_size`` -- confirm which
            config object this class is meant to receive.
    """

    def __init__(self, config):
        super().__init__()
        self.target_input_size = config.target_input_size

        # No layers are registered yet; forward() applies whatever is appended
        # to this list, in order.
        self.layer = nn.ModuleList()

    def forward(self, x):
        """Resize ``x`` spatially and run it through every module in ``self.layer``.

        Args:
            x: tensor accepted by ``adaptive_avg_pool2d``.

        Returns:
            The pooled tensor after all registered layers have been applied;
            with an empty ``self.layer`` this is just the pooled tensor.
        """
        x = nn.functional.adaptive_avg_pool2d(x, self.target_input_size)

        for layer in self.layer:
            x = layer(x)

        # The result must be returned explicitly; without it the module
        # evaluates to None.
        return x
        
# Names of the pretrained ResNet and VGG models from pytorch, grouped by family
class Pretrains:
    # 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'
    resnet_versions = ['resnet' + str(depth) for depth in (18, 34, 50, 101, 152)]

    # Each depth is followed by its batch-norm variant:
    # 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19', 'vgg19_bn'
    vgg_versions = [
        'vgg' + str(depth) + suffix
        for depth in (11, 13, 16, 19)
        for suffix in ('', '_bn')
    ]
            
class PretrainBackbone(nn.Module):
    """Pretrained ResNet/VGG loaded from torch.hub with its last child module removed.

    Args:
        config: object exposing ``pretrain``, which must be one of the names in
            ``Pretrains.resnet_versions`` or ``Pretrains.vgg_versions``.

    Raises:
        ValueError: if ``config.pretrain`` is not one of the supported names.
    """

    def __init__(self, config):
        # Zero-argument super() avoids naming the class, so a rename cannot
        # leave a stale reference behind.
        super().__init__()

        # Validate against both families using the single `pretrain` field.
        supported_versions = Pretrains.resnet_versions + Pretrains.vgg_versions
        if config.pretrain not in supported_versions:
            raise ValueError('Invalid ResNet/VGG Version. Please select from: '
                             + ', '.join(supported_versions))

        # Load pretrained ResNet/VGG backbone
        model = torch.hub.load('pytorch/vision:v0.10.0', config.pretrain, pretrained=True)

        # Keep every top-level child except the last one and chain them.
        # NOTE(review): this assumes the last child is the classification head
        # for every supported model -- confirm for the VGG family.
        backbone_layers = list(model.children())[:-1]
        self.backbone = nn.Sequential(*backbone_layers)

    def forward(self, x):
        """Run ``x`` through the retained backbone modules and return the result."""
        return self.backbone(x)
    
def get_all_configs(training_config):
    """Expand a hyperparameter search space into one config per combination.

    Args:
        training_config: config object whose search axes are sized iterables:
            ``pretrains``, ``input_sizes``,
            ``encoder_decoder.{depths, widths, activations, bottlenecks}`` and
            ``ffn.{depths, widths, activations, dropouts}``. The scalar fields
            ``encoder_decoder.max_epoch``, ``num_classes`` and ``max_epoch``
            are copied into every generated config unchanged.

    Returns:
        A list of ``ml_collections.ConfigDict``, one per element of the
        Cartesian product of the axes. ``pretrains`` varies slowest and
        ``ffn.dropouts`` fastest. The list is empty if any axis is empty.
    """
    encoder_decoder_space = training_config.encoder_decoder
    ffn_space = training_config.ffn

    # One entry per search axis, slowest-varying first. The order here must
    # match the tuple unpacking in the loop below.
    search_axes = [
        training_config.pretrains,
        training_config.input_sizes,
        encoder_decoder_space.depths,
        encoder_decoder_space.widths,
        encoder_decoder_space.activations,
        encoder_decoder_space.bottlenecks,
        ffn_space.depths,
        ffn_space.widths,
        ffn_space.activations,
        ffn_space.dropouts,
    ]

    configs_num = 1
    for axis in search_axes:
        configs_num *= len(axis)

    config_list = []
    # The label has to go through `desc`: tqdm's first positional parameter is
    # the iterable, so a positional string never shows up as the description.
    # The context manager closes the bar once generation finishes.
    with tqdm(desc='Generating Configs', total=configs_num, position=0, leave=True) as pbar:
        for (pretrain, input_size,
             encoder_decoder_depth, encoder_decoder_width,
             encoder_decoder_activation, encoder_decoder_bottleneck,
             ffn_depth, ffn_width,
             ffn_activation, ffn_dropout) in itertools.product(*search_axes):
            # Initialize config dict
            config = ml_collections.ConfigDict()

            # Input
            config.input_size = input_size

            # Pretrain Network
            config.pretrain = pretrain

            # Encoder/Decoder Network (both use the same structure)
            config.encoder_decoder = ml_collections.ConfigDict()
            config.encoder_decoder.depth = encoder_decoder_depth
            config.encoder_decoder.width = encoder_decoder_width
            config.encoder_decoder.activation = encoder_decoder_activation
            config.encoder_decoder.bottleneck = encoder_decoder_bottleneck
            config.encoder_decoder.output_size = input_size
            config.encoder_decoder.max_epoch = encoder_decoder_space.max_epoch

            # FFN
            config.ffn = ml_collections.ConfigDict()
            config.ffn.depth = ffn_depth
            config.ffn.width = ffn_width
            config.ffn.activation = ffn_activation
            config.ffn.dropout = ffn_dropout

            # Output
            config.num_classes = training_config.num_classes

            config.max_epoch = training_config.max_epoch

            config_list.append(config)
            pbar.update(1)
    return config_list

# Hyperparameter search space consumed by get_all_configs: every list-valued
# field below is one axis of the grid.
training_config = ml_collections.ConfigDict()
training_config.input_sizes = [(224, 224)]
# 5 ResNet names + 8 VGG names = 13 pretrained backbones
training_config.pretrains = Pretrains.resnet_versions + Pretrains.vgg_versions

# Encoder/decoder axes: 3 depths * 3 widths * 1 activation * 4 bottlenecks
training_config.encoder_decoder = ml_collections.ConfigDict()
training_config.encoder_decoder.depths = [4, 8, 16]
training_config.encoder_decoder.widths = [64, 256, 512]
training_config.encoder_decoder.activations = ['ReLU']
training_config.encoder_decoder.bottlenecks = [16, 32, 64, 128]
training_config.encoder_decoder.max_epoch = 100

# FFN axes are single-element [None] placeholders, so they do not enlarge the grid
training_config.ffn = ml_collections.ConfigDict()
training_config.ffn.depths = [None]
training_config.ffn.widths = [None]
training_config.ffn.activations = [None]
training_config.ffn.dropouts = [None]

# num_classes is left unset (None) here
training_config.num_classes = None
training_config.max_epoch = 100

# 13 * 1 * 3 * 3 * 1 * 4 = 468 configs; the last expression displays the first one
training_configs = get_all_configs(training_config)
training_configs[0]

100%|██████████████████████████████████████████████████████████████████████████████| 468/468 [00:00<00:00, 9832.71it/s]


encoder_decoder:
  activation: ReLU
  bottleneck: 16
  depth: 4
  max_epoch: 100
  output_size: &id001 !!python/tuple
  - 224
  - 224
  width: 64
ffn:
  activation: null
  depth: null
  dropout: null
  width: null
input_size: *id001
max_epoch: 100
num_classes: null
pretrain: resnet18

In [2]:
import torchvision.models as models

# `training_config` and `training_configs` are built once, in the cell that
# defines get_all_configs. This cell used to repeat that setup line-for-line;
# the copy is dropped so the search space has a single definition that cannot
# drift out of sync.

# Load ResNet-50 with torchvision's pretrained weights (downloads the
# checkpoint on first use).
resnet = models.resnet50(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\iraha/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|█████████████████████████████████████████████████████████████████████████████| 97.8M/97.8M [00:01<00:00, 69.0MB/s]


In [14]:
# Load the pretrained vgg11 from the same hub tag PretrainBackbone uses; the
# bare `model` on the last line displays its module tree.
model = torch.hub.load('pytorch/vision:v0.10.0', 'vgg11', pretrained=True)
model

Using cache found in C:\Users\iraha/.cache\torch\hub\pytorch_vision_v0.10.0


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

In [15]:
# Same inspection for the batch-norm variant. This rebinds `model`, so the
# vgg11 loaded in the previous cell is no longer reachable under that name.
model = torch.hub.load('pytorch/vision:v0.10.0', 'vgg11_bn', pretrained=True)
model

Using cache found in C:\Users\iraha/.cache\torch\hub\pytorch_vision_v0.10.0
Downloading: "https://download.pytorch.org/models/vgg11_bn-6002323d.pth" to C:\Users\iraha/.cache\torch\hub\checkpoints\vgg11_bn-6002323d.pth
100%|███████████████████████████████████████████████████████████████████████████████| 507M/507M [00:07<00:00, 66.7MB/s]


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): MaxPool2d(ke