In [None]:
import keras

In [None]:
model = keras.applications.ResNet50(
    include_top=True,
    weights=None,
    input_tensor=None,
    input_shape=(256,512,3),
    pooling=None,
    classes=1000,
    classifier_activation="softmax",
)

In [None]:
model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 256, 512, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 262, 518, 3)          0         ['input_2[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 128, 256, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 128, 256, 64)         256       ['conv1_conv[0][0]']          
 on)                                                                                       

In [None]:
import torch
import torch.nn as nn
import math
# from models.resnet import resnet50
from torchvision.models import resnet50


class ProjectionMLP(nn.Module):
    def __init__(self, in_dim, mid_dim, out_dim):
        super(ProjectionMLP, self).__init__()
        self.l1 = nn.Sequential(
            nn.Linear(in_dim, mid_dim),
            nn.BatchNorm1d(mid_dim),
            nn.ReLU(inplace=True)
        )
        self.l2 = nn.Sequential(
            nn.Linear(mid_dim, mid_dim),
            nn.BatchNorm1d(mid_dim),
            nn.ReLU(inplace=True)
        )
        self.l3 = nn.Sequential(
            nn.Linear(mid_dim, out_dim),
            nn.BatchNorm1d(out_dim)
        )

    def forward(self, x):
        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)

        return x


class PredictionMLP(nn.Module):
    def __init__(self, in_dim, mid_dim, out_dim):
        super(PredictionMLP, self).__init__()
        self.l1 = nn.Sequential(
            nn.Linear(in_dim, mid_dim),
            nn.BatchNorm1d(mid_dim),
            nn.ReLU(inplace=True)
        )
        self.l2 = nn.Linear(mid_dim, out_dim)

    def forward(self, x):
        x = self.l1(x)
        x = self.l2(x)

        return x


class SimSiam(nn.Module):

    def __init__(self, backbone='resnet50', d=2048):
        super(SimSiam, self).__init__()

        if backbone == 'resnet50':
            net = resnet50()
        else:
            raise NotImplementedError('Backbone model not implemented.')

        num_ftrs = net.fc.in_features
        self.features = nn.Sequential(*list(net.children())[:-1])
        # num_ftrs = net.fc.out_features
        # self.features = net

        # projection MLP
        self.projection = ProjectionMLP(num_ftrs, 2048, 2048)
        # prediction MLP
        self.prediction = PredictionMLP(2048, 512, 2048)

        self.reset_parameters()

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        # projection
        z = self.projection(x)
        # prediction
        p = self.prediction(z)
        return z, p

    def reset_parameters(self):
        # reset conv initialization to default uniform initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels
                stdv = 1. / math.sqrt(n)
                m.weight.data.uniform_(-stdv, stdv)
                if m.bias is not None:
                    m.bias.data.uniform_(-stdv, stdv)
            elif isinstance(m, nn.Linear):
                stdv = 1. / math.sqrt(m.weight.size(1))
                m.weight.data.uniform_(-stdv, stdv)
                if m.bias is not None:
                    m.bias.data.uniform_(-stdv, stdv)

In [None]:
model2 = SimSiam()

In [None]:
from torchsummary import summary
summary(model2, (3,512,512))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 256, 256]           9,408
       BatchNorm2d-2         [-1, 64, 256, 256]             128
              ReLU-3         [-1, 64, 256, 256]               0
         MaxPool2d-4         [-1, 64, 128, 128]               0
            Conv2d-5         [-1, 64, 128, 128]           4,096
       BatchNorm2d-6         [-1, 64, 128, 128]             128
              ReLU-7         [-1, 64, 128, 128]               0
            Conv2d-8         [-1, 64, 128, 128]          36,864
       BatchNorm2d-9         [-1, 64, 128, 128]             128
             ReLU-10         [-1, 64, 128, 128]               0
           Conv2d-11        [-1, 256, 128, 128]          16,384
      BatchNorm2d-12        [-1, 256, 128, 128]             512
           Conv2d-13        [-1, 256, 128, 128]          16,384
      BatchNorm2d-14        [-1, 256, 1

In [None]:
import torch
import torch.nn as nn
import math
# from models.resnet import resnet50
from torchvision.models import resnet50, resnet18

In [None]:
net = resnet50()

In [None]:
feature_maps = nn.Sequential(*list(net.children()))
print(feature_maps)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [None]:
hola = list(net.children())[:-2]

In [None]:
print(hola[7])

NameError: name 'hola' is not defined

In [None]:
nueva_lista = copy.deepcopy(hola)
for i, layer in enumerate(nueva_lista):
    if isinstance(layer, nn.Sequential):
        for j, sub_layer in enumerate(layer):
            if j == len(layer) - 1 and i == len(nueva_lista) - 1:
                # Eliminar los atributos conv3 y bn3 del último sub_layer de la última secuencia
                delattr(sub_layer, 'conv3')
                delattr(sub_layer, 'bn3')

In [None]:
print(nueva_lista)

[Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False), BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True), MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), Sequential(
  (0): Bottleneck(
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (downsample): Sequential(
      (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=Tr

In [None]:
nueva_lista = hola.copy()

for i in range(len(nueva_lista)):
    if isinstance(nueva_lista[i], nn.Sequential):
        for j in range(len(nueva_lista[i])):
                # Eliminar el último relu si es el último bottleneck del último sequential
                if j == len(nueva_lista[i]) - 1 and i == len(nueva_lista) -1:
                  del nueva_lista[i][j].conv3
                  del nueva_lista[i][j].bn3

print(nueva_lista)

[Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False), BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True), MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), Sequential(
  (0): Bottleneck(
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (downsample): Sequential(
      (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=Tr

In [None]:
import copy
import torchvision

new_net_children = []

for i, module in enumerate(hola):
    if isinstance(module, nn.Sequential):
        new_module = []
        for j, bottleneck in enumerate(module):
            if j == len(module) - 1 and i == len(hola) - 1:
                # No incluir conv3 y bn3 para eliminarlos solo para el último bloque
                new_bottleneck = torchvision.models.resnet.Bottleneck(
                    conv1=copy.deepcopy(bottleneck.conv1),
                    bn1=copy.deepcopy(bottleneck.bn1),
                    conv2=copy.deepcopy(bottleneck.conv2),
                    bn2=copy.deepcopy(bottleneck.bn2),
                    relu=copy.deepcopy(bottleneck.relu)
                )
                new_module.append(new_bottleneck)
            else:
                new_module.append(copy.deepcopy(bottleneck))
        new_net_children.append(nn.Sequential(*new_module))
    else:
        new_net_children.append(copy.deepcopy(module))

TypeError: Bottleneck.__init__() got an unexpected keyword argument 'conv1'

In [None]:
print(list(hola[-1][-1].children())[:-3])

[Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False), BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False), BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)]


In [None]:
print(hola[:][:-1])

[Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False), BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True), MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), Sequential(
  (0): Bottleneck(
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (downsample): Sequential(
      (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=Tr

In [None]:
from torchsummary import summary
summary(net, (3,512, 1024))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 256, 512]           9,408
       BatchNorm2d-2         [-1, 64, 256, 512]             128
              ReLU-3         [-1, 64, 256, 512]               0
         MaxPool2d-4         [-1, 64, 128, 256]               0
            Conv2d-5         [-1, 64, 128, 256]           4,096
       BatchNorm2d-6         [-1, 64, 128, 256]             128
              ReLU-7         [-1, 64, 128, 256]               0
            Conv2d-8         [-1, 64, 128, 256]          36,864
       BatchNorm2d-9         [-1, 64, 128, 256]             128
             ReLU-10         [-1, 64, 128, 256]               0
           Conv2d-11        [-1, 256, 128, 256]          16,384
      BatchNorm2d-12        [-1, 256, 128, 256]             512
           Conv2d-13        [-1, 256, 128, 256]          16,384
      BatchNorm2d-14        [-1, 256, 1

In [None]:
!pip install -q vit-pytorch
!pip install -q tqdm

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/100.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/100.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.3/100.3 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import numpy as np
import os
from torch.utils.data import Dataset
import torch
import torch.nn.functional as F
import torchvision.transforms.functional as TF

In [None]:
import skimage
from skimage import io

def is_png_file(filename):
    return any(filename.endswith(extension) for extension in [".png"])

def load_img(filepath):
    img = io.imread(filepath)
    img = img.astype(np.float32)
    #img = img/255.
    '''for i in range(img.shape[-1]):  # Z-Score normalization for every channel
        mean = img[:,:,i].mean()
        std = img[:,:,i].std()
        img[:,:,i] -= mean
        img /= (max(std, 1e-8))'''
    return img

In [None]:
class DataLoaderTrain(Dataset):
    def __init__(self, rgb_dir):
        super(DataLoaderTrain, self).__init__()

        input_files = sorted(os.listdir(rgb_dir))

        self.input_filenames = [os.path.join(rgb_dir, x) for x in input_files if is_png_file(x)]

        self.tar_size = len(self.input_filenames)  # get the size of target

    def __len__(self):
        return self.tar_size

    def __getitem__(self, index):
        tar_index   = index % self.tar_size
        input = np.float32(load_img(self.input_filenames[tar_index]))
        mean = input.mean()
        std = input.std()
        input -= mean
        input /= (max(std, 1e-8))
        input = torch.from_numpy(input)

        input = input.permute(2,0,1)

        input_filename = os.path.split(self.input_filenames[tar_index])[-1]

        return input, input_filename

In [None]:
def get_training_data(rgb_dir):
    assert os.path.exists(rgb_dir)
    return DataLoaderTrain(rgb_dir)

In [None]:
from torch.utils.data import DataLoader

train_dataset = get_training_data('/content/drive/MyDrive/TFG/Uformer/V2/dataV3/CholecSeg8k/little_val/x')
train_loader = DataLoader(dataset=train_dataset, batch_size=4, shuffle=True,
        num_workers=6, pin_memory=False, drop_last=False)

In [None]:
import torch
from tqdm import tqdm
from vit_pytorch import ViT, MAE

v = ViT(
    image_size = (15,27),
    patch_size = (5, 9),
    num_classes = 13,
    dim = 1024,
    depth = 6,
    heads = 8,
    mlp_dim = 2048
)

mae = MAE(
    encoder = v,
    masking_ratio = 0.75,   # the paper recommended 75% masked patches
    decoder_dim = 512,      # paper showed good results with just 512
    decoder_depth = 6       # anywhere from 1 to 8
)

#mae
images = torch.randn(30, 3, 15, 27)
epoch_loss = 0
'''
for epoch in range(150):
  for i, data in enumerate(tqdm(train_loader), 0):
    _input = data[0].cuda()
    loss = mae(_input)
    loss.backward()
    epoch_loss+= loss
  torch.cuda.empty_cache()
  epoch_loss = 4*epoch_loss/train_dataset.__len__()
  print("Epoca: ", epoch, "\nPerdida: ", epoch_loss)
'''
for epoch in range(150):
  loss = mae(images)
  loss.backward()
  print(epoch)

0
1
2
3
4


KeyboardInterrupt: 

In [None]:
import torch
from tqdm import tqdm
from vit_pytorch.vit_3d import ViT

v = ViT(
    image_size = (12,20),          # image size
    frames = 1,               # number of frames
    image_patch_size = (3,5),     # image patch size
    frame_patch_size = 1,      # frame patch size
    num_classes = 13,
    dim = 1024,
    depth = 6,
    heads = 8,
    mlp_dim = 2048,
    dropout = 0.1,
    emb_dropout = 0.1
)

video = torch.randn(30, 512, 1, 12, 20) # (batch, channels, frames, height, width)

preds = v(video)

RuntimeError: Given normalized_shape=[45], expected input with shape [*, 45], but got input of size[30, 16, 7680]

In [None]:
import torch
import h5py

with h5py.File('/content/drive/MyDrive/TFG/Parte2/DSSL/simsiam_hyperKvir/logs/resnet50/feature_maps_light/feature_map_00003.pt', 'r') as hf:
          img = torch.tensor(hf['feature_map_00003.pt'][:])
img.shape

torch.Size([512, 16, 32])

In [None]:
print(fm.shape)

torch.Size([2048, 16, 32])


In [None]:
import os
DIR = '/content/drive/MyDrive/TFG/Parte2/DSSL/simsiam_hyperKvir/logs/resnet50/feature_maps_light'
print(len([name for name in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, name))]))

24010


In [None]:
hey = torch.load('/content/drive/MyDrive/TFG/Parte2/DSSL/simsiam_hyperKvir/logs/resnet50/checkpoint_latest.pth.tar', map_location=torch.device('cpu'))
print(hey.keys())

dict_keys(['epoch', 'arch', 'state_dict', 'optimizer'])


In [None]:
claves = hey.keys()
features = [x for x in hey['state_dict'].keys() if x.split('.')[0]=='features']
print(features)

['features.0.weight', 'features.1.weight', 'features.1.bias', 'features.1.running_mean', 'features.1.running_var', 'features.1.num_batches_tracked', 'features.4.0.conv1.weight', 'features.4.0.bn1.weight', 'features.4.0.bn1.bias', 'features.4.0.bn1.running_mean', 'features.4.0.bn1.running_var', 'features.4.0.bn1.num_batches_tracked', 'features.4.0.conv2.weight', 'features.4.0.bn2.weight', 'features.4.0.bn2.bias', 'features.4.0.bn2.running_mean', 'features.4.0.bn2.running_var', 'features.4.0.bn2.num_batches_tracked', 'features.4.0.conv3.weight', 'features.4.0.bn3.weight', 'features.4.0.bn3.bias', 'features.4.0.bn3.running_mean', 'features.4.0.bn3.running_var', 'features.4.0.bn3.num_batches_tracked', 'features.4.0.downsample.0.weight', 'features.4.0.downsample.1.weight', 'features.4.0.downsample.1.bias', 'features.4.0.downsample.1.running_mean', 'features.4.0.downsample.1.running_var', 'features.4.0.downsample.1.num_batches_tracked', 'features.4.1.conv1.weight', 'features.4.1.bn1.weight',

In [None]:
features_keys = [x for x in features if not (x.startswith('features.7.2.conv3') or x.startswith('features.7.2.bn3'))]
state_dict_filtered = {k: v for k, v in hey['state_dict'].items() if k in features_keys}
hey['state_dict'] = state_dict_filtered
print(hey['state_dict'].keys())
print(hey.keys())

dict_keys(['features.0.weight', 'features.1.weight', 'features.1.bias', 'features.1.running_mean', 'features.1.running_var', 'features.1.num_batches_tracked', 'features.4.0.conv1.weight', 'features.4.0.bn1.weight', 'features.4.0.bn1.bias', 'features.4.0.bn1.running_mean', 'features.4.0.bn1.running_var', 'features.4.0.bn1.num_batches_tracked', 'features.4.0.conv2.weight', 'features.4.0.bn2.weight', 'features.4.0.bn2.bias', 'features.4.0.bn2.running_mean', 'features.4.0.bn2.running_var', 'features.4.0.bn2.num_batches_tracked', 'features.4.0.conv3.weight', 'features.4.0.bn3.weight', 'features.4.0.bn3.bias', 'features.4.0.bn3.running_mean', 'features.4.0.bn3.running_var', 'features.4.0.bn3.num_batches_tracked', 'features.4.0.downsample.0.weight', 'features.4.0.downsample.1.weight', 'features.4.0.downsample.1.bias', 'features.4.0.downsample.1.running_mean', 'features.4.0.downsample.1.running_var', 'features.4.0.downsample.1.num_batches_tracked', 'features.4.1.conv1.weight', 'features.4.1.bn

In [None]:
print(hey['state_dict']['features.0.weight'])

tensor([[[[ 4.0960e-02, -4.5461e-02, -4.3192e-02,  ..., -1.1232e-02,
           -1.7687e-02, -3.7987e-02],
          [ 1.4980e-02, -2.7269e-02,  2.2831e-02,  ..., -9.6498e-03,
            5.1178e-03, -2.9816e-02],
          [ 2.1066e-02,  4.2191e-03,  2.0848e-03,  ...,  1.2058e-02,
            1.6102e-02,  1.2281e-02],
          ...,
          [ 1.1706e-02, -5.9333e-03,  1.1491e-02,  ...,  2.1931e-02,
           -1.3246e-03, -3.5314e-02],
          [-5.9471e-03, -6.2125e-02, -3.1443e-02,  ..., -4.7594e-03,
           -4.8082e-02, -6.7737e-02],
          [-1.2331e-02, -5.6960e-02, -2.9785e-02,  ..., -4.4195e-02,
           -6.0342e-02, -2.4785e-02]],

         [[ 1.3895e-02, -1.8490e-02, -2.5026e-02,  ..., -2.5087e-02,
           -5.6475e-03,  4.3826e-03],
          [ 7.0338e-03, -1.9813e-02, -5.7606e-03,  ..., -1.5327e-02,
           -1.0803e-02, -6.1032e-03],
          [ 5.2435e-03, -1.9754e-02,  1.3894e-02,  ...,  7.6334e-04,
            1.6362e-02,  5.3395e-03],
          ...,
     

In [None]:
a = 'jamaica'
b = 'queso'
print(zip(a,b))

for e, c in zip(a,b):
  print(e, c)

<zip object at 0x7ca6f987e240>
j q
a u
m e
a s
i o


In [None]:
import torch
import torch.nn as nn

model = nn.Sequential(
          nn.Conv2d(1,20,5),
          nn.ReLU(),
          nn.Conv2d(20,64,5),
          nn.ReLU()
        )
print(model)

Sequential(
  (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU()
  (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
  (3): ReLU()
)


In [None]:
def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * Bottleneck.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * Bottleneck.expansion)
        self.relu = nn.ReLU(inplace=True)

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class BottleneckLast(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BottleneckLast, self).__init__()

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        if residual.shape[1] == out.shape[1]:
            out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):
    def __init__(self, depth):
        super(ResNet, self).__init__()

        blocks = {18: BasicBlock, 34: BasicBlock, 50: Bottleneck, 101: Bottleneck, 152: Bottleneck, 200: Bottleneck}
        layers = {18: [2, 2, 2, 2], 34: [3, 4, 6, 3], 50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3],
                    200: [3, 24, 36, 3]}
        assert layers[depth], 'invalid detph for ResNet (depth should be one of 18, 34, 50, 101, 152, and 200)'

        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(blocks[depth], 64, layers[depth][0])
        self.layer2 = self._make_layer(blocks[depth], 128, layers[depth][1], stride=2)
        self.layer3 = self._make_layer(blocks[depth], 256, layers[depth][2], stride=2)
        self.layer4 = self._make_layer(BottleneckLast, 512, layers[depth][3], stride=2)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        if block == BottleneckLast:
            layers.append(Bottleneck(self.inplanes, planes, stride, downsample))
        else:
            layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            if block == BottleneckLast and i == blocks -1:
                layers.append(block(self.inplanes, planes))
            else:
                layers.append(Bottleneck(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        return x


def resnet50():
    return ResNet(depth=50)


def resnet18():
    return ResNet(depth=18)

In [None]:
import math
class FeatureGetter(nn.Module):

    def __init__(self, backbone='resnet50', d=2048):
        super(FeatureGetter, self).__init__()

        if backbone == 'resnet50':
            net = resnet50()
        elif backbone == 'resnet18':
            net = resnet18()
        else:
            raise NotImplementedError('Backbone model not implemented.')

        self.features = nn.Sequential(*list(net.children()))

        self.reset_parameters()

    def forward(self, x):
        x = self.features(x)

        return x

    def reset_parameters(self):
        # reset conv initialization to default uniform initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels
                stdv = 1. / math.sqrt(n)
                m.weight.data.uniform_(-stdv, stdv)
                if m.bias is not None:
                    m.bias.data.uniform_(-stdv, stdv)
            elif isinstance(m, nn.Linear):
                stdv = 1. / math.sqrt(m.weight.size(1))
                m.weight.data.uniform_(-stdv, stdv)
                if m.bias is not None:
                    m.bias.data.uniform_(-stdv, stdv)

In [None]:
!pip install -q vit_pytorch

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.6/117.6 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.2/43.2 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from vit_pytorch import ViT, MAE, SimpleViT

contrastive = FeatureGetter(backbone='resnet50')

v = SimpleViT(
        image_size = (16,32),
        patch_size = (4, 8),
        num_classes = 13,
        dim = 1024,
        depth = 6,
        heads = 8,
        mlp_dim = 2048,
        channels = 512
    )
generative = MAE(
        encoder = v,
        masking_ratio = 0.75,   # the paper recommended 75% masked patches
        decoder_dim = 512,      # paper showed good results with just 512
        decoder_depth = 6       # anywhere from 1 to 8
    )

In [None]:
model = nn.Sequential(
        contrastive,
        generative
    )

print(model)

Sequential(
  (0): FeatureGetter(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (4): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
      

In [None]:
!pip install -q batchgenerators

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.7/61.7 kB[0m [31m638.3 kB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.4/96.4 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for batchgenerators (setup.py) ... [?25l[?25hdone


In [None]:
from batchgenerators.utilities.file_and_folder_operations import *
import random

all_files = sorted(os.listdir('/content/drive/MyDrive/TFG/Parte2/little_images'))
val_files = random.sample(all_files, 150)
train_files = list(set(all_files) - set(val_files))

print('todos: ',len(all_files))
print('val: ',len(val_files))
print('resta: ', len(train_files))

todos:  500
val:  150
resta:  350


In [None]:
input_files = sorted(os.listdir('/content/drive/MyDrive/TFG/Parte2/little_images'))
print(input_files[0])
print(val_files[0])

pre-training_0.jpg
pre-training_228.jpg


In [10]:
import skimage
from skimage import io

fm = io.imread('/content/drive/MyDrive/TFG/Parte2/target_data/test/y/testing_groundtruth-8066.png')
print(fm.shape)

(512, 1024)
