In [1]:
# Colab-only step: mount Google Drive at /content/drive so that the project
# files stored there are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Work from the demo4 project directory so the relative paths used further down
# (e.g. test_images/1_0.png, saved_model.pt) resolve inside it.
# NOTE(review): the local imports `parameters` and `utilities.misc` are
# presumably resolved from this directory as well — confirm.
%cd /content/drive/MyDrive/Master/Kaggle/Code/Integrating_Language_Guidance_into_Vision-based_Deep_Metric_Learning/demo4

/content/drive/MyDrive/Master/Kaggle/Code/Integrating_Language_Guidance_into_Vision-based_Deep_Metric_Learning/demo4


# Imports

In [3]:
import argparse
import collections
import contextlib
import copy
import json
import os
import random
import sys
import time
import warnings

# Silence every Python warning for the rest of the session. Be aware that this
# also hides deprecation notices raised by the libraries imported below.
warnings.filterwarnings("ignore")

import matplotlib

# Select the non-interactive Agg backend before pyplot is imported, so figures
# are rendered off-screen instead of through a GUI toolkit.
matplotlib.use('agg')
import matplotlib.pyplot as plt
import numpy as np
import pickle as pkl
import termcolor
from tqdm import tqdm

import parameters as par
import utilities.misc as misc

In [4]:
import torch, torch.nn as nn, torch.nn.functional as F
import torch.multiprocessing
import torchvision

# Switch torch.multiprocessing to the 'file_system' tensor-sharing strategy.
# NOTE(review): presumably a workaround for "too many open files" errors with
# multi-worker data loading; no data loader is visible in this notebook, so
# confirm it is still needed.
torch.multiprocessing.set_sharing_strategy('file_system')

# ResNet

In [5]:
from torchvision.models import resnet50, ResNet50_Weights

class Network(torch.nn.Module):
    """ResNet-50 whose classifier is replaced by a linear embedding head.

    ``forward`` performs its own preprocessing (resize to 300x300, division by
    255, per-channel mean/std normalisation), so callers pass raw image
    batches. The convolutional trunk is evaluated under ``torch.no_grad()``;
    only the final linear layer takes part in autograd.
    """

    def __init__(self, opt):
        """Build the backbone and the embedding head.

        Args:
            opt: Options object. ``opt.arch`` and ``opt.embed_dim`` are read;
                ``opt.penultimate_dim`` is written with the width of the
                features that enter the head.
        """
        super().__init__()

        self.pars = opt
        self.name = opt.arch
        self.model = resnet50(weights=ResNet50_Weights.DEFAULT)

        # Publish the pooled-feature width on the options object, then swap the
        # stock classifier for a projection to the requested embedding size.
        feature_dim = self.model.fc.in_features
        opt.penultimate_dim = feature_dim
        self.model.fc = torch.nn.Linear(feature_dim, opt.embed_dim)

        # Second handle on the four residual stages so forward() can iterate
        # over them. The stages are therefore registered under two prefixes
        # (`model.layerN` and `layer_blocks.N`).
        self.layer_blocks = nn.ModuleList([
            self.model.layer1,
            self.model.layer2,
            self.model.layer3,
            self.model.layer4,
        ])

        self.pool_base = torch.nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        """Embed a batch of raw images.

        Args:
            x: Image batch. Assumed to be (N, 3, H, W) with pixel values in
                0-255, since the code divides by 255 and normalises three
                channels — TODO confirm against callers.

        Returns:
            The output of the embedding head, L2-normalised along the last
            dimension.
        """
        # Preprocessing is done here rather than by the caller.
        x = torchvision.transforms.functional.resize(x, size=[300, 300])
        x = x / 255.0
        x = torchvision.transforms.functional.normalize(
            x,
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225])

        # The trunk is evaluated without gradient tracking.
        with torch.no_grad():
            x = self.model.conv1(x)
            x = self.model.bn1(x)
            x = self.model.relu(x)
            x = self.model.maxpool(x)
            for stage in self.layer_blocks:
                x = stage(x)
            pooled = self.pool_base(x)
            pooled = pooled.view(pooled.size(0), -1)

        embedding = self.model.fc(pooled)
        return torch.nn.functional.normalize(embedding, dim=-1)


# Make Submission File

In [6]:
# Location of the training checkpoint that gets exported below.
CHECKPOINT_PATH = '/content/drive/MyDrive/Master/Kaggle/Code/Integrating_Language_Guidance_into_Vision-based_Deep_Metric_Learning/demo4/Training_Results/guie_CLIP_TensorFlow_train_example/multisimilarity_small_no_pesudolables_1/checkpoint_Val_embeds_e_recall@1_multisimilarity.pth.tar'

# NOTE(review): torch.load unpickles arbitrary Python objects, so only load
# checkpoints that come from a trusted source.
# Tensors are mapped onto the CPU so that no GPU is required.
output = torch.load(CHECKPOINT_PATH, map_location=torch.device('cpu'))

# The checkpoint is a dict; show which entries it holds and pull out the
# options object stored alongside the weights.
print(output.keys())
opt = output['opt']

dict_keys(['state_dict', 'opt'])


In [7]:
# Record the CPU as the target device on the restored options.
# NOTE(review): Network as defined in this notebook never reads opt.device and
# the model is not moved anywhere, so it stays on the CPU regardless —
# presumably other project code consumes this field; confirm.
opt.device = torch.device('cpu')
# opt.device = torch.device('cuda')
# Instantiating Network fetches the default pretrained ResNet-50 weights; the
# trained weights from the checkpoint are only loaded in a later cell.
model = Network(opt)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [8]:
from torchsummary import summary

# torchsummary defaults to device="cuda" and, whenever CUDA is available, feeds
# CUDA tensors to the model. The model here lives on the CPU, so without an
# explicit device the call fails with a device mismatch on GPU runtimes.
# Passing the model's actual device keeps inputs and weights together.
summary_device = next(model.parameters()).device.type  # 'cpu' or 'cuda'
summary(model, (3, 300, 300), device=summary_device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 150, 150]           9,408
       BatchNorm2d-2         [-1, 64, 150, 150]             128
              ReLU-3         [-1, 64, 150, 150]               0
         MaxPool2d-4           [-1, 64, 75, 75]               0
            Conv2d-5           [-1, 64, 75, 75]           4,096
            Conv2d-6           [-1, 64, 75, 75]           4,096
       BatchNorm2d-7           [-1, 64, 75, 75]             128
       BatchNorm2d-8           [-1, 64, 75, 75]             128
              ReLU-9           [-1, 64, 75, 75]               0
             ReLU-10           [-1, 64, 75, 75]               0
           Conv2d-11           [-1, 64, 75, 75]          36,864
           Conv2d-12           [-1, 64, 75, 75]          36,864
      BatchNorm2d-13           [-1, 64, 75, 75]             128
      BatchNorm2d-14           [-1, 64,

In [9]:
# from torchsummary import summary
# summary(model, (3 , 300, 300))

In [10]:
from PIL import Image
import torch
from torchvision import transforms

# Smoke test of the eager model on one sample image.
# Note: at this point the checkpoint weights have not been loaded yet (that
# happens in a later cell), so the embedding comes from an untrained head.

# Open the image in a context manager so the file handle is released as soon
# as the pixels have been converted.
with Image.open('test_images/1_0.png') as image_file:
    input_image = image_file.convert("RGB")

# PILToTensor keeps the raw pixel values; Network.forward does the scaling
# and normalisation itself.
convert_to_tensor = transforms.Compose([transforms.PILToTensor()])
input_tensor = convert_to_tensor(input_image)
input_batch = input_tensor.unsqueeze(0)  # add the batch dimension

# Inference mode: BatchNorm must use its running statistics rather than update
# them from this single image.
model.eval()

# The forward pass itself has to run under no_grad; wrapping only the NumPy
# conversion (as before) still built an autograd graph for the head.
with torch.no_grad():
    out = model(input_batch)[0]

embedding = torch.flatten(out).cpu().numpy()

In [11]:
from zipfile import ZipFile

# Restore the trained weights from the checkpoint and switch to inference mode
# before exporting, so BatchNorm uses its stored statistics.
model.load_state_dict(output['state_dict'])
model.eval()

# Compile the module (including the preprocessing in forward) to TorchScript
# and serialise it.
saved_model = torch.jit.script(model)
saved_model.save('saved_model.pt')

# Package the scripted model as the submission archive.
# The handle is named `archive`, not `zip`: binding it to `zip` would shadow the
# builtin for every later cell in the notebook.
with ZipFile('submission.zip', 'w') as archive:
    archive.write('saved_model.pt', arcname='saved_model.pt')

In [12]:
from PIL import Image
import torch
from torchvision import transforms

# Model loading: reload the serialised TorchScript module to verify that the
# exported file works on its own.
model = torch.jit.load('saved_model.pt')
model.eval()
embedding_fn = model

# Load image and extract its embedding.
# The context manager releases the file handle once the pixels are converted.
with Image.open('test_images/1_0.png') as image_file:
    input_image = image_file.convert("RGB")

# PILToTensor keeps the raw pixel values; the scripted model does the scaling
# and normalisation itself.
convert_to_tensor = transforms.Compose([transforms.PILToTensor()])
input_tensor = convert_to_tensor(input_image)
input_batch = input_tensor.unsqueeze(0)  # add the batch dimension

# The forward pass itself has to run under no_grad; wrapping only the NumPy
# conversion (as before) still built an autograd graph for the head.
with torch.no_grad():
    out = embedding_fn(input_batch)[0]

embedding = torch.flatten(out).cpu().numpy()