In [1]:
import pandas as pd
import glob
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import utilities
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import timm
import math
from transformers import (get_linear_schedule_with_warmup, 
                          get_cosine_schedule_with_warmup, 
                          get_cosine_with_hard_restarts_schedule_with_warmup,
                          get_constant_schedule_with_warmup)
from tqdm import tqdm
import faiss
import random
import gc
import transformers
from transformers import CLIPProcessor, CLIPVisionModel,  CLIPVisionConfig
from PIL import Image
from torchvision import transforms
from pytorch_metric_learning import losses
import open_clip

In [2]:
class CFG:
    """Static configuration namespace; values are read as ``CFG.<name>``."""

    # --- backbone / input -------------------------------------------------
    model_name = 'ViT-L-14-336'   # architecture name handed to open_clip
    model_data = 'openai'         # presumably the pretrained-weights tag; not read in the cells shown here
    image_size = 336              # images are resized to image_size x image_size in Model.forward
    emb_size = 512                # output width of the Head projection

    # --- data sampling / loading (not read in the cells shown here) -------
    samples_per_class = 10_000_000
    min_samples = 2
    workers = 6
    train_batch_size = 8
    valid_batch_size = 32

    # --- optimisation (not read in the cells shown here) ------------------
    # NOTE(review): the string keys look like depth thresholds for a
    # layer-dependent learning rate -- confirm against the training code.
    vit_bb_lr = {
        '8': 1.25e-6,
        '16': 2.5e-6,
        '20': 5e-6,
        '24': 10e-6,
    }
    vit_bb_wd = 1e-3
    hd_lr = 3e-4
    hd_wd = 1e-5
    n_warmup_steps = 1_000
    n_epochs = 1
    acc_steps = 4
    global_step = 0
    autocast = True
    freeze_norm = False

    # --- margin-loss hyper-parameters --------------------------------------
    # NOTE(review): presumably scale / margin / minimum margin for the `arc`
    # module that Head expects -- confirm against the training code.
    s = 30.0
    m = 0.45
    m_min = 0.05

    # --- runtime ----------------------------------------------------------
    seed = 5
    device = torch.device('cuda')

In [3]:
# Build the CLIP model named in CFG together with its preprocessing pipelines.
# Only the first two return values are kept; `model_transforms` is later read
# by Model.forward for its final transform's mean/std.
# Note: no `pretrained` tag is passed here (CFG.model_data is not used), so
# any trained weights have to come from the checkpoints loaded further down.
vit_backbone, model_transforms, _ = open_clip.create_model_and_transforms(
    model_name=CFG.model_name,
)

In [4]:
class Head(nn.Module):
    """Projection head that maps backbone features to ``CFG.emb_size`` dims.

    Attributes:
        emb: bias-free linear projection ``hidden_size -> CFG.emb_size``.
        arc: optional module applied to the embeddings. It is ``None`` after
            construction and is expected to be assigned from outside
            (presumably a margin-based classifier for training -- confirm
            against the training code).
        dropout: ``utilities.Multisample_Dropout`` instance, called as
            ``dropout(x, self.emb)``.
    """

    def __init__(self, hidden_size):
        """Create the projection for inputs whose last dimension is ``hidden_size``."""
        super(Head, self).__init__()

        self.emb = nn.Linear(hidden_size, CFG.emb_size, bias=False)
        self.arc = None
        self.dropout = utilities.Multisample_Dropout()

    def forward(self, x):
        """Return ``(output, embeddings)`` for the feature batch ``x``.

        ``output`` is ``self.arc(embeddings)`` when an ``arc`` module has been
        attached, and ``None`` otherwise. Previously the call was made
        unconditionally, so a Head without ``arc`` (the state ``__init__``
        leaves it in) raised ``TypeError`` on every forward pass.
        """
        embeddings = self.dropout(x, self.emb)

        # `arc` is None until assigned externally; embeddings are still useful
        # on their own, so skip it instead of calling None.
        output = self.arc(embeddings) if self.arc is not None else None

        return output, embeddings

In [5]:
class Model(nn.Module):
    """CLIP image encoder followed by the projection ``Head``.

    Preprocessing (resize, scaling, normalisation) happens inside ``forward``,
    so callers pass raw image tensors.
    """

    def __init__(self, vit_backbone):
        """Wrap ``vit_backbone`` (stored by reference, not copied) and add a Head."""
        super().__init__()
        self.vit_backbone = vit_backbone
        # NOTE(review): 768 is presumably the width returned by encode_image
        # for the configured CLIP model -- confirm if CFG.model_name changes.
        self.head = Head(768)

    def forward(self, images):
        """Preprocess ``images``, encode them and return the Head's result.

        ``images`` are divided by 255, so pixel values are presumably expected
        in the 0-255 range -- confirm against the data loader.
        """
        side = CFG.image_size
        # The last transform of the open_clip pipeline supplies mean and std.
        normalizer = model_transforms.transforms[-1]

        pixels = transforms.functional.resize(images, size=[side, side])
        pixels = pixels / 255
        pixels = transforms.functional.normalize(
            pixels,
            mean=normalizer.mean,
            std=normalizer.std,
        )

        features = self.vit_backbone.encode_image(pixels)
        return self.head(features)



In [6]:
# Fine-tuned checkpoints whose weights are averaged into the soup.
# NOTE(review): these are absolute, machine-specific paths; consider deriving
# them from a configurable models directory.
path_list = [
    '/home/ivan/Desktop/GUIE/models/soup-v1/ViT-L-14-336',
    '/home/ivan/Desktop/GUIE/models/soup-v2/ViT-L-14-336',
    '/home/ivan/Desktop/GUIE/models/soup-v3/ViT-L-14-336',
    '/home/ivan/Desktop/GUIE/models/soup-v4/ViT-L-14-336',
]


def _snapshot_checkpoint(model, path):
    """Load the checkpoint at ``path`` into ``model`` and return a copy of its weights.

    ``state_dict()`` returns references to the live parameter tensors, and the
    backbone module is shared, so the tensors must be cloned; otherwise the
    next ``load_state_dict`` silently overwrites this snapshot in place.

    Raises:
        RuntimeError: if the checkpoint lacks keys the model needs. Those
            entries would keep whatever was loaded before and corrupt the
            average. Unexpected extra keys are still tolerated (strict=False).
    """
    checkpoint = torch.load(path, map_location='cpu')
    load_report = model.load_state_dict(checkpoint, strict=False)
    if load_report.missing_keys:
        raise RuntimeError(
            f'{path} is missing {len(load_report.missing_keys)} model keys, '
            f'e.g. {load_report.missing_keys[:5]}'
        )
    return {key: tensor.detach().clone() for key, tensor in model.state_dict().items()}


def _average_tensors(tensors):
    """Return the element-wise mean of same-shaped tensors.

    Non-floating-point entries cannot be averaged meaningfully, so the first
    one is copied. Floating-point entries are averaged in float32 and cast
    back to their original dtype.
    """
    first = tensors[0]
    if not torch.is_floating_point(first):
        return first.clone()
    return torch.stack([t.float() for t in tensors]).mean(0).to(first.dtype)


if not path_list:
    raise ValueError('path_list is empty: there is nothing to average')

# Load models weights
# Only the listed checkpoints enter the soup. The freshly constructed model is
# left out: its head is a newly initialised nn.Linear and the backbone is built
# without a `pretrained` tag, so it carries no trained weights.
model = Model(vit_backbone)
weight_list = []
for path in path_list:
    weight_list.append(_snapshot_checkpoint(model, path))

# Average weights
state_dict = {
    key: _average_tensors([weights[key] for weights in weight_list])
    for key in weight_list[0]
}
model.load_state_dict(state_dict)



<All keys matched successfully>

In [7]:
# '/' cannot appear in a file name, so it is swapped for '-' in the model id.
model_name = '-'.join(CFG.model_name.split('/'))

# Persist the averaged weights next to the other models.
soup_path = f'../models/{model_name}-soup'
torch.save(model.state_dict(), soup_path)