# 8.I2E Ensamble 

In [1]:
import os
import sys
import pickle
import json
import glob
import gc
import random
import time
import unicodedata
import traceback
import datetime
import copy
import itertools

import numpy as np
import pandas as pd


from matplotlib import pyplot as plt 
from tqdm.notebook import tqdm
from pathlib import Path
from scipy.spatial import distance
from collections import defaultdict
from PIL import Image

from sklearn.model_selection import train_test_split

import torch
import torchvision
from torchvision.models import (
    vit_b_16, ViT_B_16_Weights, 
    vit_l_16, ViT_L_16_Weights,
    vit_h_14, ViT_H_14_Weights,
    regnet_y_32gf, RegNet_Y_32GF_Weights,
    regnet_y_128gf, RegNet_Y_128GF_Weights,
    regnet_y_16gf, RegNet_Y_16GF_Weights,
    efficientnet_v2_l, EfficientNet_V2_L_Weights,
    efficientnet_v2_m, EfficientNet_V2_M_Weights,
    convnext_large, ConvNeXt_Large_Weights,
    swin_v2_b, Swin_V2_B_Weights
)
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import (
    StepLR, MultiStepLR, 
    ConstantLR, LinearLR, 
    ExponentialLR, PolynomialLR, 
    CosineAnnealingLR, CosineAnnealingWarmRestarts, 
    CyclicLR, OneCycleLR, 
    ReduceLROnPlateau
)

os.environ["TOKENIZERS_PARALLELISM"] = "false"

def set_seed(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

2023-05-12 21:18:54.354671: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-12 21:18:54.380884: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
batch_size_config = {
    "vit_b_16": {
        True: 256,
        False: 16
    },
    "vit_b_16_linear": {
        True: 256,
        False: 48
    },
    "regnet_y_16gf": {
        True: 64,
        False: 26
    },
    "regnet_y_16gf_linear": {
        True: 64,
        False: 26
    },
    "regnet_y_32gf": {
        True: 16,
        False: 6
    },
    "regnet_y_32gf_linear": {
        True: 16,
        False: 20
    },
}

In [3]:
def get_img_model(img_model_name: str, load_weight: bool, head_emb_size: int):
    if img_model_name == "regnet_y_16gf":
        if not load_weight:
            model = regnet_y_16gf()
        else:
            weights = RegNet_Y_16GF_Weights.IMAGENET1K_SWAG_E2E_V1
            model = regnet_y_16gf(weights=weights)
        model.fc = torch.nn.Linear(3024, head_emb_size)
        
        preprocess = transforms.Compose([
            transforms.Resize(224, interpolation=transforms.InterpolationMode.BILINEAR),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                 std=[0.229, 0.224, 0.225]),
        ])
    if img_model_name == "regnet_y_16gf_linear":
        if not load_weight:
            model = regnet_y_16gf()
        else:
            weights = RegNet_Y_16GF_Weights.IMAGENET1K_SWAG_LINEAR_V1
            model = regnet_y_16gf(weights=weights)
        model.fc = torch.nn.Linear(3024, head_emb_size)
        
        preprocess = transforms.Compose([
            transforms.Resize(224, interpolation=transforms.InterpolationMode.BILINEAR),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                 std=[0.229, 0.224, 0.225]),
        ])
    elif img_model_name == "regnet_y_32gf":
        if not load_weight:
            model = regnet_y_32gf()
        else:
            weights = RegNet_Y_32GF_Weights.IMAGENET1K_SWAG_E2E_V1
            model = regnet_y_32gf(weights=weights)
        model.fc = torch.nn.Linear(3712, head_emb_size)
        
        preprocess = transforms.Compose([
            transforms.Resize(384, interpolation=transforms.InterpolationMode.BICUBIC),
            transforms.CenterCrop(384),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                 std=[0.229, 0.224, 0.225]),
        ])
        
    elif img_model_name == "regnet_y_32gf_linear":
        if not load_weight:
            model = regnet_y_32gf()
        else:
            weights = RegNet_Y_32GF_Weights.IMAGENET1K_SWAG_LINEAR_V1
            model = regnet_y_32gf(weights=weights)
        model.fc = torch.nn.Linear(3712, head_emb_size)
        
        preprocess = transforms.Compose([
            transforms.Resize(224, interpolation=transforms.InterpolationMode.BICUBIC),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                 std=[0.229, 0.224, 0.225]),
        ])
        
    elif img_model_name == "vit_b_16":
        if not load_weight:
            model = vit_b_16(image_size=384)
        else:
            weights = ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1
            model = vit_b_16(weights=weights)
        model.heads.head = torch.nn.Linear(768, head_emb_size)
        
        preprocess = transforms.Compose([
            transforms.Resize(384, interpolation=transforms.InterpolationMode.BICUBIC),
            transforms.CenterCrop(384),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                 std=[0.229, 0.224, 0.225]),
        ])
    
    elif img_model_name == "vit_b_16_linear":
        if not load_weight:
            model = vit_b_16(image_size=224)
        else:
            weights = ViT_B_16_Weights.IMAGENET1K_SWAG_LINEAR_V1
            model = vit_b_16(weights=weights)
        model.heads.head = torch.nn.Linear(768, head_emb_size)
        
        preprocess = transforms.Compose([
            transforms.Resize(224, interpolation=transforms.InterpolationMode.BICUBIC),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                                 std=[0.229, 0.224, 0.225]),
        ])
    
    return model, preprocess

def create_submission(pred_arr, img_names, text_emb_size):
    imgIds = [i.split('.')[0] for i in img_names]

    EMBEDDING_LENGTH = text_emb_size
    eIds = list(range(EMBEDDING_LENGTH))

    imgId_eId = [
        '_'.join(map(str, i)) for i in zip(
            np.repeat(imgIds, EMBEDDING_LENGTH),
            np.tile(range(EMBEDDING_LENGTH), len(imgIds)))]
    
    submission = pd.DataFrame(
                    index=imgId_eId,
                    data=np.array(pred_arr).flatten(),
                    columns=['val']).rename_axis('imgId_eId')
    return submission

class CustomDataSet(Dataset):
    def __init__(self, data_dir, img2prompt, img_preprocess):
        self.data_dir = data_dir
        self.img_names = list(img2prompt.keys())
        self.img2prompt = img2prompt
        self.img_preprocess = img_preprocess

    def __len__(self):
        return len(self.img_names)

    def __getitem__(self, idx):
        img_name = self.img_names[idx]
        img_path = os.path.join(self.data_dir, img_name)
        img = Image.open(img_path)
        img_emb = self.img_preprocess(img)
        
        prompt = str(self.img2prompt[img_name])
        
        return img_name, img_emb, prompt

In [4]:
from dataclasses import dataclass, field

@dataclass
class CFG_CLASS:
    dataset_dupl_word: int
    img_model_name: str 
    lr_scheduler_name: str
    lr: float
    
    seed: int = 42
    text_emb_size: int = 384
    is_kaggle: bool = (os.environ.get('PWD') == '/kaggle/working')
    train_files_dir: str = "img2emb-data"
    
    save_model: bool = True
    img_model_test_size: float = 0.05

    loss_name: str = "cosine"
    train_only_head: bool = False
    
    train_aug: bool = True
    test_flip: bool = True
    
    full_train_epoch_num: int = 100
    max_epoch_num: int = full_train_epoch_num * 10
    full_val_epoch_num: int = 2
    early_stopping_patience: int = full_val_epoch_num * 25
    
    device: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    dataset_name: str = field(init=False)
    metadata_path: str = field(init=False)
    aug_name: str = field(init=False)
    model_name: str = field(init=False)
    
    def __post_init__(self):
        self.dataset_name = f"dataset_duplwords_{self.dataset_dupl_word}"
        self.metadata_path = f"../input/metadata/metadata_duplwords_{self.dataset_dupl_word}.parquet"
        
        self.aug_name: str = f"flip_{int(self.train_aug)}"
        self.model_name = f"model_{self.img_model_name}_sch_{self.lr_scheduler_name}_lr_{self.lr:.0e}".replace("-", "_")
        
        self.train_name = f"{self.dataset_name}_{self.model_name}"
        
        self.batch_size = batch_size_config[self.img_model_name][self.is_kaggle]
        self.num_workers = self.batch_size if not self.is_kaggle else 2

In [5]:
def get_train_config(train_size, val_size, batch_size, full_train_epoch_num, full_val_epoch_num):
    max_batches_per_epoch_train = train_size // batch_size // full_train_epoch_num
    max_batches_per_epoch_val = val_size // batch_size // full_val_epoch_num
    return max_batches_per_epoch_train, max_batches_per_epoch_val

In [75]:
CFG = CFG_CLASS(
    dataset_dupl_word=5,
    img_model_name="regnet_y_32gf_linear",
    lr=5,
    lr_scheduler_name="None",
    save_model=True,
)
set_seed(CFG.seed)

In [76]:
train_data_dir = Path("../input/")
metadata = pd.read_parquet(CFG.metadata_path)
print("Metadata shape: ", metadata.shape)

full_prompt = metadata[["image_name", "prompt"]].values
train_prompt, val_prompt = train_test_split(
    full_prompt, 
    test_size=CFG.img_model_test_size, 
    random_state=CFG.seed,
    shuffle=True
)

CFG.dataset_train_size = len(train_prompt)
CFG.dataset_val_size = len(val_prompt)
CFG.max_batches_per_epoch_train, CFG.max_batches_per_epoch_val = get_train_config(
    train_size=CFG.dataset_train_size, 
    val_size=CFG.dataset_val_size, 
    batch_size=CFG.batch_size, 
    full_train_epoch_num=CFG.full_train_epoch_num, 
    full_val_epoch_num=CFG.full_val_epoch_num
)

print("Train/val")
print("Sizes: ", CFG.dataset_train_size, "/", CFG.dataset_val_size)
print("Batches per epoch: ", 
      CFG.max_batches_per_epoch_train, "/",
      CFG.max_batches_per_epoch_val)
print("Images per epoch: ", 
      CFG.max_batches_per_epoch_train * CFG.batch_size, "/",
      CFG.max_batches_per_epoch_val * CFG.batch_size)

train_prompt_dict = {img_name: prompt for img_name, prompt in train_prompt}
val_prompt = val_prompt[:10000]
val_prompt_dict = {img_name: prompt for img_name, prompt in val_prompt}

Metadata shape:  (503293, 2)
Train/val
Sizes:  478128 / 25165
Batches per epoch:  239 / 629
Images per epoch:  4780 / 12580


In [77]:
class CFG:
    seed = 42
    text_emb_size = 384
    is_kaggle = (os.environ.get('PWD') == '/kaggle/working')
    
    train_files_dir = "img2emb-data"
    
    test_flip = True
    
    # RESOURCES
    num_workers = 2
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    model_names = [
        "vit_b_16", "vit_b_16_linear", "regnet_y_16gf", "regnet_y_32gf", "regnet_y_16gf_linear", "regnet_y_32gf_linear", 
    ]
    model_scores = [
        0.52504, 0.52275, 0.53276, 0.52972, 0.53928, 0.54452
    ]
    
    model_alphas = [
        1, 1, 1, 1, 1, 1
    ]

In [78]:
sys.path.append('../input/sentence-transformers-222/sentence-transformers')
from sentence_transformers import SentenceTransformer, models
st_model = SentenceTransformer('../input/sentence-transformers-222/all-MiniLM-L6-v2/')

In [79]:
test_data_dir = train_data_dir
test_image_names = os.listdir(test_data_dir)
test_prompt_dict = val_prompt_dict

pred_arr_models_list = []

for model_name in CFG.model_names:
    img_model, img_preprocess = get_img_model(img_model_name=model_name, 
                                              load_weight=False, 
                                              head_emb_size=CFG.text_emb_size)
    
    model_path = f"../input/{CFG.train_files_dir}/dataset_duplwords_5_model_{model_name}_sch_None_lr_1e_05.torch"
    img_model.load_state_dict(torch.load(model_path))
    img_model.to(CFG.device)
    img_model.eval()
    
    test_dataset = CustomDataSet(   
        data_dir=test_data_dir, 
        img2prompt=test_prompt_dict, 
        img_preprocess=img_preprocess
    )
    test_dataloader = DataLoader(
        test_dataset, 
        batch_size=batch_size_config[model_name][False], 
        shuffle=False, 
        num_workers=CFG.num_workers
    )
    
    pred_arr_model = [] 
    true_emb = []
    with torch.no_grad():
        for img_names, img_embs, prompts in tqdm(test_dataloader):
            img_embs = img_embs.to(CFG.device)
            pred = img_model(img_embs) # (batch, emb_size)
            
            if CFG.test_flip:
                img_embs_flip = transforms.functional.hflip(img_embs)
                pred_flip = img_model(img_embs_flip)
                pred = (pred + pred_flip) / 2
            
            pred = pred.cpu().detach().numpy()
                
            pred_arr_model.extend(pred) 
            true_emb.extend(st_model.encode(prompts))
                
    pred_arr_model = np.array(pred_arr_model) # (images, emb_size)
    pred_arr_models_list.append(pred_arr_model) # (models, images, emb_size)

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/209 [00:00<?, ?it/s]

  0%|          | 0/385 [00:00<?, ?it/s]

  0%|          | 0/1667 [00:00<?, ?it/s]

  0%|          | 0/385 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

In [81]:
pred_arr_flatten_list = [pred.flatten() for pred in pred_arr_models_list]
pred_arr_flatten = np.vstack(pred_arr_flatten_list).T
pred_arr_flatten.shape

(3840000, 6)

In [82]:
true_flatten = np.array(true_emb).flatten()
true_flatten.shape

(3840000,)

In [83]:
from sklearn.linear_model import LinearRegression, SGDRegressor
lr = SGDRegressor(fit_intercept=False, )
lr.fit(pred_arr_flatten, true_flatten)

In [85]:
for n, s, c in sorted(zip(CFG.model_names, CFG.model_scores, lr.coef_), key=lambda x: x[2]):
    print(n, s, c*100, sep="\t\t")

regnet_y_16gf_linear		0.53928		0.3146333928496869
vit_b_16		0.52504		0.4281575193038673
vit_b_16_linear		0.52275		0.5060009186607468
regnet_y_32gf_linear		0.54452		0.6600851190851034
regnet_y_16gf		0.53276		0.7230120487436666
regnet_y_32gf		0.52972		0.9170895882734724


In [86]:
for n, s, c in zip(CFG.model_names, CFG.model_scores, lr.coef_):
    print(n, s, c*100, sep="\t\t")

vit_b_16		0.52504		0.4281575193038673
vit_b_16_linear		0.52275		0.5060009186607468
regnet_y_16gf		0.53276		0.7230120487436666
regnet_y_32gf		0.52972		0.9170895882734724
regnet_y_16gf_linear		0.53928		0.3146333928496869
regnet_y_32gf_linear		0.54452		0.6600851190851034


#  By score

In [127]:
CFG = CFG_CLASS(
    dataset_dupl_word=5,
    img_model_name="regnet_y_32gf_linear",
    lr=5,
    lr_scheduler_name="None",
    save_model=True,
)
set_seed(CFG.seed)

In [128]:
train_data_dir = Path("../input/")
metadata = pd.read_parquet(CFG.metadata_path)
print("Metadata shape: ", metadata.shape)

full_prompt = metadata[["image_name", "prompt"]].values
train_prompt, val_prompt = train_test_split(
    full_prompt, 
    test_size=CFG.img_model_test_size, 
    random_state=CFG.seed,
    shuffle=True
)

CFG.dataset_train_size = len(train_prompt)
CFG.dataset_val_size = len(val_prompt)
CFG.max_batches_per_epoch_train, CFG.max_batches_per_epoch_val = get_train_config(
    train_size=CFG.dataset_train_size, 
    val_size=CFG.dataset_val_size, 
    batch_size=CFG.batch_size, 
    full_train_epoch_num=CFG.full_train_epoch_num, 
    full_val_epoch_num=CFG.full_val_epoch_num
)

print("Train/val")
print("Sizes: ", CFG.dataset_train_size, "/", CFG.dataset_val_size)
print("Batches per epoch: ", 
      CFG.max_batches_per_epoch_train, "/",
      CFG.max_batches_per_epoch_val)
print("Images per epoch: ", 
      CFG.max_batches_per_epoch_train * CFG.batch_size, "/",
      CFG.max_batches_per_epoch_val * CFG.batch_size)

train_prompt_dict = {img_name: prompt for img_name, prompt in train_prompt}
val_prompt = val_prompt[:10000]
val_prompt_dict = {img_name: prompt for img_name, prompt in val_prompt}

Metadata shape:  (503293, 2)
Train/val
Sizes:  478128 / 25165
Batches per epoch:  239 / 629
Images per epoch:  4780 / 12580


In [129]:
class CFG:
    seed = 42
    text_emb_size = 384
    is_kaggle = (os.environ.get('PWD') == '/kaggle/working')
    
    train_files_dir = "img2emb-data"
    
    test_flip = True
    
    # RESOURCES
    num_workers = 2
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    model_names = [
        "vit_b_16", "vit_b_16_linear", "regnet_y_16gf", "regnet_y_32gf", "regnet_y_16gf_linear", "regnet_y_32gf_linear", 
    ]
    model_scores = [
        0.52504, 0.52275, 0.53276, 0.52972, 0.53928, 0.54452
    ]
    
    model_alphas = [
        1, 1, 1, 1, 1, 1
    ]

In [130]:
test_data_dir = train_data_dir
test_image_names = os.listdir(test_data_dir)
test_prompt_dict = val_prompt_dict

pred_arr_models_list = []

for model_name in CFG.model_names:
    img_model, img_preprocess = get_img_model(img_model_name=model_name, 
                                              load_weight=False, 
                                              head_emb_size=CFG.text_emb_size)
    
    model_path = f"../input/{CFG.train_files_dir}/dataset_duplwords_5_model_{model_name}_sch_None_lr_1e_05.torch"
    img_model.load_state_dict(torch.load(model_path))
    img_model.to(CFG.device)
    img_model.eval()
    
    test_dataset = CustomDataSet(   
        data_dir=test_data_dir, 
        img2prompt=test_prompt_dict, 
        img_preprocess=img_preprocess
    )
    test_dataloader = DataLoader(
        test_dataset, 
        batch_size=batch_size_config[model_name][False], 
        shuffle=False, 
        num_workers=CFG.num_workers
    )
    
    pred_arr_model = [] 
    true_emb = []
    with torch.no_grad():
        for img_names, img_embs, prompts in tqdm(test_dataloader):
            img_embs = img_embs.to(CFG.device)
            pred = img_model(img_embs) # (batch, emb_size)
            
            if CFG.test_flip:
                img_embs_flip = transforms.functional.hflip(img_embs)
                pred_flip = img_model(img_embs_flip)
                pred = (pred + pred_flip) / 2
            
            pred = pred.cpu().detach().numpy()
                
            pred_arr_model.extend(pred) 
            true_emb.extend(st_model.encode(prompts))
                
    pred_arr_model = np.array(pred_arr_model) # (images, emb_size)
    pred_arr_models_list.append(pred_arr_model) # (models, images, emb_size)

  0%|          | 0/625 [00:00<?, ?it/s]

  0%|          | 0/209 [00:00<?, ?it/s]

  0%|          | 0/385 [00:00<?, ?it/s]

  0%|          | 0/1667 [00:00<?, ?it/s]

  0%|          | 0/385 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

In [131]:
def get_loss(loss_name, device):
    if loss_name == "cosine":
        loss_fn = torch.nn.CosineEmbeddingLoss()
        return lambda pred, true: loss_fn(pred.to(device), true.to(device), torch.ones(pred.size(0)).to(device))
    
loss = get_loss("cosine", CFG.device)

In [132]:
true_emb = np.array(true_emb)

In [133]:
from itertools import product

In [136]:
pred_arr_models_list[0].shape

(10000, 384)

In [138]:
m_best = None
s_best = 0

n_images = pred_arr_models_list[0].shape[0]
n_models = len(pred_arr_models_list)

model_alphas_list = product(np.linspace(0, 1, 6), repeat=6)
for model_alphas in tqdm(model_alphas_list):
    img2emb_pred_arr = np.zeros((n_images, CFG.text_emb_size))
    for j in range(n_models):
        img2emb_pred_arr += pred_arr_models_list[j] * model_alphas[j] / sum(CFG.model_alphas)
    s = 1 - float(loss(torch.tensor(img2emb_pred_arr), torch.tensor(true_emb)))
    
    if s > s_best:
        s_best = s
        m_best = model_alphas
        print(s, model_alphas)

0it [00:00, ?it/s]

0.6360373714691414 (0.0, 0.0, 0.0, 0.0, 0.0, 0.2)
0.636037371469589 (0.0, 0.0, 0.0, 0.0, 0.0, 0.4)
0.636037371469701 (0.0, 0.0, 0.0, 0.0, 0.0, 0.8)
0.6460583992867055 (0.0, 0.0, 0.0, 0.0, 0.2, 0.2)
0.6460583992868194 (0.0, 0.0, 0.0, 0.0, 0.4, 0.4)
0.6462382327271485 (0.0, 0.0, 0.0, 0.0, 0.4, 0.6000000000000001)
0.6462983750600009 (0.0, 0.0, 0.0, 0.0, 0.6000000000000001, 0.8)
0.6473434412852217 (0.0, 0.0, 0.0, 0.2, 0.0, 0.2)
0.6519247276995987 (0.0, 0.0, 0.0, 0.2, 0.2, 0.2)
0.6525792136065508 (0.0, 0.0, 0.0, 0.4, 0.2, 0.2)
0.6525792136066012 (0.0, 0.0, 0.0, 0.8, 0.4, 0.4)
0.6526310065901999 (0.0, 0.0, 0.0, 1.0, 0.6000000000000001, 0.6000000000000001)
0.6534682281288349 (0.0, 0.0, 0.2, 0.2, 0.0, 0.2)
0.6539065814320673 (0.0, 0.0, 0.2, 0.2, 0.2, 0.2)
0.6547815235482886 (0.0, 0.0, 0.2, 0.4, 0.2, 0.2)
0.6549707218938094 (0.0, 0.0, 0.4, 0.6000000000000001, 0.2, 0.4)
0.6549953720944401 (0.0, 0.0, 0.6000000000000001, 1.0, 0.4, 0.6000000000000001)
0.6551278842473358 (0.0, 0.2, 0.0, 0.4, 0.2, 0.

In [125]:
m_best, s_best

((0.2, 0.2, 0.4, 0.8, 0.2, 0.4), 0.6577763351279051)

In [139]:
import itertools

In [144]:
m_best = None
s_best = 0

n_images = pred_arr_models_list[0].shape[0]
n_models = len(pred_arr_models_list)

model_alphas_list = list(
    itertools.product(
        [0.15, 0.2, 0.25], [0.15, 0.2, 0.25], [0.35, 0.4, 0.45], [0.75, 0.8, 0.85], [0.15, 0.2, 0.25], [0.35, 0.4, 0.45]
    )
)
for model_alphas in tqdm(model_alphas_list):
    img2emb_pred_arr = np.zeros((n_images, CFG.text_emb_size))
    for j in range(n_models):
        img2emb_pred_arr += pred_arr_models_list[j] * model_alphas[j] / sum(CFG.model_alphas)
    s = 1 - float(loss(torch.tensor(img2emb_pred_arr), torch.tensor(true_emb)))
    
    if s > s_best:
        s_best = s
        m_best = model_alphas
        print(s, model_alphas)

  0%|          | 0/729 [00:00<?, ?it/s]

0.6577123514786629 (0.15, 0.15, 0.35, 0.75, 0.15, 0.35)
0.6577463180680058 (0.15, 0.2, 0.35, 0.75, 0.15, 0.35)
0.6577715660789212 (0.2, 0.15, 0.35, 0.75, 0.15, 0.35)
0.6577743558078629 (0.2, 0.15, 0.4, 0.75, 0.15, 0.35)
0.6577784906571984 (0.2, 0.2, 0.35, 0.75, 0.15, 0.35)
0.6577887378473193 (0.2, 0.2, 0.4, 0.75, 0.15, 0.35)
0.6577897141620852 (0.2, 0.2, 0.4, 0.75, 0.15, 0.4)
0.6577898301689076 (0.2, 0.2, 0.4, 0.8, 0.2, 0.4)


In [149]:
a = np.array([0.2, 0.2, 0.4, 0.8, 0.2, 0.4])
(a - a.min()) / (a.max() - a.min())

array([0.        , 0.        , 0.33333333, 1.        , 0.        ,
       0.33333333])

In [150]:
a = np.array([0.43, 0.51, 0.72, 0.92, 0.31, 0.66])
(a - a.min()) / (a.max() - a.min())

array([0.19672131, 0.32786885, 0.67213115, 1.        , 0.        ,
       0.57377049])

In [162]:
a = np.array([0.52, 0.52504, 0.52275, 0.53276, 0.52972, 0.53928, 0.54452])
((a - a.min()) / (a.max() - a.min())) [1:]

array([0.20554649, 0.11215334, 0.52039152, 0.39641109, 0.7862969 ,
       1.        ])