In [1]:
import torch
# Sanity check: report whether PyTorch can see a CUDA device before anything is loaded.
print(torch.cuda.is_available())

True


In [2]:
from pdata import PersonalizedMMUDataset, PersonalizedT2IDataset, get_personalized_mmu_dataloader, get_personalized_t2i_dataloader
from lightning.pytorch.utilities import CombinedLoader

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.nn as nn
import numpy as np
from tqdm import tqdm
from PIL import Image

from models import Showo, MAGVITv2, get_mask_chedule
from training.prompting_utils import UniversalPrompting, create_attention_mask_predict_next, create_attention_mask_for_mmu
from training.utils import get_config, flatten_omega_conf, mask_or_random_replace_tokens, AverageMeter
from transformers import AutoTokenizer
from models.clip_encoder import CLIPVisionTower
from transformers import CLIPImageProcessor
from llava.llava import conversation as conversation_lib

# Make the "phi1.5" conversation template the default used by the LLaVA conversation helpers.
conversation_lib.default_conversation = conversation_lib.conv_templates["phi1.5"]

import os
from omegaconf import DictConfig, ListConfig, OmegaConf
# Load the Show-o demo configuration; the path is relative to the current working directory.
config = OmegaConf.load('configs/showo_demo.yaml')
# device setup
# NOTE(review): the GPU index is hard-coded. Moving models to this device will fail on a
# host that exposes fewer than eight GPUs -- change the index (or restrict visibility with
# CUDA_VISIBLE_DEVICES) when running elsewhere.
device = torch.device("cuda:7")

  from .autonotebook import tqdm as notebook_tqdm


[2025-03-16 20:07:09,298] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/home/hpyky/miniconda3/envs/showo/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status




In [3]:
# Show-o tokenizer (the original note names the LLM as 'microsoft/phi-1_5'), left-padded.
tokenizer = AutoTokenizer.from_pretrained(
    config.model.showo.llm_model_path,
    padding_side="left",
)

# Universal prompting wraps the tokenizer and is given the task / modality special tokens.
uni_prompting = UniversalPrompting(
    tokenizer,
    max_text_len=config.dataset.preprocessing.max_seq_length,
    special_tokens=(
        "<|soi|>", "<|eoi|>", "<|sov|>", "<|eov|>",
        "<|t2i|>", "<|mmu|>", "<|t2v|>", "<|v2v|>", "<|lvg|>",
    ),
    ignore_id=-100,
    cond_dropout_prob=config.training.cond_dropout_prob,
)

# MAGVIT-v2 image tokenizer (used for t2i); it is frozen / put in eval mode in a later cell.
vq_model = MAGVITv2.from_pretrained(config.model.vq_model.vq_model_name)
vq_model = vq_model.to(device)

# The CLIP-ViT vision tower (an alternative image encoder for mmu) is not used here:
# vision_tower_name = config.clip_path
# vision_tower = CLIPVisionTower(vision_tower_name).to(device)
# clip_image_processor = CLIPImageProcessor.from_pretrained(vision_tower_name)

# Show-o model; train / eval mode is selected in a later cell.
model = Showo.from_pretrained(config.model.showo.pretrained_model_path)
model = model.to(device)

# Sampling parameters.
temperature = 1  # 1.0 = unchanged, < 1.0 = less random, > 1.0 = more random predictions
top_k = 1  # keep only the top_k most likely tokens; all others get zero probability
# The LLaVA system prompt constants are intentionally left disabled:
# LLAVA_SYSTEM_PROMPT = "A chat between a curious user and an artificial intelligence assistant. " \
#                 "The assistant gives helpful, detailed, and polite answers to the user's questions."
# LLAVA_SYSTEM_PROMPT_LEN = 28

Working with z of shape (1, 13, 16, 16) = 3328 dimensions.
Look-up free quantizer with codebook size: 8192


The config attributes {'mask_token_id': 58497} were passed to Showo, but are not expected and will be ignored. Please verify your config.json configuration file.
  if self.w_clip_vit:


attention implementation:  sdpa


In [4]:
# Inspect the sizes of the input-embedding table and of the LM head.
# Only the last expression of a notebook cell is displayed; the preceding lines are
# evaluated but their values are discarded.
# print(model.showo.get_input_embeddings())
model.showo.get_input_embeddings().num_embeddings
# Number of embedding rows that lie beyond the tokenizer vocabulary.
model.showo.get_input_embeddings().num_embeddings - len(tokenizer)
model.showo.get_input_embeddings().weight.data.shape
model.showo.lm_head.weight.shape
# Displayed value: shape of the LM-head bias.
model.showo.lm_head.bias.shape

torch.Size([58498])

In [5]:
# Root directory of the personalization data and the name of the concept to learn.
# NOTE(review): data_root is an absolute, machine-specific path -- adjust when running elsewhere.
data_root = "/home/hpyky/full_mcdata"
# The concept name is also used to build the "<concept>" token and the save directory.
concept = "dunpai"

In [6]:
nums_new_token_i = 16

#################################
# One identifier token for the concept plus `nums_new_token_i` auxiliary tokens.
new_tokens = [f"<{concept}>"] + [f"<token_{i}>" for i in range(nums_new_token_i)]
num_new_tokens = len(new_tokens)  # 17

# Sizes before the vocabulary is extended.
# Text tokens occupy ids [0, len(tokenizer)), i.e. 0-50304 in the recorded run.
original_text_vocab_size = len(tokenizer)
# Every embedding row after the text vocabulary (ids 50305-58497 in the recorded run).
original_image_vocab_size = model.showo.get_input_embeddings().num_embeddings - len(tokenizer)

original_total_vocab = original_text_vocab_size + original_image_vocab_size  # 58498

# Sizes after the extension.
new_text_vocab_size = original_text_vocab_size + num_new_tokens  # 50305 + 17 = 50322
new_total_vocab = original_total_vocab + num_new_tokens          # 58498 + 17 = 58515

# ------------------------------
# Step 1: extend the tokenizer vocabulary
# ------------------------------

# The new tokens must land directly after the existing text vocabulary (50305-50321),
# because the weight rows behind them are shifted by exactly `num_new_tokens` below.
num_added_tokens = tokenizer.add_tokens(new_tokens)
new_token_ids = tokenizer.convert_tokens_to_ids(new_tokens)
expected_token_ids = list(range(original_text_vocab_size, new_text_vocab_size))
if num_added_tokens != num_new_tokens or new_token_ids != expected_token_ids:
    # Typical cause: the cell was executed twice, or a token already existed in the
    # tokenizer. Shifting the weights again would silently corrupt the layout.
    raise RuntimeError(
        f"Expected {num_new_tokens} new tokens with ids {expected_token_ids[0]}-"
        f"{expected_token_ids[-1]}, but add_tokens added {num_added_tokens} and the "
        f"ids are {new_token_ids}. Restart the kernel and run the notebook from the top."
    )
print("新 token ID:", new_token_ids)  # expected: 50305-50321

# ------------------------------
# Step 2: adjust the model weights
# ------------------------------
with torch.no_grad():
    old_rows = slice(original_text_vocab_size, original_total_vocab)
    shifted_rows = slice(new_text_vocab_size, new_total_vocab)

    # Snapshot the rows that have to move *before* resizing, so the copy never reads
    # from memory it is about to overwrite (source and destination ranges overlap).
    lm_head = model.showo.lm_head
    # nn.Linear always has a `bias` attribute (it is None when bias is disabled), so
    # `hasattr` cannot be used to detect a bias.
    head_has_bias = lm_head.bias is not None
    original_image_weights = model.showo.get_input_embeddings().weight.data[old_rows].clone()
    original_head_weights = lm_head.weight.data[old_rows].clone()
    original_head_bias = lm_head.bias.data[old_rows].clone() if head_has_bias else None

    # Grow the embedding table (58498 -> 58515).
    model.showo.resize_token_embeddings(new_total_vocab)

    # Re-fetch the modules: resizing may have replaced them.
    input_embeddings = model.showo.get_input_embeddings()
    lm_head = model.showo.lm_head
    if lm_head.weight.data.shape[0] != new_total_vocab:
        raise ValueError("lm_head weights do not match the input embeddings!")

    # Move the former image-token rows `num_new_tokens` positions back.
    input_embeddings.weight.data[shifted_rows] = original_image_weights
    lm_head.weight.data[shifted_rows] = original_head_weights
    if head_has_bias:
        lm_head.bias.data[shifted_rows] = original_head_bias

    # NOTE(review): the rows of the new tokens (50305-50321) are not re-initialised; they
    # keep the values that previously sat at those indices (the first `num_new_tokens`
    # rows of the shifted range). Confirm that this initialisation is intended.
    # A previously tried alternative copied the last `num_new_tokens` text-token rows:
    # input_embeddings.weight.data[original_text_vocab_size:new_text_vocab_size] = \
    #     input_embeddings.weight.data[original_text_vocab_size - num_new_tokens:original_text_vocab_size]

# Row mask over the extended vocabulary: True = row must not be updated by training,
# False = row of a newly added token (trainable).
index_no_updates = torch.ones((new_total_vocab,), dtype=torch.bool)
index_no_updates[new_token_ids] = False
# ------------------------------
# Verification
# ------------------------------
# Ids of the new tokens.
print("新增文本 token ID:", [tokenizer.convert_tokens_to_ids(t) for t in new_tokens])  # expected: 50305-50321

# The token now stored at the first id after the old text vocabulary is the concept token
# (image tokens are not part of the tokenizer); this round-trips it through the tokenizer.
sample_image_token = tokenizer.convert_ids_to_tokens(original_text_vocab_size)  # id 50305
print(f"Concept Token '{sample_image_token}' 的新 ID:", tokenizer.convert_tokens_to_ids(sample_image_token))  # expected: 50305

# Shape of the embedding table.
print("嵌入层大小:", model.showo.get_input_embeddings().weight.shape)  # expected: torch.Size([58515, 2048])

# False entries of index_no_updates are the new token ids; everything else is True.
print("index_no_updates 中 False 的位置:", torch.nonzero(~index_no_updates).squeeze())  # expected: 50305-50321
print("index_no_updates 中 True 的数量:", torch.sum(index_no_updates))  # expected: 58498

# Reference copies used by the training loop to restore the frozen rows after each step.
with torch.no_grad():
    orig_embeds = model.showo.get_input_embeddings().weight.data.clone()
    orig_lm_head_weight = model.showo.lm_head.weight.data.clone()
    # The training loop indexes this tensor, i.e. it assumes the head has a bias.
    orig_lm_head_bias = (
        model.showo.lm_head.bias.data.clone() if model.showo.lm_head.bias is not None else None
    )

新 token ID: [50305, 50306, 50307, 50308, 50309, 50310, 50311, 50312, 50313, 50314, 50315, 50316, 50317, 50318, 50319, 50320, 50321]
新增文本 token ID: [50305, 50306, 50307, 50308, 50309, 50310, 50311, 50312, 50313, 50314, 50315, 50316, 50317, 50318, 50319, 50320, 50321]
Concept Token '<dunpai>' 的新 ID: 50305
嵌入层大小: torch.Size([58515, 2048])
index_no_updates 中 False 的位置: tensor([50305, 50306, 50307, 50308, 50309, 50310, 50311, 50312, 50313, 50314,
        50315, 50316, 50317, 50318, 50319, 50320, 50321])
index_no_updates 中 True 的数量: tensor(58498)


In [7]:
# Pull out the rows that belong to the newly added tokens: input embeddings, LM-head
# weights and LM-head biases. Indexing with a list of ids yields copies, not views.
# (The "nead"/"wight" spellings are kept because the next cell refers to these names.)
concept_embeds = model.showo.get_input_embeddings().weight.data[new_token_ids]
concept_lm_nead_wight = model.showo.lm_head.weight.data[new_token_ids]
concept_lm_nead_bias = model.showo.lm_head.bias.data[new_token_ids]

In [8]:
# Display the shapes of the three per-token tensors extracted in the previous cell.
concept_embeds.shape, concept_lm_nead_wight.shape, concept_lm_nead_bias.shape

(torch.Size([17, 2048]), torch.Size([17, 2048]), torch.Size([17]))

In [9]:
# Display the special-token -> id mapping held by the prompting helper.
uni_prompting.sptids_dict

{'<|soi|>': tensor([50296]),
 '<|eoi|>': tensor([50297]),
 '<|sov|>': tensor([50298]),
 '<|eov|>': tensor([50299]),
 '<|t2i|>': tensor([50300]),
 '<|mmu|>': tensor([50301]),
 '<|t2v|>': tensor([50302]),
 '<|v2v|>': tensor([50303]),
 '<|lvg|>': tensor([50304]),
 '<|sot|>': tensor([50256]),
 '<|eot|>': tensor([50256]),
 '<|pad|>': tensor([50295])}

In [10]:
from peft import LoraConfig, get_peft_model

# LoRA adapters are attached to the attention query / value projections and to both MLP
# layers; "k_proj" is not in the target list.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",  # causal language modelling
    r=8,                    # rank of the low-rank update matrices
    lora_alpha=16,          # scaling coefficient of the LoRA update
    lora_dropout=0.0,       # dropout rate on the LoRA branch
    target_modules=["fc1", "v_proj", "q_proj", "fc2"],
)

# Replace the language model with its PEFT-wrapped version.
model.showo = get_peft_model(model.showo, lora_config)

In [11]:
# Display the module tree to confirm where the LoRA layers were inserted.
model

Showo(
  (showo): PeftModelForCausalLM(
    (base_model): LoraModel(
      (model): PhiForCausalLM(
        (model): PhiModel(
          (embed_tokens): Embedding(58515, 2048)
          (embed_dropout): Dropout(p=0.0, inplace=False)
          (layers): ModuleList(
            (0-23): 24 x PhiDecoderLayer(
              (self_attn): PhiSdpaAttention(
                (q_proj): lora.Linear(
                  (base_layer): Linear(in_features=2048, out_features=2048, bias=True)
                  (lora_dropout): ModuleDict(
                    (default): Identity()
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features=2048, out_features=8, bias=False)
                  )
                  (lora_B): ModuleDict(
                    (default): Linear(in_features=8, out_features=2048, bias=False)
                  )
                  (lora_embedding_A): ParameterDict()
                  (lora_embedding_B): ParameterDict()
                )


In [12]:
# Freeze the VQ image tokenizer. `requires_grad_` is a method and has to be *called*:
# assigning `False` to it only shadows the method with a bool and leaves every VQ
# parameter trainable.
vq_model.requires_grad_(False)
vq_model.eval()
model.train()

# Train only the LoRA adapters, the token-embedding table and the LM head; every other
# parameter is frozen. The trainable parameters are collected in the same pass.
trainable_params = []
for name, param in model.named_parameters():
    param.requires_grad = "lora" in name or "embed_tokens" in name or "lm_head" in name
    if param.requires_grad:
        trainable_params.append(param)

# A single parameter group: the LoRA weights and the embedding / head rows share the same
# hyper-parameters.
# NOTE(review): the recorded run shows loss spikes (e.g. epochs 11, 17, 28); lr=1e-2 may be
# too aggressive for the LoRA weights -- consider a lower rate or separate parameter groups.
optimizer = torch.optim.AdamW(
    trainable_params,  # embeddings, LM head and LoRA adapters
    lr=1e-2,
    betas=(0.9, 0.999),
    weight_decay=1e-2,
    eps=1e-08,
)

# List everything that will receive gradients (embed_tokens and lm_head are included).
for names, p in model.named_parameters():
    if p.requires_grad:
        print(f"{names} requires_grad")

# Number of parameters whose name contains "lora".
# LoRA targets: Q, V, mlp.fc1, mlp.fc2
lora_params = [(n, p) for n, p in model.named_parameters() if "lora" in n]
lora_params_num = sum(p.numel() for n, p in lora_params)
print(f"LoRA parameters: {lora_params_num}")
# Token-related trainable parameters: 2048*58515*2 + 58515 = 239735955
# Total number of trainable parameters.
trainable_params_num = sum(p.numel() for p in trainable_params)
print(f"Trainable parameters: {trainable_params_num}")

showo.base_model.model.model.embed_tokens.weight requires_grad
showo.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight requires_grad
showo.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight requires_grad
showo.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight requires_grad
showo.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight requires_grad
showo.base_model.model.model.layers.0.mlp.fc1.lora_A.default.weight requires_grad
showo.base_model.model.model.layers.0.mlp.fc1.lora_B.default.weight requires_grad
showo.base_model.model.model.layers.0.mlp.fc2.lora_A.default.weight requires_grad
showo.base_model.model.model.layers.0.mlp.fc2.lora_B.default.weight requires_grad
showo.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.weight requires_grad
showo.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.weight requires_grad
showo.base_model.model.model.layers.1.self_attn.v_proj.lora_A.d

In [13]:
# After the vocabulary was extended, the mask token is the last row of the embedding table;
# record its id on both the config and the model.
model.config.mask_token_id = model.mask_token_id = (
    model.showo.get_input_embeddings().num_embeddings - 1
)

In [14]:
# Masking schedule for the image tokens; falls back to "cosine" when the config has no entry.
mask_schedule = get_mask_chedule(config.training.get("mask_schedule", "cosine"))
# Id of the mask token as stored on the model.
mask_id = model.mask_token_id
# dtype the attention masks are cast to. Read through get_input_embeddings() (as the rest
# of the notebook does) instead of spelling out the PEFT wrapper nesting, so the line does
# not depend on how many wrapper levels surround the language model.
mask_dtype = model.showo.get_input_embeddings().weight.dtype

In [15]:

# t2i_dataset = PersonalizedT2IDataset(data_root, concept)
# t2i_dataloader = DataLoader(t2i_dataset, batch_size=5, shuffle=True, num_workers=10, pin_memory=True)

# Personalized dataloaders for the two training flows: multimodal understanding (mmu)
# and text-to-image (t2i). Both use text sequences of at most 128 tokens.
mmu_dataloader = get_personalized_mmu_dataloader(
    data_root,
    concept,
    tokenizer,
    batch_size=5,
    num_workers=0,
    max_length=128,
)
t2i_dataloader = get_personalized_t2i_dataloader(
    data_root,
    concept,
    tokenizer,
    batch_size=2,
    num_workers=0,
    max_length=128,
)

# Each combined batch is a dict carrying one mmu batch and one t2i batch.
iterables = dict(mmu_flow=mmu_dataloader, t2i_flow=t2i_dataloader)
combined_dataloader = CombinedLoader(iterables, mode="max_size_cycle")

# Before adding the new tokens, the vocab size is 58498
# vocab size = 58498 = 50295  llm vocabsize
#                    + 10     <|soi|> <|eoi|> <|sov|> <|eov|> <|t2i|> <|mmu|> <|t2v|> <|v2v|> <|lvg|> <|pad|>
#                    + 8192   vq model codebook size
#                    + 1      mask token (token id == 58497)
from typing import Union


# Bare expression kept for reference only: it is evaluated, but since it is not the last
# statement of the cell nothing is displayed. The mapping is recorded in the comment below.
uni_prompting.sptids_dict
# {'<|soi|>': tensor([50296]),
#  '<|eoi|>': tensor([50297]),
#  '<|sov|>': tensor([50298]),
#  '<|eov|>': tensor([50299]),
#  '<|t2i|>': tensor([50300]),
#  '<|mmu|>': tensor([50301]),
#  '<|t2v|>': tensor([50302]),
#  '<|v2v|>': tensor([50303]),
#  '<|lvg|>': tensor([50304]),
#  '<|sot|>': tensor([50256]),
#  '<|eot|>': tensor([50256]),
#  '<|pad|>': tensor([50295])}

# uni_prompting.text_tokenizer == tokenizer
def prepare_inputs_and_labels(
        pixel_values_or_image_ids: Union[torch.FloatTensor, torch.LongTensor],
        texts: Union[str, list],
        min_masking_rate: float = 0.0,
        is_train: bool = True,
):
    """Build masked text-to-image inputs and labels for one batch.

    The images are encoded into discrete codes by the notebook-level ``vq_model``, the
    codes are offset by the tokenizer length so that they address the image rows of the
    embedding table, a subset is masked, and the result is combined with the captions
    by ``uni_prompting`` in 't2i' mode.

    Relies on the notebook globals ``vq_model``, ``uni_prompting``, ``mask_id``,
    ``config`` and ``mask_schedule``.

    Args:
        pixel_values_or_image_ids: image batch handed to ``vq_model.get_code``.
        texts: caption(s) for the batch (presumably one string per image -- confirm
            against ``UniversalPrompting``).
        min_masking_rate: accepted for interface compatibility; currently unused.
        is_train: forwarded to ``mask_or_random_replace_tokens``.

    Returns:
        Tuple ``(input_ids, labels, mask_prob, image_tokens)`` where ``image_tokens``
        are the offset, unmasked image codes.
    """
    # The VQ tokenizer is not trained and its output is discrete, so no autograd graph
    # is needed for the encoding pass.
    with torch.no_grad():
        image_tokens = vq_model.get_code(pixel_values_or_image_ids)
    # Shift the codebook indices past the text vocabulary.
    image_tokens = image_tokens + len(uni_prompting.text_tokenizer)

    # create MLM mask and labels
    input_ids, labels, loss_weight, mask_prob = mask_or_random_replace_tokens(
        image_tokens,
        mask_id,
        config,
        mask_schedule=mask_schedule,
        is_train=is_train,
    )
    # The second return value of the prompting helper is not used here.
    input_ids, _, labels = uni_prompting((texts, input_ids, labels), 't2i')

    return input_ids, labels, mask_prob, image_tokens

Formatting llava instruction data


In [16]:
# Materialise the combined loader once; the training loop iterates over this cached list
# in every epoch. Each element is unpacked there as (batch, batch_idx, dataloader_idx).
# NOTE(review): because the batches are cached, their composition and order are identical
# in every epoch -- confirm that this is intended.
list_combined_dataloader = list(combined_dataloader)
# one_batch_mmu = list_combined_dataloader[0][0]['mmu_flow']
# First t2i batch, kept for interactive inspection.
one_batch_t2i = list_combined_dataloader[0][0]['t2i_flow']

# one_batch_mmu = next(iter(mmu_dataloader))

In [None]:
# The model's output size has to match the extended vocabulary.
model.output_size = new_total_vocab
save_path = os.path.join("saves", concept, "lora_merged_qvfc1fc2")
os.makedirs(save_path, exist_ok=True)

# Row mask of the embedding / LM-head rows that must stay frozen. It is moved to the
# device of the weights once, so the per-step restore below does not have to transfer it
# again on every indexing operation.
frozen_rows = index_no_updates.to(model.showo.get_input_embeddings().weight.device)

for epoch in range(0, 500):
    print(f"Epoch {epoch+1}")
    loss_list = []
    loss_t2i_list = []
    loss_mmu_list = []
    for batch, batch_idx, dataloader_idx in tqdm(list_combined_dataloader):
        batch_size_mmu = batch["mmu_flow"]["images"].shape[0]
        batch_size_t2i = batch["t2i_flow"]["images"].shape[0]

        # ---------------- t2i format ----------------
        pixel_values, texts = batch["t2i_flow"]["images"], batch["t2i_flow"]["conditions"]
        pixel_values = pixel_values.to(device)
        input_ids, labels, mask_prob, image_tokens_ori = prepare_inputs_and_labels(pixel_values, texts, is_train=True)
        attention_mask = create_attention_mask_predict_next(input_ids,
                                                                pad_id=int(uni_prompting.sptids_dict['<|pad|>']),
                                                                soi_id=int(uni_prompting.sptids_dict['<|soi|>']),
                                                                eoi_id=int(uni_prompting.sptids_dict['<|eoi|>']),
                                                                rm_pad_in_image=True,
                                                                return_inverse_mask=True)
        attention_mask = attention_mask.to(mask_dtype)
        # (original author's note: "Captain America's shield")

        # ---------------- mmu format ----------------
        pixel_values_mmu, input_ids_mmu, labels_mmu = (batch["mmu_flow"]["images"],
                                                      batch["mmu_flow"]["input_ids"],
                                                      batch["mmu_flow"]["labels"])
        pixel_values_mmu = pixel_values_mmu.to(device, non_blocking=True)
        input_ids_mmu = input_ids_mmu.to(device, non_blocking=True)
        # The VQ tokenizer is not trained and returns discrete codes, so the encoding
        # pass does not need an autograd graph.
        with torch.no_grad():
            image_tokens_mmu = vq_model.get_code(pixel_values_mmu)
        # Shift the codebook indices past the text vocabulary.
        image_tokens_mmu = image_tokens_mmu + len(uni_prompting.text_tokenizer)

        # Sequence layout: <|mmu|> <|soi|> image tokens <|eoi|> text tokens
        input_ids_mmu = torch.cat([
                    (torch.ones(input_ids_mmu.shape[0], 1) * uni_prompting.sptids_dict['<|mmu|>']).to(
                        device),
                    (torch.ones(input_ids_mmu.shape[0], 1) * uni_prompting.sptids_dict['<|soi|>']).to(
                        device),
                    image_tokens_mmu,
                    (torch.ones(input_ids_mmu.shape[0], 1) * uni_prompting.sptids_dict['<|eoi|>']).to(
                        device),
                    input_ids_mmu,
                ], dim=1).long()

        # Labels mirror the layout above; the task token, the image span and its
        # delimiters are filled with the ignore id.
        labels_mmu = torch.cat([
                    (torch.ones(input_ids_mmu.shape[0], 1) * uni_prompting.ignore_id).to(device),
                    (torch.ones(input_ids_mmu.shape[0], 1) * uni_prompting.ignore_id).to(device),
                    torch.ones_like(image_tokens_mmu) * uni_prompting.ignore_id,
                    (torch.ones(input_ids_mmu.shape[0], 1) * uni_prompting.ignore_id).to(device),
                    labels_mmu.to(device)
                ], dim=1).long()

        attention_mask_mmu = create_attention_mask_for_mmu(input_ids_mmu.to(input_ids.device),
                                                               eoi_id=int(uni_prompting.sptids_dict['<|eoi|>']))
        attention_mask_mmu = attention_mask_mmu.to(mask_dtype)

        # Stack both flows into one batch: t2i samples first, mmu samples after them.
        attention_mask = torch.cat([attention_mask, attention_mask_mmu], dim=0)
        input_ids = torch.cat((input_ids, input_ids_mmu.to(input_ids.device)), dim=0)
        labels = torch.cat((labels, labels_mmu.to(input_ids.device)), dim=0)

        optimizer.zero_grad()

        # NOTE(review): max_seq_length is hard-coded to 128, the max_length given to the
        # dataloaders -- presumably it must also equal the text length used by
        # uni_prompting; confirm against the config.
        logits, loss_t2i, loss_lm, loss_mmu = model(
                    input_ids=input_ids,
                    input_embeddings=None,
                    attention_mask=attention_mask,
                    labels=labels,
                    label_smoothing=0.0,
                    batch_size_t2i=batch_size_t2i,
                    batch_size_lm=0,
                    batch_size_mmu=batch_size_mmu,
                    max_seq_length=128,
                )
        loss = 0.8 * loss_t2i + 0.2 * loss_mmu
        # loss = loss_t2i
        loss.backward()
        optimizer.step()
        loss_list.append(loss.item())
        loss_t2i_list.append(loss_t2i.item())
        loss_mmu_list.append(loss_mmu.item())
        # tqdm.set_postfix(loss=loss.item(), loss_t2i=loss_t2i.item(), loss_mmu=loss_mmu.item())
        # tqdm.write(f"loss: {loss.item()}, loss_t2i: {loss_t2i.item()}, loss_mmu: {loss_mmu.item()}")
        # Restore the original weights: the optimizer updates the whole embedding table
        # and LM head, so every row except those of the new tokens is reset after each step.
        with torch.no_grad():
            model.showo.get_input_embeddings().weight.data[frozen_rows] = orig_embeds[frozen_rows]
            model.showo.lm_head.weight.data[frozen_rows] = orig_lm_head_weight[frozen_rows]
            model.showo.lm_head.bias.data[frozen_rows] = orig_lm_head_bias[frozen_rows]

    # Per-epoch diagnostics: mean losses and weight norms. The norms of the frozen rows
    # should stay constant, which confirms that the restore step works.
    with torch.no_grad():
        print(f"Epoch {epoch+1} loss: {np.mean(loss_list)}, loss_t2i: {np.mean(loss_t2i_list)}, loss_mmu: {np.mean(loss_mmu_list)}")
        print(f"  Token-Norm: {model.showo.get_input_embeddings().weight[new_token_ids].norm().item()}")
        print(f"  index_no_updates-Token-Norm: {model.showo.get_input_embeddings().weight[frozen_rows].norm().item()}")
        print(f"  LM-Head-Weight-Norm: {model.showo.lm_head.weight[new_token_ids].norm().item()}")
        print(f"  index_no_updates-LM-Head-Weight-Norm: {model.showo.lm_head.weight[frozen_rows].norm().item()}")
        print(f"  LM-Head-Bias-Norm: {model.showo.lm_head.bias[new_token_ids].norm().item()}")
        print(f"  index_no_updates-LM-Head-Bias-Norm: {model.showo.lm_head.bias[frozen_rows].norm().item()}")

    # print(f"Epoch {epoch} loss: {np.mean(loss_list)}, loss_t2i: {np.mean(loss_t2i_list)}")
    # Every 10 epochs, save the rows of the new tokens and the LoRA model.
    if (epoch+1) % 10 == 0:
        save_path_embed = os.path.join(save_path, f"epoch_{epoch+1}_embed.pt")
        save_path_lm_head_weight = os.path.join(save_path, f"epoch_{epoch+1}_lm_head_weight.pt")
        save_path_lm_head_bias = os.path.join(save_path, f"epoch_{epoch+1}_lm_head_bias.pt")

        torch.save(model.showo.get_input_embeddings().weight.data[new_token_ids], save_path_embed)
        torch.save(model.showo.lm_head.weight.data[new_token_ids], save_path_lm_head_weight)
        torch.save(model.showo.lm_head.bias.data[new_token_ids], save_path_lm_head_bias)
        model.showo.save_pretrained(os.path.join(save_path, f"epoch_{epoch+1}_lora_model"))
        

Epoch 1


  0%|          | 0/48 [00:00<?, ?it/s]

100%|██████████| 48/48 [00:21<00:00,  2.24it/s]


Epoch 1 loss: 10.865355670452118, loss_t2i: 10.843851079543432, loss_mmu: 10.951373517513275
  Token-Norm: 14.454191207885742
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.824005126953125
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1825370788574219
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 2


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 2 loss: 8.393536498149237, loss_t2i: 8.941564778486887, loss_mmu: 6.201422800620397
  Token-Norm: 15.549836158752441
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.853802680969238
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.178760051727295
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 3


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 3 loss: 7.670824448267619, loss_t2i: 8.503815541664759, loss_mmu: 4.338859980305036
  Token-Norm: 15.899003028869629
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.808540344238281
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.175623893737793
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 4


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 4 loss: 7.384706387917201, loss_t2i: 8.313751220703125, loss_mmu: 3.668526530265808
  Token-Norm: 17.502897262573242
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.794952392578125
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.173599123954773
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 5


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 5 loss: 7.303807278474172, loss_t2i: 8.280677189429602, loss_mmu: 3.3963273763656616
  Token-Norm: 19.052400588989258
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.763790130615234
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1703484058380127
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 6


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 6 loss: 7.2273218135039015, loss_t2i: 8.284037113189697, loss_mmu: 3.0004598100980124
  Token-Norm: 19.699657440185547
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.721193313598633
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1643640995025635
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 7


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 7 loss: 7.102208882570267, loss_t2i: 8.178779204686483, loss_mmu: 2.7959269881248474
  Token-Norm: 20.798175811767578
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.649687767028809
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.159875750541687
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 8


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 8 loss: 6.996829082568486, loss_t2i: 8.085861712694168, loss_mmu: 2.6406981398661933
  Token-Norm: 23.01668357849121
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.5972900390625
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1554630994796753
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 9


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 9 loss: 6.845438142617543, loss_t2i: 7.908754567305247, loss_mmu: 2.592171862721443
  Token-Norm: 24.90275764465332
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.535924911499023
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1497724056243896
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 10


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 10 loss: 6.587938110033671, loss_t2i: 7.683952957391739, loss_mmu: 2.2038781717419624
  Token-Norm: 25.285619735717773
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.477255821228027
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.144661545753479
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438




Epoch 11


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 11 loss: 7.945301065842311, loss_t2i: 8.836096207300821, loss_mmu: 4.382119913895925
  Token-Norm: 26.270259857177734
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.689845085144043
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1481105089187622
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 12


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 12 loss: 8.603441655635834, loss_t2i: 9.216173201799393, loss_mmu: 6.152515113353729
  Token-Norm: 26.943588256835938
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.846661567687988
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1419345140457153
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 13


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 13 loss: 7.4240294098854065, loss_t2i: 8.268123507499695, loss_mmu: 4.04765238861243
  Token-Norm: 27.009490966796875
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.83233642578125
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1353858709335327
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 14


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 14 loss: 7.256800363461177, loss_t2i: 8.150108834107717, loss_mmu: 3.683565859993299
  Token-Norm: 27.358455657958984
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.81070613861084
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1302770376205444
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 15


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 15 loss: 7.198319802681605, loss_t2i: 8.108300775289536, loss_mmu: 3.558395425478617
  Token-Norm: 27.359344482421875
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.763711929321289
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1257214546203613
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 16


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 16 loss: 7.018386234839757, loss_t2i: 7.966349840164185, loss_mmu: 3.2265315552552543
  Token-Norm: 27.245519638061523
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.697855949401855
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1211599111557007
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 17


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 17 loss: 9.23206259806951, loss_t2i: 9.814344863096872, loss_mmu: 6.902932946880658
  Token-Norm: 27.338457107543945
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 15.7680025100708
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2171483039855957
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 18


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 18 loss: 9.231357326110205, loss_t2i: 9.513540675242742, loss_mmu: 8.102623691161474
  Token-Norm: 28.573163986206055
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.60980224609375
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3151681423187256
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 19


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 19 loss: 8.256396571795145, loss_t2i: 8.79797508319219, loss_mmu: 6.090081969896953
  Token-Norm: 28.643707275390625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.5428466796875
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3090159893035889
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 20


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 20 loss: 7.862963378429413, loss_t2i: 8.539485057195028, loss_mmu: 5.156876628597577
  Token-Norm: 28.528396606445312
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.44778060913086
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3025290966033936
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 21


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 21 loss: 7.660030881563823, loss_t2i: 8.380016793807348, loss_mmu: 4.780086616675059
  Token-Norm: 28.445343017578125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.35334014892578
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2961338758468628
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 22


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 22 loss: 7.671692957480748, loss_t2i: 8.430013984441757, loss_mmu: 4.638408238689105
  Token-Norm: 28.36886978149414
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.254558563232422
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.290259838104248
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 23


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 23 loss: 7.591305106878281, loss_t2i: 8.379498273134232, loss_mmu: 4.438531960050265
  Token-Norm: 28.2534236907959
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.15986442565918
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2842377424240112
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 24


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 24 loss: 7.569946736097336, loss_t2i: 8.383324573437372, loss_mmu: 4.31643471121788
  Token-Norm: 28.13694190979004
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.066606521606445
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2784144878387451
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 25


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 25 loss: 7.619782169659932, loss_t2i: 8.446120609839758, loss_mmu: 4.314427554607391
  Token-Norm: 28.044862747192383
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.981704711914062
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2721514701843262
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 26


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 26 loss: 7.42216153939565, loss_t2i: 8.279497643311819, loss_mmu: 3.992816686630249
  Token-Norm: 27.96575927734375
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.893775939941406
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2661018371582031
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 27


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 27 loss: 7.367426723241806, loss_t2i: 8.2759980460008, loss_mmu: 3.7331409553686776
  Token-Norm: 27.844745635986328
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.804616928100586
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2599728107452393
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 28


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 28 loss: 9.213299105564753, loss_t2i: 9.13698723912239, loss_mmu: 9.518546218673388
  Token-Norm: 28.213224411010742
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.332439422607422
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3096034526824951
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 29


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 29 loss: 13.472679873307547, loss_t2i: 9.048200408617655, loss_mmu: 31.170597076416016
  Token-Norm: 28.462142944335938
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.325098037719727
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3658875226974487
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 30


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 30 loss: 11.804857015609741, loss_t2i: 8.993310948212942, loss_mmu: 23.051040669282276
  Token-Norm: 28.320363998413086
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.52890396118164
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3676973581314087
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 31


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 31 loss: 11.097132166226706, loss_t2i: 8.637360135714212, loss_mmu: 20.93621959288915
  Token-Norm: 28.185548782348633
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.54062843322754
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3072396516799927
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 32


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 32 loss: 10.18279422322909, loss_t2i: 8.56276418765386, loss_mmu: 16.662913779417675
  Token-Norm: 28.051042556762695
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.262489318847656
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3121824264526367
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 33


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 33 loss: 8.466030498345694, loss_t2i: 8.52150958776474, loss_mmu: 8.244113584359487
  Token-Norm: 27.915834426879883
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.06137466430664
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.280839204788208
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 34


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 34 loss: 7.994070112705231, loss_t2i: 8.443796863158544, loss_mmu: 6.195162465174993
  Token-Norm: 27.781299591064453
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.982593536376953
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2739356756210327
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 35


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 35 loss: 8.019786755243937, loss_t2i: 8.491315484046936, loss_mmu: 6.133671432733536
  Token-Norm: 27.647489547729492
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.90554428100586
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2675070762634277
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 36


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 36 loss: 7.976222803195317, loss_t2i: 8.449283599853516, loss_mmu: 6.083978533744812
  Token-Norm: 27.5140438079834
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.829959869384766
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2611151933670044
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 37


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 37 loss: 8.01864324013392, loss_t2i: 8.515019724766413, loss_mmu: 6.033136655886968
  Token-Norm: 27.38048553466797
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.755870819091797
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2548410892486572
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 38


100%|██████████| 48/48 [00:21<00:00,  2.27it/s]


Epoch 38 loss: 8.015269140402475, loss_t2i: 8.525553623835245, loss_mmu: 5.974130531152089
  Token-Norm: 27.246509552001953
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.68366813659668
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2485591173171997
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 39


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 39 loss: 7.957617829243342, loss_t2i: 8.471510450045267, loss_mmu: 5.902046819527944
  Token-Norm: 27.11189842224121
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.613460540771484
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2423028945922852
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 40


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 40 loss: 7.877192536989848, loss_t2i: 8.402299960454306, loss_mmu: 5.776762584845225
  Token-Norm: 26.976478576660156
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.545089721679688
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.236090898513794
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 41


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 41 loss: 7.792798360188802, loss_t2i: 8.359210828940073, loss_mmu: 5.52714787920316
  Token-Norm: 26.859943389892578
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.478212356567383
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2300289869308472
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 42


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 42 loss: 7.686404834191005, loss_t2i: 8.3388631939888, loss_mmu: 5.076570878426234
  Token-Norm: 26.733474731445312
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.412179946899414
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2240372896194458
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 43


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 43 loss: 7.506288558244705, loss_t2i: 8.195865094661713, loss_mmu: 4.747982169191043
  Token-Norm: 26.604782104492188
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.345821380615234
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2184085845947266
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 44


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 44 loss: 7.486316084861755, loss_t2i: 8.216873248418173, loss_mmu: 4.5640867700179415
  Token-Norm: 26.4749698638916
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.28056526184082
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2128158807754517
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 45


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 45 loss: 7.448493460814158, loss_t2i: 8.210157612959543, loss_mmu: 4.401836554209392
  Token-Norm: 26.346141815185547
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.213842391967773
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2073421478271484
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 46


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 46 loss: 7.4266160527865095, loss_t2i: 8.224230448404947, loss_mmu: 4.2361578692992525
  Token-Norm: 26.215227127075195
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.14394187927246
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2017756700515747
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 47


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 47 loss: 7.374415695667267, loss_t2i: 8.20280314485232, loss_mmu: 4.060865685343742
  Token-Norm: 26.08405113220215
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.071592330932617
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1960328817367554
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 48


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 48 loss: 7.361426850159963, loss_t2i: 8.21841753522555, loss_mmu: 3.933463469147682
  Token-Norm: 25.959753036499023
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 15.996892929077148
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1906276941299438
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 49


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 49 loss: 7.307281335194905, loss_t2i: 8.174810429414114, loss_mmu: 3.837164431810379
  Token-Norm: 25.84428596496582
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 15.921435356140137
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1851052045822144
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 50


100%|██████████| 48/48 [00:21<00:00,  2.29it/s]


Epoch 50 loss: 7.3405086894830065, loss_t2i: 8.235409826040268, loss_mmu: 3.760903815428416
  Token-Norm: 25.735336303710938
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 15.846116065979004
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1793378591537476
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 51


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 51 loss: 7.208513846000035, loss_t2i: 8.093508700529734, loss_mmu: 3.66853396097819
  Token-Norm: 25.631122589111328
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 15.770600318908691
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1738297939300537
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 52


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 52 loss: 7.251741190751393, loss_t2i: 8.16887096563975, loss_mmu: 3.5832216093937554
  Token-Norm: 25.528791427612305
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 15.695255279541016
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1683405637741089
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 53


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 53 loss: 7.210392991701762, loss_t2i: 8.131917744874954, loss_mmu: 3.524293541908264
  Token-Norm: 25.442378997802734
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 15.620787620544434
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1626496315002441
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 54


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 54 loss: 7.245721717675527, loss_t2i: 8.192980647087097, loss_mmu: 3.4566854486862817
  Token-Norm: 25.36246681213379
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 15.54625415802002
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1572248935699463
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 55


100%|██████████| 48/48 [00:21<00:00,  2.29it/s]


Epoch 55 loss: 7.190127591292064, loss_t2i: 8.132287244002024, loss_mmu: 3.421488508582115
  Token-Norm: 25.270999908447266
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 15.472622871398926
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1517033576965332
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 56


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 56 loss: 7.198233246803284, loss_t2i: 8.16506185134252, loss_mmu: 3.3309182475010553
  Token-Norm: 25.509706497192383
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 15.399002075195312
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1464353799819946
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 57


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 57 loss: 7.179299513498942, loss_t2i: 8.156728784243265, loss_mmu: 3.2695818344751992
  Token-Norm: 25.499683380126953
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 15.326632499694824
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1408486366271973
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 58


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 58 loss: 7.13822329044342, loss_t2i: 8.120607068141302, loss_mmu: 3.20868789156278
  Token-Norm: 25.566150665283203
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 15.254216194152832
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.135570764541626
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 59


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 59 loss: 7.105698734521866, loss_t2i: 8.072201629479727, loss_mmu: 3.2396866778532663
  Token-Norm: 25.625844955444336
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 15.182363510131836
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.130399465560913
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 60


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 60 loss: 7.292371571063995, loss_t2i: 8.155084609985352, loss_mmu: 3.8415188093980155
  Token-Norm: 25.560060501098633
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 15.109790802001953
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.125038743019104
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 61


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 61 loss: 7.216396311918895, loss_t2i: 8.139334479967753, loss_mmu: 3.5246431678533554
  Token-Norm: 25.448522567749023
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 15.038612365722656
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1194368600845337
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 62


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 62 loss: 7.145271003246307, loss_t2i: 8.085493902365366, loss_mmu: 3.3843791484832764
  Token-Norm: 25.357585906982422
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 14.967955589294434
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1140341758728027
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 63


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 63 loss: 7.137410591046016, loss_t2i: 8.102143734693527, loss_mmu: 3.278477946917216
  Token-Norm: 25.255483627319336
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 14.898026466369629
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1087305545806885
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 64


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 64 loss: 7.160056461890538, loss_t2i: 8.16324520111084, loss_mmu: 3.1473012069861093
  Token-Norm: 25.15001106262207
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 14.828169822692871
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1033860445022583
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 65


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 65 loss: 7.124063402414322, loss_t2i: 8.137532790501913, loss_mmu: 3.0701852589845657
  Token-Norm: 25.070058822631836
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 14.758926391601562
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0982307195663452
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 66


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 66 loss: 7.247274617354075, loss_t2i: 8.160946736733118, loss_mmu: 3.5925856828689575
  Token-Norm: 25.13626480102539
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 14.679394721984863
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0936615467071533
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 67


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 67 loss: 7.130293657382329, loss_t2i: 8.121182839075724, loss_mmu: 3.1667362451553345
  Token-Norm: 25.348979949951172
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 14.616009712219238
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0886435508728027
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 68


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 68 loss: 7.084809323151906, loss_t2i: 8.11127973596255, loss_mmu: 2.9789275377988815
  Token-Norm: 25.3231201171875
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 14.547607421875
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0830761194229126
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 69


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 69 loss: 7.037900269031525, loss_t2i: 8.073155760765076, loss_mmu: 2.896877775589625
  Token-Norm: 25.268247604370117
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 14.48034954071045
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0777709484100342
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 70


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 70 loss: 6.977181911468506, loss_t2i: 8.017594377199808, loss_mmu: 2.8155315021673837
  Token-Norm: 25.24619483947754
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 14.41353988647461
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0728152990341187
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 71


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 71 loss: 6.996980865796407, loss_t2i: 8.05286051829656, loss_mmu: 2.7734615206718445
  Token-Norm: 25.310205459594727
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 14.347918510437012
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0676554441452026
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 72


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 72 loss: 7.03707813223203, loss_t2i: 8.120871037244797, loss_mmu: 2.7019059658050537
  Token-Norm: 25.482410430908203
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 14.282635688781738
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0627003908157349
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 73


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 73 loss: 6.986692577600479, loss_t2i: 8.071338921785355, loss_mmu: 2.648106766243776
  Token-Norm: 25.856277465820312
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 14.21785831451416
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0577692985534668
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 74


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 74 loss: 6.977768083413442, loss_t2i: 8.068759590387344, loss_mmu: 2.613801675538222
  Token-Norm: 26.364482879638672
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 14.153717994689941
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.052801489830017
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 75


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 75 loss: 6.936667998631795, loss_t2i: 8.03520879149437, loss_mmu: 2.5425043304761252
  Token-Norm: 27.717041015625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 14.089875221252441
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0479947328567505
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 76


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 76 loss: 6.979634215434392, loss_t2i: 8.091482808192572, loss_mmu: 2.5322395687301955
  Token-Norm: 28.79987335205078
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 14.027582168579102
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0428252220153809
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 77


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 77 loss: 6.9662792682647705, loss_t2i: 8.081053594748179, loss_mmu: 2.5071812520424523
  Token-Norm: 29.998228073120117
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.964673042297363
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.037931203842163
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 78


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 78 loss: 6.968024363120397, loss_t2i: 8.088867723941803, loss_mmu: 2.4846503709753356
  Token-Norm: 30.591245651245117
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.902726173400879
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.032645583152771
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 79


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 79 loss: 6.9249100883801775, loss_t2i: 8.063150743643442, loss_mmu: 2.3719467371702194
  Token-Norm: 31.475263595581055
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.840121269226074
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0282002687454224
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 80


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 80 loss: 6.915484348932902, loss_t2i: 8.062464108069738, loss_mmu: 2.327564942340056
  Token-Norm: 32.23976516723633
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.779500961303711
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0233442783355713
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 81


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 81 loss: 6.927460938692093, loss_t2i: 8.085422098636627, loss_mmu: 2.2956159114837646
  Token-Norm: 33.181575775146484
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.718730926513672
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0187925100326538
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 82


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 82 loss: 6.937272588411967, loss_t2i: 8.109816799561182, loss_mmu: 2.2470953638354936
  Token-Norm: 34.496299743652344
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.659082412719727
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0141956806182861
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 83


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 83 loss: 6.913683623075485, loss_t2i: 8.080860416094461, loss_mmu: 2.24497606108586
  Token-Norm: 36.987552642822266
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.599445343017578
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0095821619033813
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 84


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 84 loss: 6.881568868954976, loss_t2i: 8.02518997589747, loss_mmu: 2.307084066172441
  Token-Norm: 40.34861755371094
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.541482925415039
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.004733681678772
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 85


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 85 loss: 7.621260831753413, loss_t2i: 8.363426138957342, loss_mmu: 4.65259924530983
  Token-Norm: 44.339698791503906
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.439655303955078
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9958420991897583
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 86


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 86 loss: 7.074240495761235, loss_t2i: 8.139461398124695, loss_mmu: 2.8133563175797462
  Token-Norm: 45.335262298583984
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.399426460266113
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9920075535774231
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 87


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 87 loss: 6.970055470863978, loss_t2i: 8.08204871416092, loss_mmu: 2.522081988553206
  Token-Norm: 45.4487190246582
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.346549034118652
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9878353476524353
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 88


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 88 loss: 6.943551550308864, loss_t2i: 8.08209420243899, loss_mmu: 2.3893806462486586
  Token-Norm: 45.80156326293945
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.29959774017334
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9830363392829895
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 89


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 89 loss: 6.912873486677806, loss_t2i: 8.060096750656763, loss_mmu: 2.3239800284306207
  Token-Norm: 46.35676193237305
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.253681182861328
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9795443415641785
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 90


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 90 loss: 6.8822576105594635, loss_t2i: 8.035156816244125, loss_mmu: 2.270660209159056
  Token-Norm: 46.899322509765625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.200599670410156
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9751623272895813
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 91


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 91 loss: 6.904372930526733, loss_t2i: 8.062516043583551, loss_mmu: 2.2718001330892243
  Token-Norm: 47.823856353759766
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.153745651245117
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9706076979637146
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 92


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 92 loss: 6.862288902203242, loss_t2i: 8.028508832057318, loss_mmu: 2.1974088872472444
  Token-Norm: 49.07587814331055
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.102005958557129
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9665385484695435
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 93


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 93 loss: 6.886916399002075, loss_t2i: 8.04565054178238, loss_mmu: 2.251979341109594
  Token-Norm: 50.5617561340332
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.046346664428711
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.961997389793396
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 94


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 94 loss: 6.847931146621704, loss_t2i: 8.02557705839475, loss_mmu: 2.1373471344510713
  Token-Norm: 51.46027374267578
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.994990348815918
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9573250412940979
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 95


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 95 loss: 6.849728186925252, loss_t2i: 8.027865678071976, loss_mmu: 2.1371775195002556
  Token-Norm: 52.85623550415039
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.940752029418945
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.952927827835083
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 96


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 96 loss: 6.828349471092224, loss_t2i: 8.022865682840347, loss_mmu: 2.0502839808662734
  Token-Norm: 53.76259231567383
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.885242462158203
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9485701322555542
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 97


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 97 loss: 6.857771138350169, loss_t2i: 8.077137817939123, loss_mmu: 1.9803041194876034
  Token-Norm: 54.71633529663086
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.830612182617188
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9441001415252686
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 98


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 98 loss: 6.881348629792531, loss_t2i: 8.09593641757965, loss_mmu: 2.0229969521363578
  Token-Norm: 55.899681091308594
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.776533126831055
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9394505023956299
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 99


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 99 loss: 6.895149578650792, loss_t2i: 8.106335947910944, loss_mmu: 2.0504035154978433
  Token-Norm: 57.391170501708984
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.722235679626465
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9350435733795166
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 100


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 100 loss: 6.799709568421046, loss_t2i: 8.007088919480642, loss_mmu: 1.9701913545529048
  Token-Norm: 59.136512756347656
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.66917896270752
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9310660362243652
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 101


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 101 loss: 6.805187404155731, loss_t2i: 8.036131183306376, loss_mmu: 1.881411741177241
  Token-Norm: 60.00718688964844
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.617263793945312
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9269139170646667
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 102


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 102 loss: 6.85394416252772, loss_t2i: 8.089621553818384, loss_mmu: 1.9112339441974957
  Token-Norm: 61.1043815612793
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.568216323852539
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9225580096244812
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 103


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 103 loss: 6.835430880387624, loss_t2i: 8.073386192321777, loss_mmu: 1.8836092179020245
  Token-Norm: 63.395408630371094
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.517241477966309
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9182079434394836
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 104


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 104 loss: 6.791562924782435, loss_t2i: 8.03755979736646, loss_mmu: 1.8075749451915424
  Token-Norm: 64.33905029296875
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.4649076461792
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9137971997261047
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 105


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 105 loss: 6.803349862496058, loss_t2i: 8.053153137365976, loss_mmu: 1.804136022925377
  Token-Norm: 65.8409423828125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.414203643798828
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9098901748657227
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 106


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 106 loss: 6.847602307796478, loss_t2i: 8.044571548700333, loss_mmu: 2.0597249368826547
  Token-Norm: 68.86734771728516
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.366707801818848
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9072664380073547
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 107


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 107 loss: 6.860886712869008, loss_t2i: 8.063048442204794, loss_mmu: 2.0522394503156343
  Token-Norm: 70.48419952392578
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.321935653686523
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9011537432670593
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 108


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 108 loss: 6.807801554600398, loss_t2i: 8.034542322158813, loss_mmu: 1.9008381143212318
  Token-Norm: 71.81793975830078
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.267535209655762
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.898316502571106
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 109


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 109 loss: 6.819710503021876, loss_t2i: 8.056735187768936, loss_mmu: 1.871611165503661
  Token-Norm: 72.53639221191406
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.216475486755371
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8946409225463867
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 110


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 110 loss: 6.74763552347819, loss_t2i: 7.993873119354248, loss_mmu: 1.7626844843228657
  Token-Norm: 73.44625091552734
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.165635108947754
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8901683688163757
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 111


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 111 loss: 6.8554457028706866, loss_t2i: 8.073199301958084, loss_mmu: 1.9844308917721112
  Token-Norm: 74.52228546142578
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.121428489685059
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8856784701347351
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 112


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 112 loss: 6.748820722103119, loss_t2i: 7.976992090543111, loss_mmu: 1.8361345554391544
  Token-Norm: 75.56472778320312
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.067829132080078
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8812661170959473
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 113


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 113 loss: 6.726895143588384, loss_t2i: 8.003199170033136, loss_mmu: 1.6216785808404286
  Token-Norm: 76.06278228759766
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.01384162902832
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8772501349449158
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 114


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 114 loss: 6.642565121253331, loss_t2i: 7.899004568656285, loss_mmu: 1.6168069106837113
  Token-Norm: 76.6508560180664
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.947857856750488
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8727392554283142
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 115


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 115 loss: 8.681488186120987, loss_t2i: 8.278830895821253, loss_mmu: 10.292116815845171
  Token-Norm: 79.05474090576172
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.745149612426758
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9242942333221436
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 116


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 116 loss: 11.731374084949493, loss_t2i: 8.87183345357577, loss_mmu: 23.169535567363102
  Token-Norm: 80.40349578857422
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 13.889830589294434
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9662200212478638
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 117


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 117 loss: 11.857768873373667, loss_t2i: 8.26800168553988, loss_mmu: 26.21683657169342
  Token-Norm: 80.31900024414062
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.6360502243042
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.002785563468933
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 118


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 118 loss: 8.993486603101095, loss_t2i: 8.19385767976443, loss_mmu: 12.192001312971115
  Token-Norm: 79.97252655029297
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.172016143798828
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9631059765815735
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 119


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 119 loss: 8.21965636809667, loss_t2i: 8.159412006537119, loss_mmu: 8.460633317629496
  Token-Norm: 79.59700012207031
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 12.05583381652832
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9200453162193298
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 120


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 120 loss: 7.754716446002324, loss_t2i: 8.158512940009436, loss_mmu: 6.139529635508855
  Token-Norm: 79.25090026855469
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.91607666015625
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9201977252960205
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 121


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 121 loss: 7.677590837081273, loss_t2i: 8.164068847894669, loss_mmu: 5.731678267319997
  Token-Norm: 78.8958511352539
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.859827041625977
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9093326926231384
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 122


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 122 loss: 7.78411387403806, loss_t2i: 8.199560672044754, loss_mmu: 6.122326175371806
  Token-Norm: 78.54850769042969
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.723384857177734
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9086171388626099
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 123


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 123 loss: 7.823312213023503, loss_t2i: 8.143939803044, loss_mmu: 6.540801256895065
  Token-Norm: 78.2699203491211
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.64596176147461
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8959476351737976
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 124


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 124 loss: 7.628392388423284, loss_t2i: 8.146947065989176, loss_mmu: 5.5541729380687075
  Token-Norm: 77.97882843017578
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.562573432922363
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8915209770202637
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 125


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 125 loss: 7.387644628683726, loss_t2i: 8.103192836046219, loss_mmu: 4.52545140683651
  Token-Norm: 77.90473937988281
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.535118103027344
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8859699368476868
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 126


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 126 loss: 7.442320823669434, loss_t2i: 8.160652389129003, loss_mmu: 4.56899419426918
  Token-Norm: 78.2182388305664
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.49575138092041
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8811028599739075
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 127


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 127 loss: 7.4383542736371355, loss_t2i: 8.10513311624527, loss_mmu: 4.771238178014755
  Token-Norm: 78.10670471191406
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.49069881439209
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8783476948738098
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 128


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 128 loss: 7.39859985311826, loss_t2i: 8.153839121262232, loss_mmu: 4.377642343441646
  Token-Norm: 77.78330993652344
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.434549331665039
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8751779198646545
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 129


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 129 loss: 7.406472831964493, loss_t2i: 8.10188403725624, loss_mmu: 4.624827499190967
  Token-Norm: 77.72364044189453
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.352398872375488
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.870741605758667
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 130


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 130 loss: 7.284562806288402, loss_t2i: 8.105131189028421, loss_mmu: 4.0022886743148165
  Token-Norm: 78.0854721069336
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.282690048217773
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.867407500743866
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 131


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 131 loss: 7.424176136652629, loss_t2i: 8.058808982372284, loss_mmu: 4.885644326607387
  Token-Norm: 79.93312072753906
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.219087600708008
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8619001507759094
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 132


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 132 loss: 7.336550364891688, loss_t2i: 8.152329862117767, loss_mmu: 4.07343206803004
  Token-Norm: 79.96827697753906
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.185650825500488
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8579576015472412
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 133


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 133 loss: 7.231661111116409, loss_t2i: 8.14191069205602, loss_mmu: 3.5906621714433036
  Token-Norm: 80.451416015625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.135047912597656
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8544374704360962
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 134


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 134 loss: 7.1371710896492, loss_t2i: 8.074667225281397, loss_mmu: 3.3871861547231674
  Token-Norm: 81.06169128417969
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.090781211853027
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8499370217323303
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 135


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 135 loss: 7.143608540296555, loss_t2i: 8.112805217504501, loss_mmu: 3.266821344693502
  Token-Norm: 81.67509460449219
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 11.039864540100098
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.846331775188446
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 136


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 136 loss: 7.124019900957744, loss_t2i: 8.114392499128977, loss_mmu: 3.162529061237971
  Token-Norm: 82.28058624267578
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.993879318237305
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8421326279640198
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 137


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 137 loss: 7.127389967441559, loss_t2i: 8.13879711429278, loss_mmu: 3.0817608137925467
  Token-Norm: 82.73246765136719
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.948692321777344
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8379440307617188
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 138


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 138 loss: 7.110946148633957, loss_t2i: 8.134902407725653, loss_mmu: 3.0151206205288568
  Token-Norm: 83.38953399658203
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.902987480163574
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8339653611183167
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 139


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 139 loss: 7.076218545436859, loss_t2i: 8.100663860638937, loss_mmu: 2.978436678647995
  Token-Norm: 83.73661041259766
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.857260704040527
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8301510214805603
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 140


100%|██████████| 48/48 [00:21<00:00,  2.23it/s]


Epoch 140 loss: 7.020867506663005, loss_t2i: 8.04763999581337, loss_mmu: 2.913777232170105
  Token-Norm: 84.08892059326172
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.812057495117188
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.826286792755127
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 141


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 141 loss: 7.049019972483317, loss_t2i: 8.070585588614145, loss_mmu: 2.96275702615579
  Token-Norm: 85.82439422607422
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.770773887634277
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.822228193283081
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 142


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 142 loss: 7.063227345546086, loss_t2i: 8.104880541563034, loss_mmu: 2.896614005168279
  Token-Norm: 88.41668701171875
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.726693153381348
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8186630606651306
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 143


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 143 loss: 7.158115029335022, loss_t2i: 8.129777391751608, loss_mmu: 3.2714650432268777
  Token-Norm: 93.55294799804688
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.695401191711426
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8135254383087158
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 144


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 144 loss: 7.134462634722392, loss_t2i: 8.148802151282629, loss_mmu: 3.0771041562159858
  Token-Norm: 95.25424194335938
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.646580696105957
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8105453848838806
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 145


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 145 loss: 7.071756134430568, loss_t2i: 8.115338106950125, loss_mmu: 2.8974278469880423
  Token-Norm: 95.32485961914062
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.601452827453613
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.807387113571167
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 146


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 146 loss: 7.091855774323146, loss_t2i: 8.075102070967356, loss_mmu: 3.1588703046242395
  Token-Norm: 98.44822692871094
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.556159019470215
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8039330840110779
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 147


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 147 loss: 7.04454289873441, loss_t2i: 8.086613555749258, loss_mmu: 2.8762597342332206
  Token-Norm: 98.51593780517578
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.51276969909668
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.8004534840583801
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 148


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 148 loss: 7.071733266115189, loss_t2i: 8.157830248276392, loss_mmu: 2.7273448978861174
  Token-Norm: 98.54621887207031
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.470620155334473
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7971505522727966
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 149


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 149 loss: 7.006687253713608, loss_t2i: 8.099149684111277, loss_mmu: 2.6368373384078345
  Token-Norm: 98.4352035522461
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.44390869140625
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.793641984462738
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 150


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 150 loss: 6.952210625012715, loss_t2i: 8.039435267448425, loss_mmu: 2.6033117150266967
  Token-Norm: 98.37755584716797
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.418782234191895
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7891553044319153
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 151


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 151 loss: 7.001927495002747, loss_t2i: 8.109785556793213, loss_mmu: 2.5704950069387755
  Token-Norm: 98.83889770507812
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.373679161071777
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7858299016952515
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 152


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 152 loss: 6.978725065787633, loss_t2i: 8.091775675614675, loss_mmu: 2.52652216454347
  Token-Norm: 98.7804946899414
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.333552360534668
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7818940877914429
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 153


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 153 loss: 6.993044296900432, loss_t2i: 8.079982668161392, loss_mmu: 2.6452902629971504
  Token-Norm: 98.91466522216797
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.294394493103027
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7779471278190613
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 154


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 154 loss: 7.000834554433823, loss_t2i: 8.066947738329569, loss_mmu: 2.73638125260671
  Token-Norm: 99.75912475585938
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.257169723510742
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7743979096412659
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 155


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 155 loss: 6.94829844435056, loss_t2i: 8.00710184375445, loss_mmu: 2.713084541261196
  Token-Norm: 101.6461181640625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.21510124206543
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7708145380020142
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 156


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 156 loss: 6.956580579280853, loss_t2i: 8.058349768320719, loss_mmu: 2.549503487845262
  Token-Norm: 101.95295715332031
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.177488327026367
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7669529914855957
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 157


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 157 loss: 6.90941721200943, loss_t2i: 8.044141411781311, loss_mmu: 2.370519566039244
  Token-Norm: 101.89106750488281
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.13724422454834
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7630759477615356
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 158


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 158 loss: 6.847658018271129, loss_t2i: 7.979853490988414, loss_mmu: 2.3188759411374726
  Token-Norm: 101.68511199951172
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.090071678161621
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7599080204963684
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 159


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 159 loss: 6.843299309412639, loss_t2i: 7.983879874149959, loss_mmu: 2.280976583560308
  Token-Norm: 101.79236602783203
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.047720909118652
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7562493681907654
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 160


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 160 loss: 6.855553835630417, loss_t2i: 8.01555891831716, loss_mmu: 2.2155329709251723
  Token-Norm: 101.77851867675781
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 10.005253791809082
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7525882720947266
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 161


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 161 loss: 6.876078834136327, loss_t2i: 8.05672029654185, loss_mmu: 2.153512326379617
  Token-Norm: 101.8283462524414
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.958587646484375
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7494211196899414
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 162


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 162 loss: 6.8233040769894915, loss_t2i: 7.994561562935512, loss_mmu: 2.1382736613353095
  Token-Norm: 102.07085418701172
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.910269737243652
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7462946176528931
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 163


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 163 loss: 6.784065783023834, loss_t2i: 7.95575937628746, loss_mmu: 2.0972907741864524
  Token-Norm: 102.15902709960938
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.864580154418945
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.74289870262146
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 164


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 164 loss: 6.862995475530624, loss_t2i: 8.044379860162735, loss_mmu: 2.1374574477473893
  Token-Norm: 102.85772705078125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.82121753692627
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7393571734428406
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 165


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 165 loss: 6.793514470259349, loss_t2i: 7.956922223170598, loss_mmu: 2.1398828054467836
  Token-Norm: 103.64280700683594
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.784958839416504
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7354221343994141
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 166


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 166 loss: 6.7632407148679095, loss_t2i: 7.96110259493192, loss_mmu: 1.9717929785450299
  Token-Norm: 103.6263198852539
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.740608215332031
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.731899619102478
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 167


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 167 loss: 6.77334847052892, loss_t2i: 7.960982918739319, loss_mmu: 2.022810516258081
  Token-Norm: 103.80140686035156
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.708589553833008
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7273412346839905
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 168


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 168 loss: 6.703375885883967, loss_t2i: 7.8900785346825915, loss_mmu: 1.9565649156769116
  Token-Norm: 104.16316223144531
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.664220809936523
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7241273522377014
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 169


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 169 loss: 6.631553987661998, loss_t2i: 7.818551202615102, loss_mmu: 1.8835647975405057
  Token-Norm: 104.30728149414062
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.61767578125
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7209184765815735
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 170


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 170 loss: 6.675219416618347, loss_t2i: 7.876555323600769, loss_mmu: 1.8698753093679745
  Token-Norm: 104.45269775390625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.561781883239746
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7189338207244873
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 171


100%|██████████| 48/48 [00:21<00:00,  2.26it/s]


Epoch 171 loss: 6.591707597176234, loss_t2i: 7.788000265757243, loss_mmu: 1.8065365180373192
  Token-Norm: 104.70032501220703
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.515815734863281
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7162051796913147
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 172


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 172 loss: 6.581673234701157, loss_t2i: 7.800044397513072, loss_mmu: 1.7081882481773694
  Token-Norm: 105.14568328857422
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.470129013061523
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7136027216911316
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 173


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 173 loss: 6.538625468810399, loss_t2i: 7.724162568648656, loss_mmu: 1.7964766348401706
  Token-Norm: 106.25257110595703
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.427692413330078
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7104900479316711
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 174


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 174 loss: 6.492598434289296, loss_t2i: 7.69517594575882, loss_mmu: 1.6822879165410995
  Token-Norm: 106.8423843383789
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.390076637268066
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7067033052444458
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 175


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 175 loss: 6.483239491780599, loss_t2i: 7.707204192876816, loss_mmu: 1.5873801137010257
  Token-Norm: 107.33280181884766
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.440967559814453
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7076622843742371
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 176


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 176 loss: 6.5246012806892395, loss_t2i: 7.684937079747518, loss_mmu: 1.8832576672236125
  Token-Norm: 108.55961608886719
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.443241119384766
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.7055084705352783
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 177


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 177 loss: 6.415197789669037, loss_t2i: 7.629262348016103, loss_mmu: 1.5589391340812047
  Token-Norm: 108.99739837646484
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.392630577087402
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.702921450138092
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 178


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 178 loss: 6.478000342845917, loss_t2i: 7.737315734227498, loss_mmu: 1.4407383960982163
  Token-Norm: 109.14401245117188
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.339394569396973
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.700771689414978
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 179


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 179 loss: 6.253909324606259, loss_t2i: 7.479117155075073, loss_mmu: 1.353077846268813
  Token-Norm: 110.04364776611328
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.303701400756836
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.6971632838249207
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 180


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 180 loss: 6.1499532262484236, loss_t2i: 7.376884490251541, loss_mmu: 1.2422278883556526
  Token-Norm: 110.21238708496094
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.253931045532227
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.6946027278900146
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 181


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 181 loss: 6.068415989478429, loss_t2i: 7.287730157375336, loss_mmu: 1.191159022351106
  Token-Norm: 110.53137969970703
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.204492568969727
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.692205011844635
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 182


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 182 loss: 6.131653000911077, loss_t2i: 7.34491824110349, loss_mmu: 1.2785917036235332
  Token-Norm: 111.27301025390625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 9.181118965148926
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.6900314688682556
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 183


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 183 loss: 6.025356262922287, loss_t2i: 7.2227140466372175, loss_mmu: 1.2359246475001175
  Token-Norm: 111.8300552368164
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.422740936279297
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9639138579368591
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 184


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 184 loss: 5.851343949635823, loss_t2i: 7.033729841311772, loss_mmu: 1.1217998458693426
  Token-Norm: 111.83494567871094
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.474515914916992
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9625416994094849
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 185


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 185 loss: 5.8052935401598615, loss_t2i: 6.9529759387175245, loss_mmu: 1.2145632679263751
  Token-Norm: 112.4570083618164
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.390832901000977
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9584760069847107
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 186


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 186 loss: 6.025999426841736, loss_t2i: 7.09177350004514, loss_mmu: 1.7629025566081207
  Token-Norm: 115.47976684570312
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.714252471923828
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9682196974754333
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 187


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 187 loss: 6.0040898223718004, loss_t2i: 7.081691751877467, loss_mmu: 1.693681535621484
  Token-Norm: 116.47676086425781
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.788957595825195
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9656820893287659
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 188


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 188 loss: 5.59502379099528, loss_t2i: 6.664675364891688, loss_mmu: 1.3164168832202752
  Token-Norm: 116.23532104492188
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.704622268676758
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9615446925163269
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 189


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 189 loss: 6.940804968277614, loss_t2i: 7.519258747498195, loss_mmu: 4.626989305019379
  Token-Norm: 121.85354614257812
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.67864227294922
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9527794718742371
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 190


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 190 loss: 7.533314406871796, loss_t2i: 8.262043158213297, loss_mmu: 4.618398850162824
  Token-Norm: 122.2867431640625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.642635345458984
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9498299956321716
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 191


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 191 loss: 7.353588263193767, loss_t2i: 8.244747420152029, loss_mmu: 3.7889512379964194
  Token-Norm: 121.73495483398438
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.5588321685791
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.945395290851593
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 192


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 192 loss: 7.0477063457171125, loss_t2i: 7.99822136759758, loss_mmu: 3.245645766456922
  Token-Norm: 121.1722640991211
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.474794387817383
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9408926963806152
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 193


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 193 loss: 6.937206635872523, loss_t2i: 7.901864280303319, loss_mmu: 3.078575477004051
  Token-Norm: 120.60531616210938
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.400117874145508
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9368822574615479
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 194


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 194 loss: 7.397765398025513, loss_t2i: 8.28239885965983, loss_mmu: 3.8592311094204583
  Token-Norm: 120.04045867919922
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.81865882873535
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0136847496032715
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 195


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 195 loss: 7.227596988280614, loss_t2i: 8.214737743139267, loss_mmu: 3.2790336509545646
  Token-Norm: 119.4532699584961
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.884113311767578
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.013766884803772
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 196


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 196 loss: 7.042142142852147, loss_t2i: 8.051173200209936, loss_mmu: 3.006017411748568
  Token-Norm: 118.89317321777344
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.793197631835938
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0093485116958618
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 197


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 197 loss: 6.9865124722321825, loss_t2i: 8.007622828086218, loss_mmu: 2.9020706514517465
  Token-Norm: 118.33473205566406
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.703311920166016
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0046638250350952
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 198


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 198 loss: 6.924308687448502, loss_t2i: 7.960726271073024, loss_mmu: 2.7786379804213843
  Token-Norm: 117.77584075927734
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.616527557373047
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9997529983520508
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 199


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 199 loss: 6.895702034235001, loss_t2i: 7.94969720641772, loss_mmu: 2.679721288383007
  Token-Norm: 117.22529602050781
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.52452850341797
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9953433275222778
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 200


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 200 loss: 6.861366579929988, loss_t2i: 7.924980739752452, loss_mmu: 2.606909193098545
  Token-Norm: 116.6851806640625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.447843551635742
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.989849328994751
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 201


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 201 loss: 6.7762348254521685, loss_t2i: 7.817176312208176, loss_mmu: 2.6124685357014337
  Token-Norm: 116.1588134765625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.379974365234375
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9842883944511414
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 202


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 202 loss: 6.658023953437805, loss_t2i: 7.674976706504822, loss_mmu: 2.5902123525738716
  Token-Norm: 115.67208862304688
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.305204391479492
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9792913794517517
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 203


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 203 loss: 6.412484586238861, loss_t2i: 7.425707896550496, loss_mmu: 2.359590840836366
  Token-Norm: 115.29119873046875
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.244760513305664
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9742992520332336
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 204


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 204 loss: 6.222638423244159, loss_t2i: 7.20781072974205, loss_mmu: 2.2819488967458406
  Token-Norm: 115.04093933105469
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.143474578857422
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9709912538528442
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 205


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 205 loss: 5.767795672019322, loss_t2i: 6.700154314438502, loss_mmu: 2.0383607372641563
  Token-Norm: 115.50587463378906
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.05590057373047
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9668124914169312
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 206


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 206 loss: 5.563900917768478, loss_t2i: 6.476596653461456, loss_mmu: 1.9131175602475803
  Token-Norm: 115.34916687011719
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.966529846191406
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9630408883094788
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 207


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 207 loss: 5.283855949838956, loss_t2i: 6.174150576194127, loss_mmu: 1.722677042086919
  Token-Norm: 115.39710998535156
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.88437843322754
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.9591214060783386
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 208


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 208 loss: 6.625155394275983, loss_t2i: 7.164365236957868, loss_mmu: 4.468315561612447
  Token-Norm: 116.64131164550781
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.99565315246582
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.010856032371521
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 209


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 209 loss: 6.952983170747757, loss_t2i: 7.80039718747139, loss_mmu: 3.5633264432350793
  Token-Norm: 116.466064453125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.203489303588867
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0157158374786377
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 210


100%|██████████| 48/48 [00:21<00:00,  2.24it/s]


Epoch 210 loss: 6.480369995037715, loss_t2i: 7.33211949467659, loss_mmu: 3.073371628920237
  Token-Norm: 116.17558288574219
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.10298728942871
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.013120412826538
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 211


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 211 loss: 5.6409731805324554, loss_t2i: 6.324604680140813, loss_mmu: 2.9064466382066407
  Token-Norm: 116.15772247314453
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.084516525268555
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.012921690940857
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 212


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 212 loss: 5.27197880546252, loss_t2i: 5.9876452932755155, loss_mmu: 2.4093123227357864
  Token-Norm: 116.32633209228516
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.998817443847656
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.008299708366394
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 213


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 213 loss: 5.269798209269841, loss_t2i: 5.904847929875056, loss_mmu: 2.7295989990234375
  Token-Norm: 116.83305358886719
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.91016387939453
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0037931203842163
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 214


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 214 loss: 4.698616474866867, loss_t2i: 5.3105254371960955, loss_mmu: 2.250980337460836
  Token-Norm: 116.505615234375
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.803234100341797
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0056583881378174
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 215


100%|██████████| 48/48 [00:21<00:00,  2.27it/s]


Epoch 215 loss: 4.493336945772171, loss_t2i: 5.074202875296275, loss_mmu: 2.16987286756436
  Token-Norm: 116.27740478515625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.72344207763672
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.001833200454712
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 216


100%|██████████| 48/48 [00:21<00:00,  2.29it/s]


Epoch 216 loss: 4.250481074055036, loss_t2i: 4.822117447853088, loss_mmu: 1.9639353056748707
  Token-Norm: 116.39752960205078
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.639406204223633
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 0.999286949634552
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 217


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 217 loss: 4.154531439145406, loss_t2i: 4.700819452603658, loss_mmu: 1.9693791742126148
  Token-Norm: 116.64607238769531
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.870092391967773
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.031636118888855
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 218


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 218 loss: 3.788123182952404, loss_t2i: 4.296686579783757, loss_mmu: 1.7538694068789482
  Token-Norm: 116.24201202392578
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.803306579589844
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0283054113388062
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 219


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 219 loss: 3.6452900717655816, loss_t2i: 4.14204063018163, loss_mmu: 1.6582877586285274
  Token-Norm: 115.89221954345703
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.70808219909668
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0281504392623901
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 220


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 220 loss: 3.4089647978544235, loss_t2i: 3.889264650642872, loss_mmu: 1.4877651010950406
  Token-Norm: 115.7474594116211
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.624780654907227
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0240190029144287
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 221


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 221 loss: 3.322583923737208, loss_t2i: 3.695388371745745, loss_mmu: 1.831365776558717
  Token-Norm: 115.62042236328125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.545515060424805
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0201983451843262
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 222


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 222 loss: 3.120489368836085, loss_t2i: 3.5351733217636743, loss_mmu: 1.461753398180008
  Token-Norm: 115.1605453491211
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.496009826660156
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0239591598510742
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 223


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 223 loss: 3.2201502372821174, loss_t2i: 3.674417863289515, loss_mmu: 1.4030794973174732
  Token-Norm: 114.74114990234375
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.417509078979492
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0203814506530762
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 224


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 224 loss: 2.8228897228837013, loss_t2i: 3.195050910115242, loss_mmu: 1.3342447044948738
  Token-Norm: 114.56166076660156
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.339508056640625
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.016822099685669
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 225


100%|██████████| 48/48 [00:21<00:00,  2.29it/s]


Epoch 225 loss: 2.9041469941536584, loss_t2i: 3.292799159884453, loss_mmu: 1.3495382517576218
  Token-Norm: 114.4058837890625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.237199783325195
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0184284448623657
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 226


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 226 loss: 2.715922969082991, loss_t2i: 3.092642871042093, loss_mmu: 1.2090430619815986
  Token-Norm: 114.03842163085938
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.158039093017578
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0151574611663818
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 227


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 227 loss: 2.9726510643959045, loss_t2i: 3.357822258025408, loss_mmu: 1.4319660315910976
  Token-Norm: 114.70417785644531
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.555301666259766
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0596669912338257
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 228


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 228 loss: 2.59428812811772, loss_t2i: 2.9117285075287023, loss_mmu: 1.324526349703471
  Token-Norm: 114.50325012207031
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.79095458984375
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.087414026260376
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 229


100%|██████████| 48/48 [00:21<00:00,  2.24it/s]


Epoch 229 loss: 2.5026616975665092, loss_t2i: 2.8457734435796738, loss_mmu: 1.1302145048975945
  Token-Norm: 114.08071899414062
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.70746612548828
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0837098360061646
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 230


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 230 loss: 2.3269794968267283, loss_t2i: 2.63826033907632, loss_mmu: 1.0818558484315872
  Token-Norm: 113.695068359375
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.622936248779297
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0792019367218018
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 231


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 231 loss: 2.3311244770884514, loss_t2i: 2.667025533815225, loss_mmu: 0.987520094960928
  Token-Norm: 113.27769470214844
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.544315338134766
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0884830951690674
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 232


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 232 loss: 2.358411761621634, loss_t2i: 2.715378620972236, loss_mmu: 0.9305440330257019
  Token-Norm: 112.8438720703125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.49978256225586
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0922415256500244
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 233


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 233 loss: 2.2161996886134148, loss_t2i: 2.542113366226355, loss_mmu: 0.9125447583695253
  Token-Norm: 112.51554870605469
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.414966583251953
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0883408784866333
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 234


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 234 loss: 2.077522767086824, loss_t2i: 2.382180944085121, loss_mmu: 0.8588898498564959
  Token-Norm: 112.18060302734375
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.334026336669922
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0842167139053345
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 235


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 235 loss: 2.1516087042788663, loss_t2i: 2.490558732300997, loss_mmu: 0.7958083717773358
  Token-Norm: 111.77822875976562
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.24012565612793
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0846545696258545
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 236


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 236 loss: 2.1126160447796187, loss_t2i: 2.431948810815811, loss_mmu: 0.8352848521123329
  Token-Norm: 111.45226287841797
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.1577205657959
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0825397968292236
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 237


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 237 loss: 2.0695229756335416, loss_t2i: 2.3811033492287, loss_mmu: 0.8232012676695982
  Token-Norm: 111.14022064208984
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.1929988861084
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1075234413146973
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 238


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 238 loss: 2.036040926973025, loss_t2i: 2.35560484106342, loss_mmu: 0.7577852091441551
  Token-Norm: 110.8351058959961
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.12468147277832
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1043049097061157
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 239


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 239 loss: 2.13042792926232, loss_t2i: 2.479495162765185, loss_mmu: 0.7341588089863459
  Token-Norm: 110.55757904052734
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.042203903198242
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1000946760177612
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 240


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 240 loss: 1.8076843259235222, loss_t2i: 2.0886452371875444, loss_mmu: 0.6838404772182306
  Token-Norm: 110.2519302368164
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.961769104003906
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0959078073501587
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 241


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 241 loss: 1.9306221355994542, loss_t2i: 2.237334199870626, loss_mmu: 0.7037736481676499
  Token-Norm: 110.11088562011719
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.87983512878418
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0919692516326904
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 242


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 242 loss: 1.745162246748805, loss_t2i: 2.0230363281443715, loss_mmu: 0.6336657280723254
  Token-Norm: 109.78163146972656
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.798593521118164
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0876160860061646
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 243


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 243 loss: 1.7689811326563358, loss_t2i: 2.0471448991447687, loss_mmu: 0.6563258816798528
  Token-Norm: 109.53730773925781
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.71725082397461
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.084462285041809
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 244


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 244 loss: 2.0064409573872886, loss_t2i: 2.2937180809676647, loss_mmu: 0.8573322705924511
  Token-Norm: 109.6410903930664
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.630178451538086
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0813839435577393
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 245


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 245 loss: 1.7501211973528068, loss_t2i: 2.0280579080184302, loss_mmu: 0.6383742373436689
  Token-Norm: 109.49199676513672
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.550029754638672
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.076913595199585
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 246


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 246 loss: 1.7817646612723668, loss_t2i: 2.042482564225793, loss_mmu: 0.738892937079072
  Token-Norm: 110.278564453125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.46978187561035
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0727252960205078
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 247


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 247 loss: 1.7657275379945834, loss_t2i: 2.038764695947369, loss_mmu: 0.6735787376140555
  Token-Norm: 110.27996063232422
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.555753707885742
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1073260307312012
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 248


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 248 loss: 1.9209806720415752, loss_t2i: 2.167462505400181, loss_mmu: 0.9350532454748949
  Token-Norm: 111.26658630371094
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.880491256713867
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1439274549484253
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 249


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 249 loss: 1.6035399915029604, loss_t2i: 1.838195643387735, loss_mmu: 0.6649171908696493
  Token-Norm: 111.03345489501953
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.806686401367188
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.140190601348877
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 250


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 250 loss: 1.2338796528056264, loss_t2i: 1.4020088181520503, loss_mmu: 0.5613629076008996
  Token-Norm: 110.71505737304688
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.72574806213379
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1355388164520264
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 251


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 251 loss: 1.5665970221161842, loss_t2i: 1.7780501879751682, loss_mmu: 0.7207842307786146
  Token-Norm: 110.43831634521484
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.765487670898438
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1346815824508667
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 252


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 252 loss: 1.5304312265167634, loss_t2i: 1.7662626057863235, loss_mmu: 0.5871056060617169
  Token-Norm: 110.11985778808594
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.765405654907227
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1425716876983643
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 253


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 253 loss: 1.636660414437453, loss_t2i: 1.9336500807354848, loss_mmu: 0.4487016014754772
  Token-Norm: 109.79569244384766
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.685245513916016
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1383012533187866
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 254


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 254 loss: 1.5966729583839576, loss_t2i: 1.8836782382180293, loss_mmu: 0.4486517213905851
  Token-Norm: 109.48516845703125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.60331916809082
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1339329481124878
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 255


100%|██████████| 48/48 [00:21<00:00,  2.29it/s]


Epoch 255 loss: 1.3530297788480918, loss_t2i: 1.585422954832514, loss_mmu: 0.4234569404895107
  Token-Norm: 109.20524597167969
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.52397346496582
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1292753219604492
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 256


100%|██████████| 48/48 [00:21<00:00,  2.27it/s]


Epoch 256 loss: 1.2641462379445632, loss_t2i: 1.4771969374269247, loss_mmu: 0.41194336240490276
  Token-Norm: 108.86669158935547
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.443716049194336
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1251726150512695
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 257


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 257 loss: 1.5687289951990049, loss_t2i: 1.7963722571730614, loss_mmu: 0.6581558703134457
  Token-Norm: 108.57730102539062
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.415502548217773
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1125688552856445
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 258


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 258 loss: 1.9044288670023282, loss_t2i: 2.1125494396934905, loss_mmu: 1.0719463638961315
  Token-Norm: 109.01939392089844
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.26944923400879
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1084940433502197
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 259


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 259 loss: 1.826242994517088, loss_t2i: 2.0012624971568584, loss_mmu: 1.1261649119357269
  Token-Norm: 109.77359771728516
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.188594818115234
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1034773588180542
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 260


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 260 loss: 1.5958716583748658, loss_t2i: 1.7906610034406185, loss_mmu: 0.8167142116775116
  Token-Norm: 109.9194564819336
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.126005172729492
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1074305772781372
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 261


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 261 loss: 1.5176187635709841, loss_t2i: 1.7148869894444942, loss_mmu: 0.7285457241038481
  Token-Norm: 109.74455261230469
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.054550170898438
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.103711724281311
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 262


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 262 loss: 1.5187773238867521, loss_t2i: 1.7424323000013828, loss_mmu: 0.6241573011502624
  Token-Norm: 109.54376220703125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.976465225219727
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0989960432052612
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 263


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 263 loss: 1.3522377392898004, loss_t2i: 1.572034605468313, loss_mmu: 0.4730502280096213
  Token-Norm: 109.2626953125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.898643493652344
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0942827463150024
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 264


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 264 loss: 1.4061011634767056, loss_t2i: 1.6470193836527567, loss_mmu: 0.44242817039291066
  Token-Norm: 109.02085876464844
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.822851181030273
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.089880108833313
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 265


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 265 loss: 1.3001653840765357, loss_t2i: 1.5288957295318444, loss_mmu: 0.3852438349276781
  Token-Norm: 108.96015930175781
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.745941162109375
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0853337049484253
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 266


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 266 loss: 1.0911342669278383, loss_t2i: 1.2747200418574114, loss_mmu: 0.35679109549770754
  Token-Norm: 108.56595611572266
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.670303344726562
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.0810643434524536
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 267


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 267 loss: 1.1440003517394264, loss_t2i: 1.3450345403204362, loss_mmu: 0.33986355861028034
  Token-Norm: 108.30950927734375
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.59563636779785
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.076817512512207
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 268


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 268 loss: 1.1910700319955747, loss_t2i: 1.408897887604932, loss_mmu: 0.3197585344314575
  Token-Norm: 108.00897979736328
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.984111785888672
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.1354504823684692
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 269


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 269 loss: 1.070277472647528, loss_t2i: 1.2588002160191536, loss_mmu: 0.3161863995095094
  Token-Norm: 107.7403335571289
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.45919418334961
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3513810634613037
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 270


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 270 loss: 0.9832894426460067, loss_t2i: 1.1539057921618223, loss_mmu: 0.30082398901383084
  Token-Norm: 107.46527099609375
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.644119262695312
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.384960651397705
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 271


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 271 loss: 1.0805673253101606, loss_t2i: 1.2749131619930267, loss_mmu: 0.303183906711638
  Token-Norm: 107.218017578125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.7600040435791
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.43717360496521
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 272


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 272 loss: 0.8613873769839605, loss_t2i: 1.0063676959834993, loss_mmu: 0.2814660056804617
  Token-Norm: 106.97148132324219
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.68400001525879
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.4318562746047974
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 273


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 273 loss: 0.9048843343431751, loss_t2i: 1.0676368409767747, loss_mmu: 0.2538742452549438
  Token-Norm: 106.77530670166016
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.59791374206543
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.4255262613296509
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 274


100%|██████████| 48/48 [00:21<00:00,  2.29it/s]


Epoch 274 loss: 1.0737528686101239, loss_t2i: 1.2592343520373106, loss_mmu: 0.3318268652074039
  Token-Norm: 108.02316284179688
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.51174545288086
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.419166922569275
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 275


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 275 loss: 1.1588689863371353, loss_t2i: 1.364317290795346, loss_mmu: 0.3370757467734317
  Token-Norm: 109.08654022216797
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.42620277404785
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.4128612279891968
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 276


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 276 loss: 1.0209579036260645, loss_t2i: 1.215736563007037, loss_mmu: 0.24184319019938508
  Token-Norm: 108.8853759765625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.34139060974121
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.4066935777664185
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 277


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 277 loss: 0.9710314723973473, loss_t2i: 1.1572835054248571, loss_mmu: 0.22602326143532991
  Token-Norm: 108.5814437866211
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.25650978088379
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.4006484746932983
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 278


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 278 loss: 0.7798565064246455, loss_t2i: 0.9131253299613794, loss_mmu: 0.24678111417839924
  Token-Norm: 108.46875
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.17243766784668
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.394519567489624
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 279


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 279 loss: 1.002976714943846, loss_t2i: 1.1280280137434602, loss_mmu: 0.502771422577401
  Token-Norm: 108.86418914794922
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.148548126220703
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3646972179412842
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 280


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 280 loss: 1.1296483622863889, loss_t2i: 1.2965032712866862, loss_mmu: 0.4622286253919204
  Token-Norm: 109.25691986083984
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.40093231201172
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3673571348190308
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 281


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 281 loss: 0.9302543322555721, loss_t2i: 1.0814266838133335, loss_mmu: 0.32556486036628485
  Token-Norm: 109.19093322753906
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.30613136291504
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3633427619934082
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 282


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 282 loss: 0.9660823857411742, loss_t2i: 1.1537542312095563, loss_mmu: 0.21539500433330735
  Token-Norm: 108.98896026611328
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.21559715270996
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3572474718093872
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 283


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 283 loss: 1.0109771117568016, loss_t2i: 1.2124336535731952, loss_mmu: 0.20515091988878945
  Token-Norm: 108.7453384399414
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.127534866333008
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3554555177688599
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 284


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 284 loss: 0.7003555855092903, loss_t2i: 0.8271254746553799, loss_mmu: 0.19327598080659905
  Token-Norm: 108.48348999023438
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.04088020324707
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3500615358352661
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 285


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 285 loss: 1.1776485278581579, loss_t2i: 1.415970746582995, loss_mmu: 0.22435963231449327
  Token-Norm: 108.35387420654297
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.951940536499023
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3441250324249268
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 286


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 286 loss: 1.035975663146625, loss_t2i: 1.2429874118727942, loss_mmu: 0.20792857146201035
  Token-Norm: 108.21869659423828
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.8636417388916
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3382927179336548
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 287


100%|██████████| 48/48 [00:20<00:00,  2.31it/s]


Epoch 287 loss: 1.5178342095265787, loss_t2i: 1.7828110037371516, loss_mmu: 0.4579269516592224
  Token-Norm: 108.4917984008789
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.775657653808594
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3323417901992798
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 288


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 288 loss: 1.0730682881549, loss_t2i: 1.2601988445967436, loss_mmu: 0.3245459543541074
  Token-Norm: 108.39275360107422
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.6876277923584
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3264037370681763
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 289


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 289 loss: 1.0147582814097404, loss_t2i: 1.199524893425405, loss_mmu: 0.2756918005955716
  Token-Norm: 108.34013366699219
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.599811553955078
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3204855918884277
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 290


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 290 loss: 1.5634410753846169, loss_t2i: 1.513396826883157, loss_mmu: 1.7636179483185213
  Token-Norm: 111.97444152832031
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 20.306161880493164
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2984733581542969
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 291


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 291 loss: 1.0845205305765073, loss_t2i: 1.1526636152217786, loss_mmu: 0.8119480771323045
  Token-Norm: 113.46530151367188
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 20.471649169921875
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.310619592666626
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 292


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 292 loss: 0.7445620329429706, loss_t2i: 0.8118890194843212, loss_mmu: 0.4752540440919499
  Token-Norm: 113.55525970458984
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 20.38258171081543
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3102474212646484
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 293


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 293 loss: 1.1685196192314227, loss_t2i: 1.3590350880598028, loss_mmu: 0.4064576073239247
  Token-Norm: 113.484130859375
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 20.302120208740234
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3080451488494873
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 294


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 294 loss: 0.8338356309880813, loss_t2i: 0.921113240144526, loss_mmu: 0.4847251319636901
  Token-Norm: 113.78091430664062
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 20.229557037353516
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3047751188278198
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 295


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 295 loss: 1.0889910322924454, loss_t2i: 1.2680834806524217, loss_mmu: 0.37262112740427256
  Token-Norm: 113.9118881225586
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 20.17523193359375
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3165279626846313
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 296


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 296 loss: 0.8240584707818925, loss_t2i: 0.9675939672936996, loss_mmu: 0.24991638585925102
  Token-Norm: 113.68873596191406
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 20.094970703125
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3146424293518066
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 297


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 297 loss: 0.8091840138658881, loss_t2i: 0.9642249733830491, loss_mmu: 0.18902010408540568
  Token-Norm: 113.34967041015625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 20.000457763671875
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.308638095855713
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 298


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 298 loss: 0.8353530030387143, loss_t2i: 0.9972956391672293, loss_mmu: 0.18758239364251494
  Token-Norm: 113.04457092285156
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.905731201171875
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3103889226913452
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 299


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 299 loss: 0.7800075467675924, loss_t2i: 0.9304095528398951, loss_mmu: 0.17839943757280707
  Token-Norm: 112.65304565429688
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.81248664855957
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3044575452804565
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 300


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 300 loss: 0.8374923641482989, loss_t2i: 1.0025503849610686, loss_mmu: 0.17726021190173924
  Token-Norm: 112.30144500732422
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.71980094909668
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2985553741455078
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 301


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 301 loss: 0.9515784778632224, loss_t2i: 1.1433388766211767, loss_mmu: 0.1845368007197976
  Token-Norm: 111.98924255371094
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.633686065673828
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2983460426330566
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 302


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 302 loss: 0.7534004229431351, loss_t2i: 0.9032001177159449, loss_mmu: 0.1542015594895929
  Token-Norm: 111.72762298583984
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.543500900268555
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2928688526153564
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 303


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 303 loss: 0.7273181482839087, loss_t2i: 0.8711034037793676, loss_mmu: 0.15217710643385848
  Token-Norm: 111.40377044677734
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.452037811279297
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2870121002197266
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 304


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 304 loss: 0.7457141592167318, loss_t2i: 0.8864913698441038, loss_mmu: 0.18260527479772767
  Token-Norm: 111.31026458740234
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.36142349243164
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2812565565109253
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 305


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 305 loss: 0.7033799778049191, loss_t2i: 0.8351513914919148, loss_mmu: 0.17629425010333458
  Token-Norm: 111.20703125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.27104377746582
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.275499939918518
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 306


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 306 loss: 0.8925967887043953, loss_t2i: 1.0757548437298585, loss_mmu: 0.1599644822999835
  Token-Norm: 111.03083038330078
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.181289672851562
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2698220014572144
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 307


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 307 loss: 0.781820162354658, loss_t2i: 0.9410363235510886, loss_mmu: 0.14495546518204114
  Token-Norm: 110.76668548583984
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.092575073242188
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2642544507980347
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 308


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 308 loss: 0.641732033342123, loss_t2i: 0.774093314850082, loss_mmu: 0.11228682861352961
  Token-Norm: 110.50406646728516
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.003257751464844
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.258610486984253
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 309


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 309 loss: 0.8553884451587995, loss_t2i: 1.0353471896766375, loss_mmu: 0.1355533441528678
  Token-Norm: 110.28126525878906
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.970041275024414
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2833436727523804
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 310


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 310 loss: 0.5679793338446567, loss_t2i: 0.6830456437698255, loss_mmu: 0.10771404575401296
  Token-Norm: 110.01976013183594
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 19.020408630371094
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.298810362815857
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 311


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 311 loss: 1.0900530284270644, loss_t2i: 1.1652040964302917, loss_mmu: 0.789448710779349
  Token-Norm: 121.12655639648438
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.510665893554688
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2882720232009888
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 312


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 312 loss: 0.5196422867787381, loss_t2i: 0.5696289769451445, loss_mmu: 0.3196954907228549
  Token-Norm: 121.4229736328125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.451486587524414
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.281138300895691
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 313


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 313 loss: 0.7071032929234207, loss_t2i: 0.8420385230953494, loss_mmu: 0.16736233358581862
  Token-Norm: 120.95781707763672
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.36492347717285
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.275241494178772
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 314


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 314 loss: 0.6556469717373451, loss_t2i: 0.775093648970748, loss_mmu: 0.1778601659461856
  Token-Norm: 120.48348236083984
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.279666900634766
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2695653438568115
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 315


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 315 loss: 0.9066952696690956, loss_t2i: 1.067742780316621, loss_mmu: 0.2625052017780642
  Token-Norm: 120.0633773803711
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.19886589050293
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2790199518203735
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 316


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 316 loss: 0.7345004704159995, loss_t2i: 0.865264728354911, loss_mmu: 0.21144341017740467
  Token-Norm: 119.70138549804688
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.113277435302734
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2833114862442017
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 317


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 317 loss: 0.6933782584965229, loss_t2i: 0.7762806065535793, loss_mmu: 0.3617688766680658
  Token-Norm: 119.7056884765625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 18.063501358032227
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2862153053283691
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 318


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 318 loss: 0.9237829894603541, loss_t2i: 1.0930593194595228, loss_mmu: 0.2466775719076395
  Token-Norm: 119.30350494384766
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.96721076965332
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2815319299697876
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 319


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 319 loss: 0.8355882428586483, loss_t2i: 0.9804963367059827, loss_mmu: 0.2559557978529483
  Token-Norm: 119.1256332397461
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.800506591796875
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.285186767578125
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 320


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 320 loss: 0.936157729011029, loss_t2i: 1.1143422961855929, loss_mmu: 0.2234193601179868
  Token-Norm: 118.76872253417969
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.714426040649414
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.280539631843567
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 321


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 321 loss: 0.8000970762223005, loss_t2i: 0.9641160584675769, loss_mmu: 0.14402104475690672
  Token-Norm: 118.30777740478516
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.631275177001953
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2753026485443115
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 322


100%|██████████| 48/48 [00:21<00:00,  2.29it/s]


Epoch 322 loss: 0.7921533932288488, loss_t2i: 0.8957763581226269, loss_mmu: 0.37766152651359636
  Token-Norm: 118.10265350341797
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.549097061157227
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.269565224647522
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 323


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 323 loss: 1.098323972740521, loss_t2i: 1.289270159167548, loss_mmu: 0.3345391162050267
  Token-Norm: 117.82426452636719
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.467164993286133
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2638139724731445
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 324


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 324 loss: 0.816595647794505, loss_t2i: 0.9738188042926291, loss_mmu: 0.1877029030583799
  Token-Norm: 117.40519714355469
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.384740829467773
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.259021520614624
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 325


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 325 loss: 0.6217380414406458, loss_t2i: 0.7428905265405774, loss_mmu: 0.13712807395495474
  Token-Norm: 116.94831848144531
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.303266525268555
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2533711194992065
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 326


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 326 loss: 0.7648781267926097, loss_t2i: 0.9272295952153703, loss_mmu: 0.1154721959028393
  Token-Norm: 116.4947280883789
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.22093963623047
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2480653524398804
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 327


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 327 loss: 0.6527246110296497, loss_t2i: 0.7772161668011298, loss_mmu: 0.15475837203363577
  Token-Norm: 116.07683563232422
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.14079475402832
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.24250066280365
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 328


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 328 loss: 0.6172995797824115, loss_t2i: 0.7394621920927117, loss_mmu: 0.1286490373313427
  Token-Norm: 115.72713470458984
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.07617950439453
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.243711233139038
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 329


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 329 loss: 0.6956672371986011, loss_t2i: 0.8480335922601322, loss_mmu: 0.08620174201981474
  Token-Norm: 115.24725341796875
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.997663497924805
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.2382844686508179
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 330


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 330 loss: 0.6274726006668061, loss_t2i: 0.7569092304135362, loss_mmu: 0.10972605474914114
  Token-Norm: 114.81967163085938
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 16.91826820373535
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.232663631439209
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 331


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 331 loss: 0.7000364179257303, loss_t2i: 0.8414395859775444, loss_mmu: 0.1344237196414421
  Token-Norm: 114.41444396972656
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 17.867538452148438
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.3651219606399536
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 332


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 332 loss: 0.6876478008925915, loss_t2i: 0.8331201260443777, loss_mmu: 0.10575839791757365
  Token-Norm: 114.091064453125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 21.858877182006836
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.820245385169983
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 333


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 333 loss: 0.4236767441810419, loss_t2i: 0.5100812157616019, loss_mmu: 0.07805880733455221
  Token-Norm: 113.69247436523438
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.800073623657227
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.969330906867981
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 334


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 334 loss: 0.7095698449217404, loss_t2i: 0.8596868661697954, loss_mmu: 0.10910167964175344
  Token-Norm: 113.30104064941406
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.792570114135742
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.9681577682495117
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 335


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 335 loss: 0.5140316549999019, loss_t2i: 0.6046296885857979, loss_mmu: 0.15163947292603552
  Token-Norm: 113.22974395751953
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.685596466064453
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.95897376537323
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 336


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 336 loss: 0.6325570922344923, loss_t2i: 0.7623126150574535, loss_mmu: 0.11353496978214632
  Token-Norm: 112.9175033569336
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.577877044677734
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.949944257736206
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 337


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 337 loss: 0.9207286535141369, loss_t2i: 1.1258226023055613, loss_mmu: 0.10035284360249837
  Token-Norm: 112.51630401611328
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.536869049072266
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.979142427444458
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 338


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 338 loss: 0.6511134831234813, loss_t2i: 0.7866245983168483, loss_mmu: 0.10906903356468926
  Token-Norm: 112.19738006591797
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.450111389160156
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.9740997552871704
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 339


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 339 loss: 0.8497451501122365, loss_t2i: 0.9933632738733044, loss_mmu: 0.2752725613148262
  Token-Norm: 112.20647430419922
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.344545364379883
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.9649146795272827
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 340


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 340 loss: 0.8070351574569941, loss_t2i: 0.9339186061794559, loss_mmu: 0.2995012985387196
  Token-Norm: 112.91281127929688
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.239559173583984
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.9557729959487915
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 341


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 341 loss: 0.5364563902063916, loss_t2i: 0.6400324949063361, loss_mmu: 0.12215192278381437
  Token-Norm: 112.70135498046875
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.134735107421875
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.9466139078140259
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 342


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 342 loss: 0.5348083710608383, loss_t2i: 0.6486040067781383, loss_mmu: 0.07962578566124041
  Token-Norm: 112.25798034667969
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.03044319152832
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.9375019073486328
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 343


100%|██████████| 48/48 [00:21<00:00,  2.25it/s]


Epoch 343 loss: 0.5531009690991292, loss_t2i: 0.6762481663608924, loss_mmu: 0.060512136396331094
  Token-Norm: 111.82869720458984
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 21.926559448242188
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.9285281896591187
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 344


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 344 loss: 0.5390507820993662, loss_t2i: 0.6524820289729784, loss_mmu: 0.08532574148072551
  Token-Norm: 111.50460052490234
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 21.822874069213867
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.9194656610488892
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 345


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 345 loss: 0.5559622459113598, loss_t2i: 0.6757232025265694, loss_mmu: 0.07691839054071654
  Token-Norm: 111.17080688476562
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 21.72015380859375
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.9104995727539062
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 346


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 346 loss: 0.7053687457228079, loss_t2i: 0.8657220138702542, loss_mmu: 0.06395565318719794
  Token-Norm: 110.76358795166016
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 21.617782592773438
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.9015544652938843
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 347


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 347 loss: 0.5443701229135817, loss_t2i: 0.6614023677927131, loss_mmu: 0.07624113010630633
  Token-Norm: 110.3974380493164
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 21.515756607055664
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.8926374912261963
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 348


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 348 loss: 0.3621297630403812, loss_t2i: 0.4370828850660473, loss_mmu: 0.062317258290325604
  Token-Norm: 110.00641632080078
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 21.414457321166992
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.8841744661331177
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 349


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 349 loss: 0.7230786983467018, loss_t2i: 0.8922443736810237, loss_mmu: 0.046415890411784254
  Token-Norm: 109.5789566040039
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 21.313631057739258
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.8754332065582275
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 350


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 350 loss: 0.6297505281787986, loss_t2i: 0.773997922311537, loss_mmu: 0.05276091204723343
  Token-Norm: 109.14568328857422
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 21.212919235229492
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.86661696434021
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 351


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 351 loss: 0.7451814788316066, loss_t2i: 0.9162166625416527, loss_mmu: 0.06104071814722071
  Token-Norm: 108.83056640625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 21.113954544067383
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 1.8580187559127808
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 352


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 352 loss: 0.6009381456145396, loss_t2i: 0.7280869464933252, loss_mmu: 0.0923428968526423
  Token-Norm: 108.60824584960938
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.54181480407715
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.0488579273223877
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 353


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 353 loss: 0.8105664231504003, loss_t2i: 0.9799565323240435, loss_mmu: 0.13300593655245999
  Token-Norm: 108.68587493896484
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 23.42718505859375
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.211071014404297
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 354


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 354 loss: 0.7901813982365032, loss_t2i: 0.9625829455132285, loss_mmu: 0.10057513571033876
  Token-Norm: 108.57286834716797
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 24.243465423583984
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.415520191192627
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 355


100%|██████████| 48/48 [00:21<00:00,  2.29it/s]


Epoch 355 loss: 0.5277157295495272, loss_t2i: 0.6435671239936104, loss_mmu: 0.06431009443864848
  Token-Norm: 108.23479461669922
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 24.247509002685547
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.4525136947631836
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 356


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 356 loss: 0.45146568515338004, loss_t2i: 0.5508902547881007, loss_mmu: 0.05376733645486335
  Token-Norm: 107.83242797851562
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 24.14042091369629
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.4424431324005127
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 357


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 357 loss: 0.6512726351308326, loss_t2i: 0.7997803215015059, loss_mmu: 0.057241846865508705
  Token-Norm: 107.50648498535156
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 24.02619743347168
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.430894136428833
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 358


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 358 loss: 0.5775157214763263, loss_t2i: 0.7107192676048726, loss_mmu: 0.044701471633743495
  Token-Norm: 107.11083221435547
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 23.912342071533203
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.4193758964538574
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 359


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 359 loss: 0.48036177273994934, loss_t2i: 0.5850187060811246, loss_mmu: 0.06173398667791238
  Token-Norm: 106.76056671142578
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 23.79931640625
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.4079415798187256
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 360


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 360 loss: 0.9211535082819561, loss_t2i: 1.1271782208156462, loss_mmu: 0.09705463346714775
  Token-Norm: 106.56974029541016
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 23.687379837036133
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.3966264724731445
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 361


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 361 loss: 0.6360870780578504, loss_t2i: 0.7731530392387261, loss_mmu: 0.08782316755969077
  Token-Norm: 106.6534423828125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 23.606611251831055
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.4081332683563232
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 362


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 362 loss: 0.4950918769463897, loss_t2i: 0.601647608137379, loss_mmu: 0.06886890286114067
  Token-Norm: 106.42707061767578
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 23.496864318847656
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.3973662853240967
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 363


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 363 loss: 0.6117841186933219, loss_t2i: 0.7459861264020825, loss_mmu: 0.07497599946024518
  Token-Norm: 106.68119049072266
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 23.386207580566406
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.386087417602539
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 364


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 364 loss: 0.4827976317998643, loss_t2i: 0.5853620873143276, loss_mmu: 0.07253977664125462
  Token-Norm: 106.36456298828125
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 23.276226043701172
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.3748836517333984
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 365


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 365 loss: 0.5631703103426844, loss_t2i: 0.6718214203914007, loss_mmu: 0.12856582874276987
  Token-Norm: 106.41033935546875
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 23.166419982910156
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.3636951446533203
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 366


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 366 loss: 0.5366422199488928, loss_t2i: 0.6338629516152045, loss_mmu: 0.14775921353915086
  Token-Norm: 106.65108489990234
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 23.05959129333496
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.3569672107696533
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 367


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 367 loss: 0.9259689468890429, loss_t2i: 1.074710681879272, loss_mmu: 0.3310018948589762
  Token-Norm: 108.17243194580078
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 23.190692901611328
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.368460178375244
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 368


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 368 loss: 0.8375581859921416, loss_t2i: 0.9233420618499318, loss_mmu: 0.49442260805517435
  Token-Norm: 110.49040985107422
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 23.09375
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.3599374294281006
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 369


100%|██████████| 48/48 [00:21<00:00,  2.29it/s]


Epoch 369 loss: 0.9168650999975702, loss_t2i: 1.0538140437565744, loss_mmu: 0.3690691921704759
  Token-Norm: 111.72830963134766
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.984661102294922
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.348828077316284
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 370


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 370 loss: 0.7695521297864616, loss_t2i: 0.9115729813929647, loss_mmu: 0.2014686493203044
  Token-Norm: 111.79056549072266
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.876991271972656
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.337800979614258
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 371


100%|██████████| 48/48 [00:21<00:00,  2.29it/s]


Epoch 371 loss: 0.6225811696300904, loss_t2i: 0.7452163653215393, loss_mmu: 0.1320403286954388
  Token-Norm: 111.55982208251953
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.770912170410156
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.3262646198272705
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 372


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 372 loss: 0.7346945330500603, loss_t2i: 0.8946543997929742, loss_mmu: 0.09485496790148318
  Token-Norm: 111.2039794921875
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.66329574584961
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.3152666091918945
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 373


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 373 loss: 0.6059329276904464, loss_t2i: 0.7358478227785478, loss_mmu: 0.08627328051564594
  Token-Norm: 110.89285278320312
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.556156158447266
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.3043289184570312
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 374


100%|██████████| 48/48 [00:20<00:00,  2.29it/s]


Epoch 374 loss: 0.502466787196075, loss_t2i: 0.6140646095154807, loss_mmu: 0.05607544529872636
  Token-Norm: 110.54452514648438
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.44938850402832
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.293414354324341
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 375


100%|██████████| 48/48 [00:21<00:00,  2.27it/s]


Epoch 375 loss: 0.5323319438224038, loss_t2i: 0.6411651829257607, loss_mmu: 0.0969989401443551
  Token-Norm: 110.41227722167969
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.34329605102539
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.2826039791107178
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 376


100%|██████████| 48/48 [00:21<00:00,  2.28it/s]


Epoch 376 loss: 0.5980152968550101, loss_t2i: 0.7295554948116963, loss_mmu: 0.07185445174885292
  Token-Norm: 110.08990478515625
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.242841720581055
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.2705936431884766
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 377


100%|██████████| 48/48 [00:21<00:00,  2.29it/s]


Epoch 377 loss: 0.8684215375687927, loss_t2i: 1.0520121667068452, loss_mmu: 0.13405891372046122
  Token-Norm: 110.02300262451172
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.138389587402344
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.259883403778076
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 378


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]


Epoch 378 loss: 0.4326389562726642, loss_t2i: 0.522078134235926, loss_mmu: 0.07488221232779324
  Token-Norm: 109.88758850097656
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 22.033964157104492
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.24922513961792
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 379


100%|██████████| 48/48 [00:21<00:00,  2.29it/s]


Epoch 379 loss: 0.5859394127813479, loss_t2i: 0.7107148211604605, loss_mmu: 0.08683773735538125
  Token-Norm: 109.68292999267578
  index_no_updates-Token-Norm: 475.90191650390625
  LM-Head-Weight-Norm: 21.930091857910156
  index_no_updates-LM-Head-Weight-Norm: 636.0464477539062
  LM-Head-Bias-Norm: 2.2386691570281982
  index_no_updates-LM-Head-Bias-Norm: 200.17538452148438
Epoch 380


100%|██████████| 48/48 [00:20<00:00,  2.30it/s]