<a href="https://colab.research.google.com/github/JackyLiu47/memeBlip/blob/main/memeBlip.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Environment setup cell (IPython shell escapes).
# Print the CUDA compiler version available on this runtime.
!nvcc --version
# Install the PyTorch stack from the CUDA 12.1 wheel index.
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
# Extra packages requested by this notebook; `yacs` provides the CfgNode used for configuration below.
!pip install ftfy regex tqdm
!pip install yacs
# NOTE(review): `torch` is listed again here although it is installed above — presumably redundant; confirm.
# NOTE(review): none of these installs pin a version, so results may drift between runs.
!pip install torch transformers pytorch-lightning

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting ftfy
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ftfy
Successfully installed ftfy-6.3.1
Collecting yacs
  Downloading yacs-0.1.8-py3-none-any.whl.metadata (639 bytes)
Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Installing collected packages: yacs
Successfully installed yacs-0.1.8
Collecting pytorch-lightning
  Downloading pytorch_lightning-2.5.0.post0-py3-none-any.whl.metadata (21 kB)
Collecting torchmetrics>=0.7.0 (from pytorch-lightning)
  Downloading torchmetrics-1.6.1-py3-none-any.whl.meta

In [None]:
import torch

# Report the CUDA state of this runtime.
# Device-specific queries are only issued when CUDA is available, because
# torch.cuda.current_device() raises on a CPU-only runtime.
cuda_available = torch.cuda.is_available()
print("CUDA Available:", cuda_available)
print("CUDA Device Count:", torch.cuda.device_count())
if cuda_available:
    current_device = torch.cuda.current_device()
    print("Current Device:", current_device)
    print("Device Name:", torch.cuda.get_device_name(current_device))
else:
    print("Current Device:", None)
    print("Device Name:", None)

CUDA Available: True
CUDA Device Count: 1
Current Device: 0
Device Name: NVIDIA A100-SXM4-40GB


In [None]:
import os
from PIL import Image
import torch
import torch.nn as nn
import torchmetrics
from transformers import BlipProcessor, BlipForConditionalGeneration
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
import pandas as pd
from yacs.config import CfgNode
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [None]:
class Custom_Dataset(Dataset):
    """Meme dataset backed by a CSV info file and a folder of images.

    The CSV at ``cfg.info_file`` is filtered to the rows whose ``split``
    column equals ``split``. When ``label == 'target'`` only rows with
    ``hate == 1`` are kept. Every float column has its missing values
    replaced by ``-1`` and is converted to the nullable ``Int64`` dtype.

    Args:
        cfg: Config object; ``cfg.info_file`` and ``cfg.img_folder`` are read.
        root_folder: Stored on the instance; not used by this class itself.
        dataset: Stored on the instance; not used by this class itself.
        label: Name of the CSV column returned as the sample label.
        split: Value of the ``split`` column to keep.
        image_size: Images are resized to ``(image_size, image_size)``.
        fast: Stored on the instance; not used by this class itself.
    """

    def __init__(self, cfg, root_folder, dataset, label, split='train', image_size=224, fast=True):
        super().__init__()
        self.cfg = cfg
        self.root_folder = root_folder
        self.dataset = dataset
        self.split = split
        self.label = label

        self.image_size = image_size
        self.fast = fast

        self.info_file = cfg.info_file
        info = pd.read_csv(self.info_file)
        info = info[info['split'] == self.split].reset_index(drop=True)

        if self.label == 'target':
            info = info[info['hate'] == 1].reset_index(drop=True)

        # Label columns with missing entries are read as float; fill the gaps
        # with -1 and store them as integers.
        float_cols = info.select_dtypes(float).columns
        info[float_cols] = info[float_cols].fillna(-1).astype('Int64')
        self.df = info

    @staticmethod
    def _normalize_text(text):
        """Return the text as a string, mapping missing values to ``'null'``.

        ``pandas.read_csv`` turns empty cells into NaN, so a plain comparison
        with the string ``'None'`` does not catch missing text; NaN / ``None``
        / ``pd.NA`` are handled here as well as the literal ``'None'``.
        """
        if text is None or (not isinstance(text, str) and pd.isna(text)):
            return 'null'
        text = str(text)
        return 'null' if text == 'None' else text

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return a dict with the resized RGB image, text, label and file name.

        Raises:
            ValueError: If the image cannot be opened or converted.
        """
        row = self.df.iloc[idx]
        text = self._normalize_text(row['text'])

        image_fn = row['name']
        try:
            # The context manager closes the underlying file handle; convert()
            # returns an independent in-memory copy.
            with Image.open(f"{self.cfg.img_folder}/{image_fn}") as raw_image:
                image = raw_image.convert('RGB')
            image = image.resize((self.image_size, self.image_size))
        except Exception as e:
            raise ValueError(f"Error loading image {image_fn}: {e}") from e

        return {
            'image': image,
            'text': text,
            'label': row[self.label],
            'idx_meme': row['name'],
        }

In [None]:
class Custom_Collator(object):
    """Collate raw dataset items into BLIP-processor tensors.

    Each item is expected to provide the keys ``'image'``, ``'text'`` and
    ``'label'``. The returned batch holds ``pixel_values``, ``input_ids``,
    ``attention_mask`` and ``labels``, all placed on ``cfg.device``.
    """

    def __init__(self, cfg):
        self.cfg = cfg
        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")

    def __call__(self, batch):
        images = [item['image'] for item in batch]
        texts = [item['text'] for item in batch]
        labels = torch.LongTensor([item['label'] for item in batch])

        # Run the BLIP processor on images and text together; the text is
        # padded/truncated to exactly 512 tokens.
        inputs = self.processor(
            images=images,
            text=texts,
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=512,
        )

        # Move every tensor (including the labels, which previously stayed on
        # the CPU) to the configured device so the batch is device-consistent.
        device = self.cfg.device
        return {
            'pixel_values': inputs['pixel_values'].to(device),      # preprocessed images
            'input_ids': inputs['input_ids'].to(device),            # tokenized text
            'attention_mask': inputs['attention_mask'].to(device),  # text attention mask
            'labels': labels.to(device),                            # class labels
        }


In [None]:
from transformers import BlipProcessor, BlipModel, BlipConfig
import torch

class MemeBLIP_Collator:
    """Collate function that converts raw meme samples into pooled BLIP features.

    A ``BlipModel`` is loaded once in ``__init__`` and put in eval mode. For
    every sample the collator runs the BLIP processor and model and
    mean-pools the vision and text ``last_hidden_state`` over dim 1.

    NOTE(review): the load log in this notebook reports that the
    ``text_model.*`` weights of ``BlipModel`` are newly initialized for this
    checkpoint, so the text features presumably come from untrained weights —
    confirm and consider a checkpoint that matches the model class.
    """

    def __init__(self, cfg):
        self.cfg = cfg
        blipconfig = BlipConfig.from_pretrained("Salesforce/blip-image-captioning-base")
        # Intended to raise the maximum sequence length.
        # NOTE(review): this sets an attribute on the top-level BlipConfig;
        # whether it reaches the text encoder is not established here — confirm.
        blipconfig.max_position_embeddings = 1024
        # Processor and feature-extraction model.
        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        self.blip_model = BlipModel.from_pretrained("Salesforce/blip-image-captioning-base", config=blipconfig).to(self.cfg.device)
        self.blip_model.eval()

    def __call__(self, batch):
        """Build a batch dict with labels, meme ids and pooled BLIP features.

        Returns:
            dict with keys ``labels`` (LongTensor), ``idx_memes`` (list) and
            ``image_features`` / ``text_features``. The two feature entries
            are 1-tuples wrapping a CPU tensor: the original code produced
            them through a trailing comma, and ``MemeBLIP.forward`` unwraps
            the tuple, so the shape is kept for backward compatibility.
        """
        labels = torch.LongTensor([item['label'] for item in batch])
        idx_memes = [item['idx_meme'] for item in batch]

        image_features_list = []
        text_features_list = []
        for item in batch:
            # Samples are processed one at a time, so no padding is involved
            # in the mean pooling below.
            processed = self.processor(
                images=item['image'],
                text=item['text'],
                return_tensors="pt",
                max_length=512,
                truncation=True,  # was misspelled `Truncation`, so truncation was never explicitly enabled
            ).to(self.cfg.device)

            image_features, text_features = self.compute_features(processed)

            # Keep the collected features on the CPU.
            image_features_list.append(image_features.cpu().detach())
            text_features_list.append(text_features.cpu().detach())

        return {
            'labels': labels,
            'idx_memes': idx_memes,
            'image_features': (torch.cat(image_features_list, dim=0),),
            'text_features': (torch.cat(text_features_list, dim=0),),
        }

    def compute_features(self, processed):
        """Return mean-pooled ``(image_features, text_features)`` from BLIP.

        Runs under ``torch.no_grad()`` because the extractor is only used for
        inference here. ``no_grad`` (not ``inference_mode``) is used so the
        results can still be fed to layers that are trained later.
        """
        with torch.no_grad():
            outputs = self.blip_model(**processed)
            image_features = outputs.vision_model_output.last_hidden_state.mean(dim=1)
            text_features = outputs.text_model_output.last_hidden_state.mean(dim=1)

        return image_features, text_features


In [None]:
def load_dataset(cfg, split):
    """Build the ``Custom_Dataset`` for ``split`` from the settings stored on ``cfg``.

    Reads ``cfg.root_dir``, ``cfg.dataset_name``, ``cfg.image_size``,
    ``cfg.label`` and ``cfg.fast_process`` and forwards them, together with
    ``cfg`` itself, to the dataset constructor.
    """
    dataset_kwargs = dict(
        cfg=cfg,
        root_folder=cfg.root_dir,
        dataset=cfg.dataset_name,
        split=split,
        image_size=cfg.image_size,
        label=cfg.label,
        fast=cfg.fast_process,
    )
    return Custom_Dataset(**dataset_kwargs)


In [None]:
def create_dataloader(cfg, split="train", collator=None):
    """Create a ``DataLoader`` over one dataset split.

    Args:
        cfg: Config object; ``cfg.batch_size`` is read here and the rest is
            forwarded to ``load_dataset`` / ``MemeBLIP_Collator``.
        split: Dataset split to load. Only the ``"train"`` split is shuffled.
        collator: Optional collate callable to reuse. When ``None`` (the
            default, matching the previous behavior) a new
            ``MemeBLIP_Collator`` is built, which loads its own BLIP model;
            pass a shared instance to avoid loading the model once per split.

    Returns:
        The configured ``DataLoader``.
    """
    dataset = load_dataset(cfg, split)
    if collator is None:
        collator = MemeBLIP_Collator(cfg)
    return DataLoader(
        dataset,
        batch_size=cfg.batch_size,
        shuffle=(split == "train"),
        collate_fn=collator,
    )


In [None]:
cfg = CfgNode()

# Path settings
cfg.root_dir = './'
cfg.img_folder = '/content/drive/MyDrive/Colab_Notebooks/MemeCLIP-main/dataset/Images'
cfg.info_file = '/content/drive/MyDrive/Colab_Notebooks/MemeCLIP-main/dataset/PrideMM.csv'
cfg.checkpoint_path = os.path.join(cfg.root_dir, 'checkpoints')
cfg.checkpoint_file = os.path.join(cfg.checkpoint_path, 'model.ckpt')

# Model and dataset settings
cfg.clip_variant = "ViT-L/14"
cfg.dataset_name = 'Pride'
cfg.name = 'MemeBLIP'
cfg.label = 'hate'
cfg.seed = 42
cfg.test_only = False
cfg.device = 'cuda'
cfg.gpus = [0]

# Class names for each supported task. Kept as a plain local dict (not stored
# on cfg) and looked up by cfg.label, so that an unsupported label fails here
# with a clear message instead of later as a missing `cfg.class_names`.
CLASS_NAMES_BY_LABEL = {
    'hate': ['Benign Meme', 'Harmful Meme'],
    'humour': ['No Humour', 'Humour'],
    'target': ['No particular target', 'Individual', 'Community', 'Organization'],
    'stance': ['Neutral', 'Support', 'Oppose'],
}
if cfg.label not in CLASS_NAMES_BY_LABEL:
    raise ValueError(
        f"Unsupported cfg.label {cfg.label!r}; expected one of {sorted(CLASS_NAMES_BY_LABEL)}"
    )
cfg.class_names = CLASS_NAMES_BY_LABEL[cfg.label]

# Hyperparameters
cfg.batch_size = 16
cfg.image_size = 224
cfg.num_mapping_layers = 1
cfg.unmapped_dim = 768
cfg.map_dim = 1024
cfg.num_pre_output_layers = 1
cfg.drop_probs = [0.1, 0.4, 0.2]
cfg.lr = 1e-4
cfg.max_epochs = 10
cfg.weight_decay = 1e-4
cfg.num_classes = len(cfg.class_names)  # derived from the task selected above
cfg.scale = 30
cfg.print_model = True
cfg.fast_process = True
cfg.reproduce = False
cfg.ratio = 0.7

print(cfg)


batch_size: 16
checkpoint_file: ./checkpoints/model.ckpt
checkpoint_path: ./checkpoints
class_names: ['Benign Meme', 'Harmful Meme']
clip_variant: ViT-L/14
dataset_name: Pride
device: cuda
drop_probs: [0.1, 0.4, 0.2]
fast_process: True
gpus: [0]
image_size: 224
img_folder: /content/drive/MyDrive/Colab_Notebooks/MemeCLIP-main/dataset/Images
info_file: /content/drive/MyDrive/Colab_Notebooks/MemeCLIP-main/dataset/PrideMM.csv
label: hate
lr: 0.0001
map_dim: 1024
max_epochs: 10
name: MemeBLIP
num_classes: 2
num_mapping_layers: 1
num_pre_output_layers: 1
print_model: True
ratio: 0.7
reproduce: False
root_dir: ./
scale: 30
seed: 42
test_only: False
unmapped_dim: 768
weight_decay: 0.0001


In [None]:
# Inspect the columns of the annotation file. The path comes from the config
# so this check always looks at the same file the datasets read.
data_file = cfg.info_file
df = pd.read_csv(data_file)
print(df.columns)

# Build the training and validation loaders.
train_loader = create_dataloader(cfg, split="train")
val_loader = create_dataloader(cfg, split="val")

# Smoke test: pull a single batch and show which keys the collator produced.
for batch in train_loader:
    print(batch.keys())
    break


Index(['name', 'hate', 'target', 'stance', 'humour', 'split', 'text'], dtype='object')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/4.56k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/506 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

`BlipModel` is going to be deprecated in future release, please use `BlipForConditionalGeneration`, `BlipForQuestionAnswering` or `BlipForImageTextRetrieval` depending on your usecase.
Some weights of BlipModel were not initialized from the model checkpoint at Salesforce/blip-image-captioning-base and are newly initialized: ['logit_scale', 'text_model.embeddings.LayerNorm.bias', 'text_model.embeddings.LayerNorm.weight', 'text_model.embeddings.position_embeddings.weight', 'text_model.embeddings.word_embeddings.weight', 'text_model.encoder.layer.0.attention.output.LayerNorm.bias', 'text_model.encoder.layer.0.attention.output.LayerNorm.weight', 'text_model.encoder.layer.0.attention.output.dense.bias', 'text_model.encoder.layer.0.attention.output.dense.weight', 'text_model.encoder.layer.0.attention.self.key.bias', 'text_model.encoder.layer.0.attention.self.key.weight', 'text_model.encoder.layer.0.attention.self.query.bias', 'text_model.encoder.layer.0.attention.self.query.weight', 'text_mo

dict_keys(['labels', 'idx_memes', 'image_features', 'text_features'])


In [None]:
class LinearProjection(nn.Module):
    """Stack of linear layers that maps ``input_dim`` features to ``output_dim``.

    The first stage is ``Linear(input_dim, output_dim)`` followed by dropout.
    Each additional layer (``num_layers - 1`` of them) appends
    ``ReLU -> Linear(output_dim, output_dim) -> Dropout``.

    Args:
        input_dim: Size of the incoming feature vector.
        output_dim: Size of the projected feature vector.
        num_layers: Total number of linear layers; values below 2 yield a
            single linear layer.
        drop_probs: Dropout probability used after every linear layer.
    """

    def __init__(self, input_dim, output_dim, num_layers, drop_probs):
        super().__init__()

        map_layers = [nn.Linear(input_dim, output_dim),
                      nn.Dropout(p=drop_probs)]

        for _ in range(1, num_layers):
            map_layers += [nn.ReLU(),
                           nn.Linear(output_dim, output_dim),
                           nn.Dropout(p=drop_probs)]

        # The attribute name `proj` is part of the state_dict keys; keep it.
        self.proj = nn.Sequential(*map_layers)

    # nn.Module already provides __call__ (which dispatches to forward), so no
    # pass-through override is needed.
    def forward(self, x):
        """Apply the projection stack to ``x``."""
        return self.proj(x)

class Adapter(nn.Module):
    """Bias-free bottleneck MLP.

    Maps ``c_in -> c_in // reduction -> c_in`` with a ReLU after each of the
    two linear layers.
    """

    def __init__(self, c_in, reduction=4):
        super().__init__()
        bottleneck_dim = c_in // reduction
        squeeze = nn.Linear(c_in, bottleneck_dim, bias=False)
        expand = nn.Linear(bottleneck_dim, c_in, bias=False)
        self.fc = nn.Sequential(
            squeeze,
            nn.ReLU(inplace=True),
            expand,
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Run ``x`` through the bottleneck and return the result."""
        adapted = self.fc(x)
        return adapted


In [None]:
class MemeBLIP(pl.LightningModule):
    """Classifier over pre-extracted BLIP image and text features.

    Each modality goes through its own ``LinearProjection`` and ``Adapter``;
    the two adapted vectors are multiplied element-wise and fed to a linear
    classifier. Training uses cross-entropy and logs accuracy, AUROC and F1.

    NOTE(review): ``self.processor`` and ``self.model`` (a full
    ``BlipForConditionalGeneration``) are loaded here but are not used by
    ``forward``; they are kept so existing attributes and checkpoints keep
    working — consider dropping them.
    NOTE(review): the same metric objects are shared by the training and
    validation steps; only their per-batch return values are logged.
    """

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg

        # Per-modality linear projections
        self.image_projection = LinearProjection(
            input_dim=cfg.unmapped_dim,
            output_dim=cfg.map_dim,
            num_layers=cfg.num_mapping_layers,
            drop_probs=cfg.drop_probs[0]
        ).to(self.cfg.device)
        self.text_projection = LinearProjection(
            input_dim=cfg.unmapped_dim,
            output_dim=cfg.map_dim,
            num_layers=cfg.num_mapping_layers,
            drop_probs=cfg.drop_probs[0]
        ).to(self.cfg.device)

        # Per-modality adapters
        self.image_adapter = Adapter(cfg.map_dim, reduction=4).to(self.cfg.device)
        self.text_adapter = Adapter(cfg.map_dim, reduction=4).to(self.cfg.device)

        # BLIP model and processor (see the class-level note: unused by forward)
        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        self.model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(cfg.device)

        self.map_dim = cfg.map_dim  # width of the projected/adapted features

        self.classifier = nn.Linear(cfg.map_dim, cfg.num_classes).to(cfg.device)

        # Loss
        self.cross_entropy_loss = nn.CrossEntropyLoss()

        # Evaluation metrics
        self.acc = torchmetrics.Accuracy(task="multiclass", num_classes=cfg.num_classes)
        self.auroc = torchmetrics.AUROC(task="multiclass", num_classes=cfg.num_classes)
        self.f1 = torchmetrics.F1Score(task="multiclass", num_classes=cfg.num_classes)

    @staticmethod
    def _unwrap(features):
        """Return the tensor itself, unwrapping the 1-tuple the collator emits."""
        if isinstance(features, (tuple, list)):
            return features[0]
        return features

    def forward(self, batch):
        """Compute class logits from ``batch['image_features']`` / ``batch['text_features']``.

        Features may arrive as a tensor or as a 1-tuple wrapping a tensor.
        They are always moved to the device that holds the classifier
        weights, so the module also works when called outside a Trainer
        with CPU features.
        """
        device = self.classifier.weight.device
        image_features = self._unwrap(batch['image_features']).to(device)
        text_features = self._unwrap(batch['text_features']).to(device)

        # Linear projection
        image_proj = self.image_projection(image_features)
        text_proj = self.text_projection(text_features)

        # Adapter
        adapted_image = self.image_adapter(image_proj)
        adapted_text = self.text_adapter(text_proj)

        # Feature fusion: element-wise product of the two modalities
        combined_features = torch.mul(adapted_image, adapted_text)

        return self.classifier(combined_features)

    def _shared_step(self, batch, with_auroc=True):
        """Run forward, loss and metrics once.

        Returns:
            ``(metrics, batch_size)`` where ``metrics`` maps ``loss``,
            ``acc``, optionally ``auroc``, and ``f1`` to their values.
        """
        logits = self.forward(batch)
        labels = batch["labels"].to(logits.device)
        loss = self.cross_entropy_loss(logits, labels)  # labels have shape [batch_size]
        preds = torch.argmax(logits, dim=-1)

        metrics = {"loss": loss, "acc": self.acc(preds, labels)}
        if with_auroc:
            metrics["auroc"] = self.auroc(torch.softmax(logits, dim=-1), labels)
        metrics["f1"] = self.f1(preds, labels)
        return metrics, labels.size(0)

    def common_step(self, batch):
        """Return ``{"loss", "acc", "f1"}`` for ``batch`` (no AUROC, no logging)."""
        metrics, _ = self._shared_step(batch, with_auroc=False)
        return metrics

    def training_step(self, batch, batch_idx):
        metrics, batch_size = self._shared_step(batch)

        # batch_size is passed explicitly because the batch also contains
        # non-tensor entries and Lightning would otherwise have to guess it.
        for name in ("loss", "acc", "auroc", "f1"):
            self.log(f"train_{name}", metrics[name], on_step=True, on_epoch=True,
                     prog_bar=True, batch_size=batch_size)

        return metrics["loss"]

    def configure_optimizers(self):
        """AdamW with a StepLR schedule (x0.1 every 5 scheduler steps)."""
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.cfg.lr, weight_decay=self.cfg.weight_decay)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
        return [optimizer], [scheduler]

    def validation_step(self, batch, batch_idx):
        metrics, batch_size = self._shared_step(batch)

        for name in ("loss", "acc", "auroc", "f1"):
            self.log(f"val_{name}", metrics[name], prog_bar=True, batch_size=batch_size)

        return metrics

In [None]:
# Preview predictions on one validation batch.
# NOTE(review): this cell uses `model`, which is only created in the training
# cell further down; on a fresh kernel that cell has to run first (or this
# cell should be moved below it).
was_training = model.training
model.eval()  # disable dropout for inference
with torch.no_grad():  # no gradients are needed for a preview
    for batch in val_loader:
        logits = model(batch)
        preds = torch.argmax(logits, dim=-1)
        print("Predictions:", preds)
        print("Ground truth:", batch["labels"])
        break
model.train(was_training)  # restore the previous train/eval mode

In [None]:
# Seed Python/NumPy/PyTorch from the configured seed so that weight
# initialization and shuffling are repeatable between runs.
pl.seed_everything(cfg.seed, workers=True)

model = MemeBLIP(cfg)

# Sanity check: one forward pass on a training batch (result discarded,
# no gradients needed).
with torch.no_grad():
    for batch in train_loader:
        model(batch)
        break

# Follow cfg.device instead of hard-coding the accelerator.
use_gpu = str(cfg.device).startswith("cuda")
trainer = pl.Trainer(
    max_epochs=cfg.max_epochs,
    accelerator="gpu" if use_gpu else "cpu",
    devices=len(cfg.gpus) if use_gpu else 1,
    logger=pl.loggers.TensorBoardLogger("logs/"),
)

trainer.fit(model, train_loader, val_loader)
validation_metrics = trainer.validate(model, val_loader, verbose=True)
print("Validation Metrics:", validation_metrics)

print("Validation Accuracy:", trainer.callback_metrics["val_acc"])
print("Validation AUROC:", trainer.callback_metrics["val_auroc"])
print("Validation F1 Score:", trainer.callback_metrics["val_f1"])

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.utilities.rank_zero:You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name               | Type                         | Params | Mode 
----------------------------------------------------------------------------
0 | image_projection   | LinearProjection             | 787 K  | train


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 16. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/da

Training: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/pytorch_lightning/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 8. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Validation: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/pytorch_lightning/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 4. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]