In [1]:
from torchvision.datasets import ImageFolder
import numpy as np
import torch


# 设置随机数生成器种子
seed = 24
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

# 数据集根目录
data_root = './data/dataset_new/tongue_color'
dataset = ImageFolder(root=data_root)

class_names = dataset.classes

print(len(dataset))
print(dataset.class_to_idx)
print(dataset[0])

92
{'dan_bai': 0, 'dan_hong': 1, 'jiang_she': 2, 'qing_zi': 3}
(<PIL.Image.Image image mode=RGB size=1664x1452 at 0x28F65EC3010>, 0)


In [2]:
from torchvision.transforms import transforms
from torch.utils.data import DataLoader


dataset_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

data_loader = DataLoader(dataset, batch_size=len(dataset), shuffle=False)
data_loader.dataset.transform = dataset_transform
# 获取所有图像数据
all_images = None

for inputs, _ in data_loader:
    all_images = inputs

# 计算数据集的均值和标准差
mean = torch.mean(all_images, dim=(0, 2, 3))
std = torch.std(all_images, dim=(0, 2, 3))

print(f"均值：{mean}")
print(f"标准差：{std}")

均值：tensor([0.4791, 0.4038, 0.4168])
标准差：tensor([0.2908, 0.2599, 0.2675])


In [3]:
from torch.utils.data import random_split
from imblearn.over_sampling import RandomOverSampler
from torch.utils.data import TensorDataset


class Cutout(object):
    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        h, w = img.size(1), img.size(2)
        mask = np.ones((h, w), np.float32)

        for _ in range(self.n_holes):
            y = np.random.randint(h)
            x = np.random.randint(w)

            y1 = np.clip(y - self.length // 2, 0, h)
            y2 = np.clip(y + self.length // 2, 0, h)
            x1 = np.clip(x - self.length // 2, 0, w)
            x2 = np.clip(x + self.length // 2, 0, w)

            mask[y1:y2, x1:x2] = 0

        mask = torch.from_numpy(mask)
        img *= mask.unsqueeze(0)

        return img


# 数据增强和预处理
data_augmentation_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(15),
    transforms.RandomApply(transforms.GaussianBlur(3, sigma=(0.1, 2.0)), 0.5),
    Cutout(n_holes=8, length=32),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])


# # 计算每个类别的样本数量
# class_counts = torch.bincount(torch.tensor(dataset.targets))

# # 计算每个类别的权重
# class_weights = 1 / class_counts.float()


# 划分数据集
train_size = int(4 / 6.0 * len(dataset))
val_size = int(1 / 6.0 * len(dataset))
test_size = len(dataset) - train_size - val_size
remaining_size = len(dataset) - train_size
train_dataset, val_dataset, test_dataset = random_split(
    dataset, [train_size, val_size, test_size])

train_dataset.dataset.transform = data_augmentation_transform
val_dataset.dataset.transform = transform
test_dataset.dataset.transform = transform

# 获取训练集的特征和标签
X_train = []
y_train = []
for inputs, labels in train_dataset:
    X_train.append(inputs.numpy())  # 将张量转换为 numpy 数组
    y_train.append(labels)
X_train = np.array(X_train)
y_train = np.array(y_train)

# 创建 RandomOverSampler 实例
ros = RandomOverSampler(random_state=24)

# 进行过采样
X_resampled, y_resampled = ros.fit_resample(
    X_train.reshape(X_train.shape[0], -1), y_train)

# 将特征转换回张量形式
X_resampled = torch.tensor(X_resampled.reshape(-1, *inputs.shape))

# 创建过采样后的训练集
oversampled_train_dataset = TensorDataset(
    X_resampled, torch.tensor(y_resampled))

# 创建 DataLoader
train_loader = DataLoader(oversampled_train_dataset,
                          batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8)
test_loader = DataLoader(test_dataset, batch_size=8)

In [4]:
# 输出数据集大小
print("过采样后的训练集大小:", len(oversampled_train_dataset))
print("验证集大小:", len(val_dataset))
print("测试集大小:", len(test_dataset))

for inputs, labels in train_loader:
    print(inputs.shape, labels.shape)
    print(inputs[0])
    break

for inputs, labels in val_loader:
    print(inputs.shape, labels.shape)
    print(inputs[0])
    break

id2label = {id: label for id, label in enumerate(
    train_dataset.dataset.classes)}
label2id = {label: id for id, label in id2label.items()}

过采样后的训练集大小: 148
验证集大小: 15
测试集大小: 16
torch.Size([8, 3, 224, 224]) torch.Size([8])
tensor([[[-1.6473, -1.6473, -1.6473,  ..., -1.6473, -1.6473, -1.6473],
         [-1.6473, -1.6473, -1.6473,  ..., -1.6473, -1.6473, -1.6473],
         [-1.6473, -1.6473, -1.6473,  ..., -1.6473, -1.6473, -1.6473],
         ...,
         [-1.6473, -1.6473, -1.6473,  ..., -1.6473, -1.6473, -1.6473],
         [-1.6473, -1.6473, -1.6473,  ..., -1.6473, -1.6473, -1.6473],
         [-1.6473, -1.6473, -1.6473,  ..., -1.6473, -1.6473, -1.6473]],

        [[-1.5536, -1.5536, -1.5536,  ..., -1.5536, -1.5536, -1.5536],
         [-1.5536, -1.5536, -1.5536,  ..., -1.5536, -1.5536, -1.5536],
         [-1.5536, -1.5536, -1.5536,  ..., -1.5536, -1.5536, -1.5536],
         ...,
         [-1.5536, -1.5536, -1.5536,  ..., -1.5536, -1.5536, -1.5536],
         [-1.5536, -1.5536, -1.5536,  ..., -1.5536, -1.5536, -1.5536],
         [-1.5536, -1.5536, -1.5536,  ..., -1.5536, -1.5536, -1.5536]],

        [[-1.5580, -1.5580, -1.5580

In [5]:
from transformers import AutoFeatureExtractor


# 加载 Vision Transformer 的特征提取器
model_name = 'microsoft/swin-base-patch4-window7-224'
feature_extractor = AutoFeatureExtractor.from_pretrained(
    model_name,
    do_normalize=False,
    do_resize=False,
    do_rescale=False,
)
# feature_extractor.image_mean = mean
# feature_extractor.image_std = std
feature_extractor



ViTFeatureExtractor {
  "_valid_processor_keys": [
    "images",
    "do_resize",
    "size",
    "resample",
    "do_rescale",
    "rescale_factor",
    "do_normalize",
    "image_mean",
    "image_std",
    "return_tensors",
    "data_format",
    "input_data_format"
  ],
  "do_normalize": false,
  "do_rescale": false,
  "do_resize": false,
  "image_mean": [
    0.485,
    0.456,
    0.406
  ],
  "image_processor_type": "ViTFeatureExtractor",
  "image_std": [
    0.229,
    0.224,
    0.225
  ],
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 224,
    "width": 224
  }
}

In [6]:
from torch.utils.data.dataset import Dataset


class MyDataset(torch.utils.data.Dataset):
    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        return self.dataset[idx]


def custom_collate(batch):

    # 将样本数据列表拆分为输入和标签
    images, labels = list(zip(*batch))
    inputs = feature_extractor(images, return_tensors='pt')

    inputs['labels'] = torch.tensor(labels)
    inputs['pixel_values'] = torch.stack(
        [input for input in inputs["pixel_values"]])
    return inputs


train_data = MyDataset(train_dataset)
val_data = MyDataset(val_dataset)
test_data = MyDataset(test_dataset)

In [7]:
from transformers import SwinForImageClassification

model = SwinForImageClassification.from_pretrained(model_name, num_labels=len(dataset.classes), id2label=id2label, label2id=label2id, ignore_mismatched_sizes = True)

Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([4]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([4, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
from sklearn.metrics import accuracy_score


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return dict(accuracy=accuracy_score(predictions, labels))
    # logits, labels = eval_pred
    # predictions = logits.argsort(axis=-1)[:, -5:]  # 获取top-5预测结果
    # # 将标签扩展为与预测结果相同的形状
    # labels_expanded = labels.reshape(-1, 1).repeat(5, axis=1)
    # correct_predictions = (predictions == labels_expanded)  # 判断预测结果是否包含正确标签
    # top5_accuracy = correct_predictions.any(axis=-1).mean()  # 计算top-5准确率
    # return {"accuracy": top5_accuracy}

In [9]:
from transformers import TrainingArguments, Trainer


def train(train_dataset: Dataset,
          valid_dataset: Dataset,
          pre_model_path: str
          ) -> None:
    """[summary]

    Args:
        train_dataset (Dataset): [训练集]
        valid_dataset (Dataset): [验证集]
        pre_model_path (str): [预训练模型文件夹]

    Returns:
        [type]: [description]
    """

    args = TrainingArguments(
        # report_to="wandb",
        output_dir=f"F:/model_checkpoint/tongue-color-classification",
        remove_unused_columns=False,
        save_strategy="epoch",
        evaluation_strategy="epoch",
        learning_rate=5e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=30,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        logging_dir='logs',
        warmup_ratio=0.1,
        logging_steps=10,
        seed=seed,
    )

    # 每次训练都要重新载入模型，定义一个函数返回初始化的预训练模型
    def model_init():
        return SwinForImageClassification.from_pretrained(pre_model_path, num_labels=len(dataset.classes), id2label=id2label, label2id=label2id, ignore_mismatched_sizes=True)

    # 定义一个trainer
    trainer = Trainer(
        # 注意此处不是model
        model_init=model_init,
        args=args,
        train_dataset=train_dataset,
        eval_dataset=valid_dataset,
        data_collator=custom_collate,
        compute_metrics=compute_metrics,
        tokenizer=feature_extractor,
    )

    # 超参数搜索范围
    def hp_space(trial):
        return {
            "learning_rate": trial.suggest_float("learning_rate", 1e-5, 5e-5, log=True),
            "weight_decay": trial.suggest_float("weight_decay", 0.01, 0.3),
            "num_train_epochs": trial.suggest_int("num_train_epochs", 2, 30),
            "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", [4, 8, 16, 32]),
        }

    # trainer.hyperparameter_search方法进行超参数搜索
    best_trial = trainer.hyperparameter_search(
        direction="maximize",
        backend="optuna",
        n_trials=10,
        hp_space=hp_space
    )

    print("*************************************")
    print(" Best run %s" % str(best_trial))
    print("*************************************")

In [10]:
# Train and save results
train_results = train(train_data, val_data, model_name)


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([4]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([4, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[I 2024-05-06 17:42:20,826] A new study created in memory with name: no-name-22197daa-3c54-413d-8860-ec9b82f296fd
Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bi

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

  0%|          | 0/272 [00:00<?, ?it/s]

{'loss': 1.4424, 'grad_norm': 33.74838638305664, 'learning_rate': 1.0096524908660242e-05, 'epoch': 0.62}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9174505472183228, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4139, 'eval_samples_per_second': 36.241, 'eval_steps_per_second': 4.832, 'epoch': 1.0}
{'loss': 1.0664, 'grad_norm': 20.912220001220703, 'learning_rate': 2.0193049817320484e-05, 'epoch': 1.25}
{'loss': 1.0345, 'grad_norm': 43.496665954589844, 'learning_rate': 2.8038546221754833e-05, 'epoch': 1.88}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0129220485687256, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.4042, 'eval_samples_per_second': 37.113, 'eval_steps_per_second': 4.948, 'epoch': 2.0}
{'loss': 0.8166, 'grad_norm': 66.12140655517578, 'learning_rate': 2.6879928609285625e-05, 'epoch': 2.5}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8971166610717773, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4173, 'eval_samples_per_second': 35.945, 'eval_steps_per_second': 4.793, 'epoch': 3.0}
{'loss': 0.8529, 'grad_norm': 66.2474365234375, 'learning_rate': 2.5721310996816417e-05, 'epoch': 3.12}
{'loss': 0.6477, 'grad_norm': 51.83633804321289, 'learning_rate': 2.456269338434721e-05, 'epoch': 3.75}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8558684587478638, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.413, 'eval_samples_per_second': 36.32, 'eval_steps_per_second': 4.843, 'epoch': 4.0}
{'loss': 0.5163, 'grad_norm': 39.527767181396484, 'learning_rate': 2.3404075771878e-05, 'epoch': 4.38}
{'loss': 0.3759, 'grad_norm': 38.39536666870117, 'learning_rate': 2.2245458159408793e-05, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9912449717521667, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4789, 'eval_samples_per_second': 31.32, 'eval_steps_per_second': 4.176, 'epoch': 5.0}
{'loss': 0.2791, 'grad_norm': 97.71971893310547, 'learning_rate': 2.1086840546939588e-05, 'epoch': 5.62}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9891290664672852, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4097, 'eval_samples_per_second': 36.616, 'eval_steps_per_second': 4.882, 'epoch': 6.0}
{'loss': 0.1937, 'grad_norm': 0.372452974319458, 'learning_rate': 1.9928222934470376e-05, 'epoch': 6.25}
{'loss': 0.4064, 'grad_norm': 19.02629852294922, 'learning_rate': 1.876960532200117e-05, 'epoch': 6.88}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.544020175933838, 'eval_accuracy': 0.13333333333333333, 'eval_runtime': 0.4078, 'eval_samples_per_second': 36.783, 'eval_steps_per_second': 4.904, 'epoch': 7.0}
{'loss': 0.2301, 'grad_norm': 7.899446964263916, 'learning_rate': 1.7610987709531964e-05, 'epoch': 7.5}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.3544718027114868, 'eval_accuracy': 0.8, 'eval_runtime': 0.4051, 'eval_samples_per_second': 37.025, 'eval_steps_per_second': 4.937, 'epoch': 8.0}
{'loss': 0.0459, 'grad_norm': 39.72601318359375, 'learning_rate': 1.6452370097062752e-05, 'epoch': 8.12}
{'loss': 0.0882, 'grad_norm': 2.1837973594665527, 'learning_rate': 1.5293752484593547e-05, 'epoch': 8.75}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.843200445175171, 'eval_accuracy': 0.6, 'eval_runtime': 0.4045, 'eval_samples_per_second': 37.08, 'eval_steps_per_second': 4.944, 'epoch': 9.0}
{'loss': 0.0365, 'grad_norm': 1.0515015125274658, 'learning_rate': 1.4135134872124338e-05, 'epoch': 9.38}
{'loss': 0.016, 'grad_norm': 0.21208514273166656, 'learning_rate': 1.297651725965513e-05, 'epoch': 10.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.6836340427398682, 'eval_accuracy': 0.8, 'eval_runtime': 0.4069, 'eval_samples_per_second': 36.863, 'eval_steps_per_second': 4.915, 'epoch': 10.0}
{'loss': 0.0012, 'grad_norm': 0.3704510033130646, 'learning_rate': 1.1817899647185921e-05, 'epoch': 10.62}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.9686696529388428, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.4063, 'eval_samples_per_second': 36.923, 'eval_steps_per_second': 4.923, 'epoch': 11.0}
{'loss': 0.0881, 'grad_norm': 0.09819673001766205, 'learning_rate': 1.0659282034716713e-05, 'epoch': 11.25}
{'loss': 0.0024, 'grad_norm': 0.06873517483472824, 'learning_rate': 9.500664422247505e-06, 'epoch': 11.88}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.9556736946105957, 'eval_accuracy': 0.8, 'eval_runtime': 0.3969, 'eval_samples_per_second': 37.793, 'eval_steps_per_second': 5.039, 'epoch': 12.0}
{'loss': 0.0226, 'grad_norm': 0.02549740858376026, 'learning_rate': 8.342046809778297e-06, 'epoch': 12.5}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.8974997997283936, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.4042, 'eval_samples_per_second': 37.106, 'eval_steps_per_second': 4.947, 'epoch': 13.0}
{'loss': 0.0056, 'grad_norm': 0.026432307437062263, 'learning_rate': 7.1834291973090896e-06, 'epoch': 13.12}
{'loss': 0.0002, 'grad_norm': 0.02882941998541355, 'learning_rate': 6.0248115848398814e-06, 'epoch': 13.75}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.013906717300415, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.4073, 'eval_samples_per_second': 36.83, 'eval_steps_per_second': 4.911, 'epoch': 14.0}
{'loss': 0.0009, 'grad_norm': 0.01601402461528778, 'learning_rate': 4.866193972370673e-06, 'epoch': 14.38}
{'loss': 0.0181, 'grad_norm': 0.05776462331414223, 'learning_rate': 3.7075763599014656e-06, 'epoch': 15.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.886868953704834, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4019, 'eval_samples_per_second': 37.322, 'eval_steps_per_second': 4.976, 'epoch': 15.0}
{'loss': 0.0011, 'grad_norm': 0.007789098657667637, 'learning_rate': 2.5489587474322575e-06, 'epoch': 15.62}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.9782941341400146, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.4123, 'eval_samples_per_second': 36.379, 'eval_steps_per_second': 4.851, 'epoch': 16.0}
{'loss': 0.0004, 'grad_norm': 0.02803037315607071, 'learning_rate': 1.3903411349630495e-06, 'epoch': 16.25}
{'loss': 0.0005, 'grad_norm': 0.015021628700196743, 'learning_rate': 2.317235224938416e-07, 'epoch': 16.88}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.9749313592910767, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.3968, 'eval_samples_per_second': 37.801, 'eval_steps_per_second': 5.04, 'epoch': 17.0}
{'train_runtime': 123.2732, 'train_samples_per_second': 8.412, 'train_steps_per_second': 2.206, 'train_loss': 0.30111056054353386, 'epoch': 17.0}


[I 2024-05-06 17:44:26,789] Trial 0 finished with value: 0.6666666666666666 and parameters: {'learning_rate': 2.8270269744248675e-05, 'weight_decay': 0.12280585077732202, 'num_train_epochs': 17, 'per_device_train_batch_size': 4}. Best is trial 0 with value: 0.6666666666666666.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([4]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([4, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▇▇▇▇▇▇▁█▆█▇█▇▇▇▇▇
eval/loss,▁▂▁▁▂▂█▃▅▄▆▆▅▆▅▆▆
eval/runtime,▂▂▃▂█▂▂▂▂▂▂▁▂▂▁▂▁
eval/samples_per_second,▆▇▆▆▁▇▇▇▇▇▇█▇▇▇▆█
eval/steps_per_second,▆▇▆▆▁▇▇▇▇▇▇█▇▇▇▆█
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
train/grad_norm,▃▂▄▆▆▅▄▄█▁▂▂▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▃▆██▇▇▇▇▆▆▆▅▅▅▄▄▄▄▃▃▃▂▂▂▂▁▁
train/loss,█▆▆▅▅▄▄▃▂▂▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.66667
eval/loss,1.97493
eval/runtime,0.3968
eval/samples_per_second,37.801
eval/steps_per_second,5.04
total_flos,8.124626118596198e+16
train/epoch,17.0
train/global_step,272.0
train/grad_norm,0.01502
train/learning_rate,0.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

  0%|          | 0/72 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8840122222900391, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.398, 'eval_samples_per_second': 37.692, 'eval_steps_per_second': 5.026, 'epoch': 1.0}
{'loss': 1.254, 'grad_norm': 15.852934837341309, 'learning_rate': 1.876332082553564e-05, 'epoch': 1.25}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0655567646026611, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4167, 'eval_samples_per_second': 35.999, 'eval_steps_per_second': 4.8, 'epoch': 2.0}
{'loss': 0.984, 'grad_norm': 21.980636596679688, 'learning_rate': 1.5736978756900858e-05, 'epoch': 2.5}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9566660523414612, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.3966, 'eval_samples_per_second': 37.817, 'eval_steps_per_second': 5.042, 'epoch': 3.0}
{'loss': 0.8127, 'grad_norm': 13.283743858337402, 'learning_rate': 1.2710636688266078e-05, 'epoch': 3.75}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9328389763832092, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4078, 'eval_samples_per_second': 36.78, 'eval_steps_per_second': 4.904, 'epoch': 4.0}
{'loss': 0.6868, 'grad_norm': 12.723321914672852, 'learning_rate': 9.684294619631298e-06, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9183104634284973, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4008, 'eval_samples_per_second': 37.429, 'eval_steps_per_second': 4.991, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9024428725242615, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.397, 'eval_samples_per_second': 37.786, 'eval_steps_per_second': 5.038, 'epoch': 6.0}
{'loss': 0.5461, 'grad_norm': 13.68991756439209, 'learning_rate': 6.657952550996517e-06, 'epoch': 6.25}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9349414110183716, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.3972, 'eval_samples_per_second': 37.765, 'eval_steps_per_second': 5.035, 'epoch': 7.0}
{'loss': 0.4957, 'grad_norm': 12.632039070129395, 'learning_rate': 3.6316104823617366e-06, 'epoch': 7.5}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9539352059364319, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.3994, 'eval_samples_per_second': 37.553, 'eval_steps_per_second': 5.007, 'epoch': 8.0}
{'loss': 0.4166, 'grad_norm': 13.198756217956543, 'learning_rate': 6.052684137269561e-07, 'epoch': 8.75}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9381629228591919, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4058, 'eval_samples_per_second': 36.96, 'eval_steps_per_second': 4.928, 'epoch': 9.0}
{'train_runtime': 69.1407, 'train_samples_per_second': 7.94, 'train_steps_per_second': 1.041, 'train_loss': 0.7287277107437452, 'epoch': 9.0}


[I 2024-05-06 17:45:37,272] Trial 1 finished with value: 0.7333333333333333 and parameters: {'learning_rate': 1.9368589239262595e-05, 'weight_decay': 0.21536378501859094, 'num_train_epochs': 9, 'per_device_train_batch_size': 8}. Best is trial 1 with value: 0.7333333333333333.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([4]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([4, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▁▁▁▁▁▁▁▁▁
eval/loss,▁█▄▃▂▂▃▄▃
eval/runtime,▁█▁▅▂▁▁▂▄
eval/samples_per_second,█▁█▄▇██▇▅
eval/steps_per_second,█▁█▄▇██▇▅
train/epoch,▁▁▂▂▃▃▄▅▅▅▆▆▇▇███
train/global_step,▁▁▂▂▃▃▄▅▅▅▆▆▇▇███
train/grad_norm,▃█▁▁▂▁▁
train/learning_rate,█▇▆▅▃▂▁
train/loss,█▆▄▃▂▂▁

0,1
eval/accuracy,0.73333
eval/loss,0.93816
eval/runtime,0.4058
eval/samples_per_second,36.96
eval/steps_per_second,4.928
total_flos,4.301272651021517e+16
train/epoch,9.0
train/global_step,72.0
train/grad_norm,13.19876
train/learning_rate,0.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888889107, max=1.0…

  0%|          | 0/64 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0498203039169312, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4006, 'eval_samples_per_second': 37.447, 'eval_steps_per_second': 4.993, 'epoch': 1.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.024740219116211, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4119, 'eval_samples_per_second': 36.413, 'eval_steps_per_second': 4.855, 'epoch': 2.0}
{'loss': 1.1927, 'grad_norm': 28.013072967529297, 'learning_rate': 2.986461651939802e-05, 'epoch': 2.5}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1764719486236572, 'eval_accuracy': 0.4666666666666667, 'eval_runtime': 0.4031, 'eval_samples_per_second': 37.21, 'eval_steps_per_second': 4.961, 'epoch': 3.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9322866797447205, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.3983, 'eval_samples_per_second': 37.657, 'eval_steps_per_second': 5.021, 'epoch': 4.0}
{'loss': 0.8692, 'grad_norm': 14.014689445495605, 'learning_rate': 2.4334131978768755e-05, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0741958618164062, 'eval_accuracy': 0.5333333333333333, 'eval_runtime': 0.405, 'eval_samples_per_second': 37.041, 'eval_steps_per_second': 4.939, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8660989999771118, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4115, 'eval_samples_per_second': 36.449, 'eval_steps_per_second': 4.86, 'epoch': 6.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8794398903846741, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4096, 'eval_samples_per_second': 36.621, 'eval_steps_per_second': 4.883, 'epoch': 7.0}
{'loss': 0.6273, 'grad_norm': 15.619958877563477, 'learning_rate': 1.8803647438139494e-05, 'epoch': 7.5}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0557609796524048, 'eval_accuracy': 0.6, 'eval_runtime': 0.3972, 'eval_samples_per_second': 37.766, 'eval_steps_per_second': 5.035, 'epoch': 8.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8754091858863831, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4121, 'eval_samples_per_second': 36.395, 'eval_steps_per_second': 4.853, 'epoch': 9.0}
{'loss': 0.3845, 'grad_norm': 9.372049331665039, 'learning_rate': 1.327316289751023e-05, 'epoch': 10.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9125260710716248, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4489, 'eval_samples_per_second': 33.412, 'eval_steps_per_second': 4.455, 'epoch': 10.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0258935689926147, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.419, 'eval_samples_per_second': 35.798, 'eval_steps_per_second': 4.773, 'epoch': 11.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0276169776916504, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.41, 'eval_samples_per_second': 36.588, 'eval_steps_per_second': 4.878, 'epoch': 12.0}
{'loss': 0.226, 'grad_norm': 5.447939872741699, 'learning_rate': 7.742678356880969e-06, 'epoch': 12.5}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0066627264022827, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4373, 'eval_samples_per_second': 34.305, 'eval_steps_per_second': 4.574, 'epoch': 13.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0193711519241333, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4187, 'eval_samples_per_second': 35.827, 'eval_steps_per_second': 4.777, 'epoch': 14.0}
{'loss': 0.1137, 'grad_norm': 3.657318115234375, 'learning_rate': 2.2121938162517054e-06, 'epoch': 15.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0153039693832397, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.4157, 'eval_samples_per_second': 36.084, 'eval_steps_per_second': 4.811, 'epoch': 15.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0292719602584839, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4131, 'eval_samples_per_second': 36.312, 'eval_steps_per_second': 4.842, 'epoch': 16.0}
{'train_runtime': 113.3731, 'train_samples_per_second': 8.609, 'train_steps_per_second': 0.565, 'train_loss': 0.5385326268151402, 'epoch': 16.0}


[I 2024-05-06 17:47:31,997] Trial 2 finished with value: 0.7333333333333333 and parameters: {'learning_rate': 3.15237618815868e-05, 'weight_decay': 0.26342087603524683, 'num_train_epochs': 16, 'per_device_train_batch_size': 16}. Best is trial 1 with value: 0.7333333333333333.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([4]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([4, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,██▁█▃██▅██▆███▆█
eval/loss,▅▅█▂▆▁▁▅▁▂▅▅▄▄▄▅
eval/runtime,▁▃▂▁▂▃▃▁▃█▄▃▆▄▄▃
eval/samples_per_second,▇▆▇█▇▆▆█▆▁▅▆▂▅▅▆
eval/steps_per_second,▇▆▇█▇▆▆█▆▁▅▆▂▅▅▆
train/epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇████
train/global_step,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇████
train/grad_norm,█▄▄▃▂▁
train/learning_rate,█▇▅▄▂▁
train/loss,█▆▄▃▂▁

0,1
eval/accuracy,0.73333
eval/loss,1.02927
eval/runtime,0.4131
eval/samples_per_second,36.312
eval/steps_per_second,4.842
total_flos,7.646706935149363e+16
train/epoch,16.0
train/global_step,64.0
train/grad_norm,3.65732
train/learning_rate,0.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777777626, max=1.0…

  0%|          | 0/44 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.2748209238052368, 'eval_accuracy': 0.3333333333333333, 'eval_runtime': 0.3959, 'eval_samples_per_second': 37.884, 'eval_steps_per_second': 5.051, 'epoch': 1.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9017824530601501, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4186, 'eval_samples_per_second': 35.834, 'eval_steps_per_second': 4.778, 'epoch': 2.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1097502708435059, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4374, 'eval_samples_per_second': 34.294, 'eval_steps_per_second': 4.573, 'epoch': 3.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.2531496286392212, 'eval_accuracy': 0.4666666666666667, 'eval_runtime': 0.4046, 'eval_samples_per_second': 37.076, 'eval_steps_per_second': 4.943, 'epoch': 4.0}
{'loss': 1.1171, 'grad_norm': 13.408491134643555, 'learning_rate': 3.9548244924057e-05, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0857810974121094, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4076, 'eval_samples_per_second': 36.798, 'eval_steps_per_second': 4.906, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.084642767906189, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4117, 'eval_samples_per_second': 36.434, 'eval_steps_per_second': 4.858, 'epoch': 6.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1259233951568604, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.3998, 'eval_samples_per_second': 37.517, 'eval_steps_per_second': 5.002, 'epoch': 7.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1364567279815674, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.4092, 'eval_samples_per_second': 36.657, 'eval_steps_per_second': 4.888, 'epoch': 8.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9970535635948181, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4093, 'eval_samples_per_second': 36.647, 'eval_steps_per_second': 4.886, 'epoch': 9.0}
{'loss': 0.6301, 'grad_norm': 8.459834098815918, 'learning_rate': 2.7916408181687294e-05, 'epoch': 10.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0862901210784912, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.4141, 'eval_samples_per_second': 36.224, 'eval_steps_per_second': 4.83, 'epoch': 10.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.277100920677185, 'eval_accuracy': 0.4666666666666667, 'eval_runtime': 0.4096, 'eval_samples_per_second': 36.621, 'eval_steps_per_second': 4.883, 'epoch': 11.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1806144714355469, 'eval_accuracy': 0.6, 'eval_runtime': 0.4129, 'eval_samples_per_second': 36.326, 'eval_steps_per_second': 4.843, 'epoch': 12.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.069523572921753, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.4125, 'eval_samples_per_second': 36.365, 'eval_steps_per_second': 4.849, 'epoch': 13.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1130167245864868, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.3979, 'eval_samples_per_second': 37.696, 'eval_steps_per_second': 5.026, 'epoch': 14.0}
{'loss': 0.2743, 'grad_norm': 17.87546157836914, 'learning_rate': 1.628457143931759e-05, 'epoch': 15.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1899561882019043, 'eval_accuracy': 0.6, 'eval_runtime': 0.4163, 'eval_samples_per_second': 36.029, 'eval_steps_per_second': 4.804, 'epoch': 15.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.2115389108657837, 'eval_accuracy': 0.6, 'eval_runtime': 0.4084, 'eval_samples_per_second': 36.731, 'eval_steps_per_second': 4.898, 'epoch': 16.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.205281138420105, 'eval_accuracy': 0.6, 'eval_runtime': 0.4241, 'eval_samples_per_second': 35.373, 'eval_steps_per_second': 4.716, 'epoch': 17.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1836055517196655, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.4122, 'eval_samples_per_second': 36.388, 'eval_steps_per_second': 4.852, 'epoch': 18.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1741182804107666, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.4127, 'eval_samples_per_second': 36.345, 'eval_steps_per_second': 4.846, 'epoch': 19.0}
{'loss': 0.1323, 'grad_norm': 4.411285400390625, 'learning_rate': 4.652734696947882e-06, 'epoch': 20.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1658995151519775, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.413, 'eval_samples_per_second': 36.317, 'eval_steps_per_second': 4.842, 'epoch': 20.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1681358814239502, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.4146, 'eval_samples_per_second': 36.181, 'eval_steps_per_second': 4.824, 'epoch': 21.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1726632118225098, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.4109, 'eval_samples_per_second': 36.507, 'eval_steps_per_second': 4.868, 'epoch': 22.0}
{'train_runtime': 162.6514, 'train_samples_per_second': 8.251, 'train_steps_per_second': 0.271, 'train_loss': 0.4972017387097532, 'epoch': 22.0}


[I 2024-05-06 17:50:15,972] Trial 3 finished with value: 0.6666666666666666 and parameters: {'learning_rate': 4.536416329524185e-05, 'weight_decay': 0.2600868764770833, 'num_train_epochs': 22, 'per_device_train_batch_size': 32}. Best is trial 1 with value: 0.7333333333333333.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([4]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([4, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▁██▃███▇█▇▃▆▇▇▆▆▆▇▇▇▇▇
eval/loss,█▁▅█▄▄▅▅▃▄█▆▄▅▆▇▇▆▆▆▆▆
eval/runtime,▁▅█▂▃▄▂▃▃▄▃▄▄▁▄▃▆▄▄▄▄▄
eval/samples_per_second,█▄▁▆▆▅▇▆▆▅▆▅▅█▄▆▃▅▅▅▅▅
eval/steps_per_second,█▄▁▆▆▅▇▆▆▅▆▅▅█▄▆▃▅▅▅▅▅
train/epoch,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇███
train/global_step,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇███
train/grad_norm,▆▃█▁
train/learning_rate,█▆▃▁
train/loss,█▅▂▁

0,1
eval/accuracy,0.66667
eval/loss,1.17266
eval/runtime,0.4109
eval/samples_per_second,36.507
eval/steps_per_second,4.868
total_flos,1.0514222035830374e+17
train/epoch,22.0
train/global_step,44.0
train/grad_norm,4.41129
train/learning_rate,0.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888888475, max=1.0…

  0%|          | 0/104 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.91008460521698, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.3976, 'eval_samples_per_second': 37.73, 'eval_steps_per_second': 5.031, 'epoch': 1.0}
{'loss': 1.2273, 'grad_norm': 33.14131546020508, 'learning_rate': 3.444571512486944e-05, 'epoch': 1.25}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0517505407333374, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4091, 'eval_samples_per_second': 36.667, 'eval_steps_per_second': 4.889, 'epoch': 2.0}
{'loss': 0.9571, 'grad_norm': 22.110187530517578, 'learning_rate': 3.422348470470899e-05, 'epoch': 2.5}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0722347497940063, 'eval_accuracy': 0.6, 'eval_runtime': 0.4127, 'eval_samples_per_second': 36.343, 'eval_steps_per_second': 4.846, 'epoch': 3.0}
{'loss': 0.7089, 'grad_norm': 21.18468475341797, 'learning_rate': 3.0149260335100772e-05, 'epoch': 3.75}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9622055888175964, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4127, 'eval_samples_per_second': 36.344, 'eval_steps_per_second': 4.846, 'epoch': 4.0}
{'loss': 0.4806, 'grad_norm': 33.472835540771484, 'learning_rate': 2.6075035965492564e-05, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1784064769744873, 'eval_accuracy': 0.4, 'eval_runtime': 0.4086, 'eval_samples_per_second': 36.709, 'eval_steps_per_second': 4.895, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0520755052566528, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4178, 'eval_samples_per_second': 35.901, 'eval_steps_per_second': 4.787, 'epoch': 6.0}
{'loss': 0.2649, 'grad_norm': 3.9143831729888916, 'learning_rate': 2.200081159588435e-05, 'epoch': 6.25}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.2677336931228638, 'eval_accuracy': 0.4666666666666667, 'eval_runtime': 0.4082, 'eval_samples_per_second': 36.748, 'eval_steps_per_second': 4.9, 'epoch': 7.0}
{'loss': 0.1308, 'grad_norm': 18.963220596313477, 'learning_rate': 1.7926587226276137e-05, 'epoch': 7.5}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.2147268056869507, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.413, 'eval_samples_per_second': 36.319, 'eval_steps_per_second': 4.843, 'epoch': 8.0}
{'loss': 0.0833, 'grad_norm': 15.298001289367676, 'learning_rate': 1.3852362856667924e-05, 'epoch': 8.75}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.865329384803772, 'eval_accuracy': 0.4666666666666667, 'eval_runtime': 0.4166, 'eval_samples_per_second': 36.007, 'eval_steps_per_second': 4.801, 'epoch': 9.0}
{'loss': 0.0834, 'grad_norm': 17.6887264251709, 'learning_rate': 9.77813848705971e-06, 'epoch': 10.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.6178860664367676, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4409, 'eval_samples_per_second': 34.023, 'eval_steps_per_second': 4.536, 'epoch': 10.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.668994426727295, 'eval_accuracy': 0.4666666666666667, 'eval_runtime': 0.4127, 'eval_samples_per_second': 36.348, 'eval_steps_per_second': 4.846, 'epoch': 11.0}
{'loss': 0.0346, 'grad_norm': 18.287403106689453, 'learning_rate': 5.703914117451498e-06, 'epoch': 11.25}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 2.081536293029785, 'eval_accuracy': 0.4666666666666667, 'eval_runtime': 0.424, 'eval_samples_per_second': 35.379, 'eval_steps_per_second': 4.717, 'epoch': 12.0}
{'loss': 0.0325, 'grad_norm': 6.035172462463379, 'learning_rate': 1.6296897478432853e-06, 'epoch': 12.5}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.9302390813827515, 'eval_accuracy': 0.4666666666666667, 'eval_runtime': 0.4629, 'eval_samples_per_second': 32.406, 'eval_steps_per_second': 4.321, 'epoch': 13.0}
{'train_runtime': 102.7182, 'train_samples_per_second': 7.72, 'train_steps_per_second': 1.012, 'train_loss': 0.38584666355298114, 'epoch': 13.0}


[I 2024-05-06 17:52:01,133] Trial 4 finished with value: 0.4666666666666667 and parameters: {'learning_rate': 3.789028663735638e-05, 'weight_decay': 0.04246751233340829, 'num_train_epochs': 13, 'per_device_train_batch_size': 8}. Best is trial 1 with value: 0.7333333333333333.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([4]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([4, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,██▅█▁█▂▇▂█▂▂▂
eval/loss,▁▂▂▁▃▂▃▃▇▅▆█▇
eval/runtime,▁▂▃▃▂▃▂▃▃▆▃▄█
eval/samples_per_second,█▇▆▆▇▆▇▆▆▃▆▅▁
eval/steps_per_second,█▇▆▆▇▆▇▆▆▃▆▅▁
train/epoch,▁▁▂▂▂▃▃▃▃▄▄▅▅▅▆▆▆▆▇▇▇███
train/global_step,▁▁▂▂▂▃▃▃▃▄▄▅▅▅▆▆▆▆▇▇▇███
train/grad_norm,█▅▅█▁▅▄▄▄▂
train/learning_rate,██▇▆▅▄▄▃▂▁
train/loss,█▆▅▄▂▂▁▁▁▁

0,1
eval/accuracy,0.46667
eval/loss,1.93024
eval/runtime,0.4629
eval/samples_per_second,32.406
eval/steps_per_second,4.321
total_flos,6.212949384808858e+16
train/epoch,13.0
train/global_step,104.0
train/grad_norm,6.03517
train/learning_rate,0.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888888475, max=1.0…

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.3672263622283936, 'eval_accuracy': 0.2, 'eval_runtime': 0.3971, 'eval_samples_per_second': 37.769, 'eval_steps_per_second': 5.036, 'epoch': 1.0}


[I 2024-05-06 17:52:24,611] Trial 5 pruned. 
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([4]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([4, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▁
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁
train/global_step,▁

0,1
eval/accuracy,0.2
eval/loss,1.36723
eval/runtime,0.3971
eval/samples_per_second,37.769
eval/steps_per_second,5.036
train/epoch,1.0
train/global_step,2.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777777626, max=1.0…

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.292837142944336, 'eval_accuracy': 0.3333333333333333, 'eval_runtime': 0.3991, 'eval_samples_per_second': 37.586, 'eval_steps_per_second': 5.012, 'epoch': 1.0}


[I 2024-05-06 17:52:48,619] Trial 6 pruned. 
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([4]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([4, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▁
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁
train/global_step,▁

0,1
eval/accuracy,0.33333
eval/loss,1.29284
eval/runtime,0.3991
eval/samples_per_second,37.586
eval/steps_per_second,5.012
train/epoch,1.0
train/global_step,4.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888889738, max=1.0…

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0907775163650513, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.3985, 'eval_samples_per_second': 37.642, 'eval_steps_per_second': 5.019, 'epoch': 1.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9222650527954102, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4146, 'eval_samples_per_second': 36.183, 'eval_steps_per_second': 4.824, 'epoch': 2.0}
{'loss': 1.2009, 'grad_norm': 16.069456100463867, 'learning_rate': 1.2357628683925574e-05, 'epoch': 2.5}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0258694887161255, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.414, 'eval_samples_per_second': 36.232, 'eval_steps_per_second': 4.831, 'epoch': 3.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.1224216222763062, 'eval_accuracy': 0.6, 'eval_runtime': 0.4239, 'eval_samples_per_second': 35.385, 'eval_steps_per_second': 4.718, 'epoch': 4.0}
{'loss': 0.9297, 'grad_norm': 20.29051971435547, 'learning_rate': 6.740524736686677e-06, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.062342643737793, 'eval_accuracy': 0.6666666666666666, 'eval_runtime': 0.4273, 'eval_samples_per_second': 35.1, 'eval_steps_per_second': 4.68, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9663953185081482, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4776, 'eval_samples_per_second': 31.41, 'eval_steps_per_second': 4.188, 'epoch': 6.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9168387651443481, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4228, 'eval_samples_per_second': 35.48, 'eval_steps_per_second': 4.731, 'epoch': 7.0}
{'loss': 0.8456, 'grad_norm': 11.031815528869629, 'learning_rate': 1.1234207894477796e-06, 'epoch': 7.5}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9117479920387268, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4209, 'eval_samples_per_second': 35.641, 'eval_steps_per_second': 4.752, 'epoch': 8.0}
{'train_runtime': 87.9858, 'train_samples_per_second': 5.546, 'train_steps_per_second': 0.364, 'train_loss': 0.9761386848986149, 'epoch': 8.0}


[I 2024-05-06 17:54:17,989] Trial 7 finished with value: 0.7333333333333333 and parameters: {'learning_rate': 1.5727891052268913e-05, 'weight_decay': 0.15064071502511128, 'num_train_epochs': 8, 'per_device_train_batch_size': 16}. Best is trial 1 with value: 0.7333333333333333.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([4]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([4, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,███▁▅███
eval/loss,▇▁▅█▆▃▁▁
eval/runtime,▁▂▂▃▄█▃▃
eval/samples_per_second,█▆▆▅▅▁▆▆
eval/steps_per_second,█▆▆▅▅▁▆▆
train/epoch,▁▂▃▃▄▅▅▆▇▇██
train/global_step,▁▂▃▃▄▅▅▆▇▇██
train/grad_norm,▅█▁
train/learning_rate,█▅▁
train/loss,█▃▁

0,1
eval/accuracy,0.73333
eval/loss,0.91175
eval/runtime,0.4209
eval/samples_per_second,35.641
eval/steps_per_second,4.752
total_flos,4.779191834468352e+16
train/epoch,8.0
train/global_step,32.0
train/grad_norm,11.03182
train/learning_rate,0.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777777626, max=1.0…

  0%|          | 0/80 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.281590223312378, 'eval_accuracy': 0.3333333333333333, 'eval_runtime': 0.3999, 'eval_samples_per_second': 37.51, 'eval_steps_per_second': 5.001, 'epoch': 1.0}


[I 2024-05-06 17:54:43,985] Trial 8 pruned. 
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([4]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([4, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▁
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁
train/global_step,▁

0,1
eval/accuracy,0.33333
eval/loss,1.28159
eval/runtime,0.3999
eval/samples_per_second,37.51
eval/steps_per_second,5.001
train/epoch,1.0
train/global_step,4.0


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

  0%|          | 0/432 [00:00<?, ?it/s]

{'loss': 1.4733, 'grad_norm': 32.15748596191406, 'learning_rate': 3.2541297137769206e-06, 'epoch': 0.62}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 1.0834600925445557, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.3994, 'eval_samples_per_second': 37.557, 'eval_steps_per_second': 5.008, 'epoch': 1.0}
{'loss': 1.1602, 'grad_norm': 29.42119026184082, 'learning_rate': 6.508259427553841e-06, 'epoch': 1.25}
{'loss': 1.0418, 'grad_norm': 20.848079681396484, 'learning_rate': 9.76238914133076e-06, 'epoch': 1.88}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8696697950363159, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4115, 'eval_samples_per_second': 36.45, 'eval_steps_per_second': 4.86, 'epoch': 2.0}
{'loss': 1.0253, 'grad_norm': 28.81625747680664, 'learning_rate': 1.3016518855107683e-05, 'epoch': 2.5}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9081840515136719, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.414, 'eval_samples_per_second': 36.234, 'eval_steps_per_second': 4.831, 'epoch': 3.0}
{'loss': 1.0821, 'grad_norm': 23.454723358154297, 'learning_rate': 1.4096755729165589e-05, 'epoch': 3.12}
{'loss': 0.7819, 'grad_norm': 36.10508728027344, 'learning_rate': 1.3727730710077484e-05, 'epoch': 3.75}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9750910401344299, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.421, 'eval_samples_per_second': 35.631, 'eval_steps_per_second': 4.751, 'epoch': 4.0}
{'loss': 0.6837, 'grad_norm': 25.362003326416016, 'learning_rate': 1.335870569098938e-05, 'epoch': 4.38}
{'loss': 0.712, 'grad_norm': 41.34785079956055, 'learning_rate': 1.2989680671901275e-05, 'epoch': 5.0}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8437416553497314, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.416, 'eval_samples_per_second': 36.061, 'eval_steps_per_second': 4.808, 'epoch': 5.0}
{'loss': 0.5457, 'grad_norm': 24.889291763305664, 'learning_rate': 1.2620655652813171e-05, 'epoch': 5.62}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.8653429746627808, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4147, 'eval_samples_per_second': 36.168, 'eval_steps_per_second': 4.822, 'epoch': 6.0}
{'loss': 0.4099, 'grad_norm': 4.544570446014404, 'learning_rate': 1.2251630633725065e-05, 'epoch': 6.25}
{'loss': 0.6154, 'grad_norm': 13.22926139831543, 'learning_rate': 1.1882605614636962e-05, 'epoch': 6.88}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.976004958152771, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4148, 'eval_samples_per_second': 36.162, 'eval_steps_per_second': 4.822, 'epoch': 7.0}
{'loss': 0.3694, 'grad_norm': 89.93527221679688, 'learning_rate': 1.1513580595548858e-05, 'epoch': 7.5}


  0%|          | 0/2 [00:00<?, ?it/s]

{'eval_loss': 0.9619613885879517, 'eval_accuracy': 0.7333333333333333, 'eval_runtime': 0.4118, 'eval_samples_per_second': 36.424, 'eval_steps_per_second': 4.857, 'epoch': 8.0}


[I 2024-05-06 17:55:47,246] Trial 9 pruned. 


*************************************
 Best run BestRun(run_id='1', objective=0.7333333333333333, hyperparameters={'learning_rate': 1.9368589239262595e-05, 'weight_decay': 0.21536378501859094, 'num_train_epochs': 9, 'per_device_train_batch_size': 8}, run_summary=None)
*************************************


In [17]:
train_results.


***** train metrics *****
  epoch                    =       30.0
  train_loss               =     0.1044
  train_runtime            = 0:01:48.07
  train_samples_per_second =     16.933
  train_steps_per_second   =      2.221


In [None]:

# Evaluate on validation set
metrics = trainer.evaluate(val_data)
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

In [12]:
# from timm.models import create_model

# Swin = create_model('swin_large_patch4_window7_224_in22k',pretrained=True)

In [13]:
import timm

model_names = timm.list_models(pretrained=True)
print(model_names)

['bat_resnext26ts.ch_in1k', 'beit_base_patch16_224.in22k_ft_in22k', 'beit_base_patch16_224.in22k_ft_in22k_in1k', 'beit_base_patch16_384.in22k_ft_in22k_in1k', 'beit_large_patch16_224.in22k_ft_in22k', 'beit_large_patch16_224.in22k_ft_in22k_in1k', 'beit_large_patch16_384.in22k_ft_in22k_in1k', 'beit_large_patch16_512.in22k_ft_in22k_in1k', 'beitv2_base_patch16_224.in1k_ft_in1k', 'beitv2_base_patch16_224.in1k_ft_in22k', 'beitv2_base_patch16_224.in1k_ft_in22k_in1k', 'beitv2_large_patch16_224.in1k_ft_in1k', 'beitv2_large_patch16_224.in1k_ft_in22k', 'beitv2_large_patch16_224.in1k_ft_in22k_in1k', 'botnet26t_256.c1_in1k', 'caformer_b36.sail_in1k', 'caformer_b36.sail_in1k_384', 'caformer_b36.sail_in22k', 'caformer_b36.sail_in22k_ft_in1k', 'caformer_b36.sail_in22k_ft_in1k_384', 'caformer_m36.sail_in1k', 'caformer_m36.sail_in1k_384', 'caformer_m36.sail_in22k', 'caformer_m36.sail_in22k_ft_in1k', 'caformer_m36.sail_in22k_ft_in1k_384', 'caformer_s18.sail_in1k', 'caformer_s18.sail_in1k_384', 'caformer_s