In [1]:
import os


In [2]:
from datasets import load_dataset, load_metric
from transformers import (
    AutoImageProcessor,
    AutoModelForImageClassification,
    TrainingArguments,
    Trainer,
)
import numpy as np
import torch
from torchvision.transforms import (
    CenterCrop,
    Compose,
    Normalize,
    RandomHorizontalFlip,
    RandomResizedCrop,
    Resize,
    ToTensor,
)


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
BATCH_SIZE = 180
NUM_TRAIN_EPOCHS = 20
NUM_WORKERS = 15
RANDOM_SEED = 42


In [4]:
cwd = os.getcwd()
train_image_folder = os.path.join(cwd, "Datasets", "imagefolder")

print(f"begin loading {train_image_folder} ...")
dataset = load_dataset("imagefolder", data_dir=train_image_folder)
print("dataset setup successfully!")


begin loading /home/uceezl8/amls_ii/AMLS_II_assignment23_24/Datasets/imagefolder ...
dataset setup successfully!


In [5]:
model_checkpoint = "louislu9911/convnextv2-base-1k-224-finetuned-cassava-leaf-disease"
model_name = model_checkpoint.split("/")[-1]


In [7]:
pre_trained_checkpoint = "facebook/convnextv2-base-1k-224"
image_processor = AutoImageProcessor.from_pretrained(pre_trained_checkpoint)
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)
if "height" in image_processor.size:
    size = (image_processor.size["height"], image_processor.size["width"])
    crop_size = size
    max_size = None
elif "shortest_edge" in image_processor.size:
    size = image_processor.size["shortest_edge"]
    crop_size = (size, size)
    max_size = image_processor.size.get("longest_edge")
train_transforms = Compose(
    [
        RandomResizedCrop(crop_size),
        RandomHorizontalFlip(),
        ToTensor(),
        normalize,
    ]
)

val_transforms = Compose(
    [
        Resize(size),
        CenterCrop(crop_size),
        ToTensor(),
        normalize,
    ]
)


In [8]:
def preprocess_train(example_batch):
    """Apply train_transforms across a batch."""
    example_batch["pixel_values"] = [
        train_transforms(image.convert("RGB")) for image in example_batch["image"]
    ]
    return example_batch


def preprocess_val(example_batch):
    """Apply val_transforms across a batch."""
    example_batch["pixel_values"] = [
        val_transforms(image.convert("RGB")) for image in example_batch["image"]
    ]
    return example_batch


In [9]:
# split up training into training + validation
splits = dataset["train"].train_test_split(test_size=0.1, seed=RANDOM_SEED)
train_ds = splits["train"]
val_ds = splits["test"]
train_ds.set_transform(preprocess_train)
val_ds.set_transform(preprocess_val)


In [10]:
model = AutoModelForImageClassification.from_pretrained(
    model_checkpoint,
)


In [24]:
val_ds[0]


{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=800x600>,
 'label': 1,
 'pixel_values': tensor([[[-1.0390, -1.1932, -1.2959,  ..., -0.4054, -0.7137, -0.8678],
          [-0.7479, -1.0048, -1.3473,  ...,  0.2453,  0.4679,  0.1597],
          [-1.0219, -0.8335, -0.9363,  ...,  0.4166,  0.7591,  0.5364],
          ...,
          [-0.8849, -0.0629,  0.3994,  ...,  0.9817,  1.0331,  0.9646],
          [-0.3712,  0.9132,  1.0159,  ...,  0.8618,  1.1700,  0.8789],
          [ 0.0741,  1.5639,  1.6495,  ...,  1.1015,  1.2214,  0.8618]],
 
         [[-1.1954, -1.0728, -1.1604,  ..., -0.5476, -0.8452, -0.9678],
          [-1.2829, -1.2479, -1.4055,  ...,  0.1001,  0.3452,  0.0476],
          [-1.6856, -1.4405, -1.3704,  ...,  0.2752,  0.6604,  0.4328],
          ...,
          [-0.8452, -0.1099,  0.3102,  ...,  0.8004,  0.8704,  0.8179],
          [-0.3375,  0.8354,  0.9055,  ...,  0.6954,  1.0105,  0.7304],
          [ 0.1001,  1.5007,  1.5357,  ...,  0.9755,  1.0805,  0.6954]]

In [31]:
import torch

# forward pass
with torch.no_grad():
    outputs = model(**val_ds[:]['pixel_values'])
    logits = outputs.logits


TypeError: ConvNextV2ForImageClassification(
  (convnextv2): ConvNextV2Model(
    (embeddings): ConvNextV2Embeddings(
      (patch_embeddings): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
      (layernorm): ConvNextV2LayerNorm()
    )
    (encoder): ConvNextV2Encoder(
      (stages): ModuleList(
        (0): ConvNextV2Stage(
          (downsampling_layer): Identity()
          (layers): Sequential(
            (0): ConvNextV2Layer(
              (dwconv): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=128, out_features=512, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=512, out_features=128, bias=True)
              (drop_path): Identity()
            )
            (1): ConvNextV2Layer(
              (dwconv): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=128, out_features=512, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=512, out_features=128, bias=True)
              (drop_path): Identity()
            )
            (2): ConvNextV2Layer(
              (dwconv): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=128, out_features=512, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=512, out_features=128, bias=True)
              (drop_path): Identity()
            )
          )
        )
        (1): ConvNextV2Stage(
          (downsampling_layer): Sequential(
            (0): ConvNextV2LayerNorm()
            (1): Conv2d(128, 256, kernel_size=(2, 2), stride=(2, 2))
          )
          (layers): Sequential(
            (0): ConvNextV2Layer(
              (dwconv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=256, out_features=1024, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=1024, out_features=256, bias=True)
              (drop_path): Identity()
            )
            (1): ConvNextV2Layer(
              (dwconv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=256, out_features=1024, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=1024, out_features=256, bias=True)
              (drop_path): Identity()
            )
            (2): ConvNextV2Layer(
              (dwconv): Conv2d(256, 256, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=256)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=256, out_features=1024, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=1024, out_features=256, bias=True)
              (drop_path): Identity()
            )
          )
        )
        (2): ConvNextV2Stage(
          (downsampling_layer): Sequential(
            (0): ConvNextV2LayerNorm()
            (1): Conv2d(256, 512, kernel_size=(2, 2), stride=(2, 2))
          )
          (layers): Sequential(
            (0): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (1): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (2): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (3): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (4): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (5): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (6): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (7): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (8): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (9): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (10): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (11): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (12): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (13): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (14): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (15): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (16): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (17): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (18): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (19): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (20): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (21): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (22): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (23): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (24): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (25): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
            (26): ConvNextV2Layer(
              (dwconv): Conv2d(512, 512, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=512)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=512, out_features=2048, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=2048, out_features=512, bias=True)
              (drop_path): Identity()
            )
          )
        )
        (3): ConvNextV2Stage(
          (downsampling_layer): Sequential(
            (0): ConvNextV2LayerNorm()
            (1): Conv2d(512, 1024, kernel_size=(2, 2), stride=(2, 2))
          )
          (layers): Sequential(
            (0): ConvNextV2Layer(
              (dwconv): Conv2d(1024, 1024, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=1024)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=1024, out_features=4096, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=4096, out_features=1024, bias=True)
              (drop_path): Identity()
            )
            (1): ConvNextV2Layer(
              (dwconv): Conv2d(1024, 1024, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=1024)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=1024, out_features=4096, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=4096, out_features=1024, bias=True)
              (drop_path): Identity()
            )
            (2): ConvNextV2Layer(
              (dwconv): Conv2d(1024, 1024, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=1024)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=1024, out_features=4096, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=4096, out_features=1024, bias=True)
              (drop_path): Identity()
            )
          )
        )
      )
    )
    (layernorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
  )
  (classifier): Linear(in_features=1024, out_features=1000, bias=True)
) argument after ** must be a mapping, not list

In [10]:
args = TrainingArguments(
    f"{model_name}-finetuned-cassava-leaf-disease",
    remove_unused_columns=False,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=4,
    dataloader_num_workers=NUM_WORKERS,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=NUM_TRAIN_EPOCHS,
    warmup_ratio=0.1,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    push_to_hub=True,
)


In [11]:
metric = load_metric("accuracy")


# the compute_metrics function takes a Named Tuple as input:
# predictions, which are the logits of the model as Numpy arrays,
# and label_ids, which are the ground-truth labels as Numpy arrays.
def compute_metrics(eval_pred):
    """Computes accuracy on a batch of predictions"""
    predictions = np.argmax(eval_pred.predictions, axis=1)
    return metric.compute(predictions=predictions, references=eval_pred.label_ids)


def collate_fn(examples):
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    labels = torch.tensor([example["label"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}


  metric = load_metric("accuracy")
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


In [12]:
trainer = Trainer(
    model,
    args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
    data_collator=collate_fn,
)


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False)


In [13]:
%env CUDA_VISIBLE_DEVICES=0,1


env: CUDA_VISIBLE_DEVICES=0,1


In [14]:
_, test_y_pred, _ = trainer.predict(val_ds)




In [24]:
test_y = val_ds[:]['label']


In [26]:
type(test_y[0])


int

In [27]:
from sklearn.metrics import confusion_matrix


In [38]:
confusion_matrix(test_y, test_y_pred)


array([[ 116,    0,    0,    0,    0],
       [   0,  231,    0,    0,    0],
       [   0,    0,  244,    0,    0],
       [   0,    0,    0, 1301,    0],
       [   0,    0,    0,    0,  248]])

In [35]:
train_y = []
for i in train_ds:
    train_y.append(i['label'])
len(train_y)


19257

In [40]:
type(model)


transformers.models.convnextv2.modeling_convnextv2.ConvNextV2ForImageClassification

In [36]:
_, train_y_pred, _ = trainer.predict(train_ds)




In [39]:
confusion_matrix(train_y, train_y_pred)


array([[  971,     0,     0,     0,     0],
       [    0,  1958,     0,     0,     0],
       [    0,     0,  2142,     0,     0],
       [    0,     0,     0, 11857,     0],
       [    0,     0,     0,     0,  2329]])

In [15]:
print("Begin training...")
train_results = trainer.train()
print("training ends")
# rest is optional but nice to have
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()


Begin training...




Epoch,Training Loss,Validation Loss,Accuracy
0,7.5749,2.582166,0.491121
2,0.7763,0.60695,0.784112
4,0.4949,0.431343,0.846262
6,0.4194,0.39027,0.862617
8,0.3836,0.385523,0.869626
10,0.371,0.348935,0.878505
12,0.3454,0.345696,0.88271
14,0.3251,0.335924,0.882243




RuntimeError: [enforce fail at inline_container.cc:595] . unexpected pos 503262976 vs 503262864