In [36]:

import os
import glob

import matplotlib.pyplot as plt
from pathlib import Path
from PIL import Image
from datasets import load_dataset
from transformers import AutoImageProcessor, AutoModelForImageClassification, TrainingArguments, Trainer

In [4]:
ROOT_DATASET_PATH = Path('./cifake/')
TRAIN_DATASET_PATH = ROOT_DATASET_PATH / 'train'
TEST_DATASET_PATH = ROOT_DATASET_PATH / 'test'

In [19]:
data = load_dataset('imagefolder', data_dir="./cifake/")

Resolving data files:   0%|          | 0/100000 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/20000 [00:00<?, ?it/s]

Downloading data files:   0%|          | 0/100000 [00:00<?, ?it/s]

Downloading data files: 0it [00:00, ?it/s]

Extracting data files: 0it [00:00, ?it/s]

Downloading data files:   0%|          | 0/20000 [00:00<?, ?it/s]

Downloading data files: 0it [00:00, ?it/s]

Extracting data files: 0it [00:00, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

In [29]:
labels = data["train"].features["label"].names
label2id, id2label = dict(), dict()
for i, label in enumerate(labels):
    label2id[label] = i
    id2label[i] = label

In [33]:
checkpoint = "microsoft/resnet-50"
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

image_processor

Downloading (…)rocessor_config.json:   0%|          | 0.00/266 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


Downloading (…)lve/main/config.json:   0%|          | 0.00/69.6k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/103M [00:00<?, ?B/s]

Some weights of the model checkpoint at microsoft/resnet-50 were not used when initializing ResNetModel: ['classifier.1.bias', 'classifier.1.weight']
- This IS expected if you are initializing ResNetModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ResNetModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


ConvNextImageProcessor {
  "crop_pct": 0.875,
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.485,
    0.456,
    0.406
  ],
  "image_processor_type": "ConvNextImageProcessor",
  "image_std": [
    0.229,
    0.224,
    0.225
  ],
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "shortest_edge": 224
  }
}

In [34]:
from torchvision.transforms import (
    CenterCrop,
    Compose,
    Normalize,
    RandomHorizontalFlip,
    RandomResizedCrop,
    Resize,
    ToTensor,
)

normalize = Normalize(mean=image_processor.image_mean,
                      std=image_processor.image_std)
if "height" in image_processor.size:
    size = (image_processor.size["height"], image_processor.size["width"])
    crop_size = size
    max_size = None
elif "shortest_edge" in image_processor.size:
    size = image_processor.size["shortest_edge"]
    crop_size = (size, size)
    max_size = image_processor.size.get("longest_edge")

train_transforms = Compose(
    [
        RandomResizedCrop(crop_size),
        RandomHorizontalFlip(),
        ToTensor(),
        normalize,
    ]
)

val_transforms = Compose(
    [
        Resize(size),
        CenterCrop(crop_size),
        ToTensor(),
        normalize,
    ]
)

def preprocess_train(example_batch):
    example_batch["pixel_values"] = [
        train_transforms(image.convert("RGB")) for image in example_batch["image"]
    ]
    return example_batch

def preprocess_val(example_batch):
    example_batch["pixel_values"] = [val_transforms(
        image.convert("RGB")) for image in example_batch["image"]]
    return example_batch

splits = data["train"].train_test_split(test_size=0.1)
train_ds = splits['train']
val_ds = splits['test']

train_ds.set_transform(preprocess_train)
val_ds.set_transform(preprocess_val)

In [39]:
model = AutoModelForImageClassification.from_pretrained(
    checkpoint,
    label2id=label2id,
    id2label=id2label,
    ignore_mismatched_sizes=True,
)

Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-50 and are newly initialized because the shapes did not match:
- classifier.1.weight: found shape torch.Size([1000, 2048]) in the checkpoint and torch.Size([2, 2048]) in the model instantiated
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [41]:
import numpy as np
from datasets import load_metric

metric = load_metric("f1")

def compute_metrics(eval_pred):
    predictions = np.argmax(eval_pred.predictions, axis=1)
    return metric.compute(predictions=predictions, references=eval_pred.label_ids)

In [42]:
batch_size = 32
model_name = checkpoint.split("/")[-1]

args = TrainingArguments(
    f"{model_name}-finetuned-cifake",
    remove_unused_columns=False,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=3,
    warmup_ratio=0.1,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
)

In [43]:
import torch

def collate_fn(examples):
    pixel_values = torch.stack([example["pixel_values"]
                               for example in examples])
    labels = torch.tensor([example["label"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}

In [44]:
trainer = Trainer(
    model,
    args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=image_processor,
    compute_metrics=compute_metrics,
    data_collator=collate_fn,
)

In [45]:
train_results = trainer.train()

trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33msulthanabiyyu[0m. Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/2109 [00:00<?, ?it/s]

{'loss': 0.6928, 'learning_rate': 2.3696682464454976e-06, 'epoch': 0.01}
{'loss': 0.6921, 'learning_rate': 4.739336492890995e-06, 'epoch': 0.03}
{'loss': 0.6911, 'learning_rate': 7.109004739336493e-06, 'epoch': 0.04}
{'loss': 0.6908, 'learning_rate': 9.47867298578199e-06, 'epoch': 0.06}
{'loss': 0.6894, 'learning_rate': 1.184834123222749e-05, 'epoch': 0.07}
{'loss': 0.6881, 'learning_rate': 1.4218009478672985e-05, 'epoch': 0.09}
{'loss': 0.6885, 'learning_rate': 1.6587677725118483e-05, 'epoch': 0.1}
{'loss': 0.686, 'learning_rate': 1.895734597156398e-05, 'epoch': 0.11}
{'loss': 0.6847, 'learning_rate': 2.132701421800948e-05, 'epoch': 0.13}
{'loss': 0.6812, 'learning_rate': 2.369668246445498e-05, 'epoch': 0.14}
{'loss': 0.6781, 'learning_rate': 2.6066350710900477e-05, 'epoch': 0.16}
{'loss': 0.6762, 'learning_rate': 2.843601895734597e-05, 'epoch': 0.17}
{'loss': 0.6743, 'learning_rate': 3.080568720379147e-05, 'epoch': 0.18}
{'loss': 0.668, 'learning_rate': 3.3175355450236966e-05, 'epoch

  0%|          | 0/313 [00:00<?, ?it/s]

{'eval_loss': 0.3254718780517578, 'eval_f1': 0.8436485730274202, 'eval_runtime': 205.6149, 'eval_samples_per_second': 48.635, 'eval_steps_per_second': 1.522, 'epoch': 1.0}
{'loss': 0.3291, 'learning_rate': 3.6854583772391995e-05, 'epoch': 1.01}
{'loss': 0.3617, 'learning_rate': 3.6591148577449954e-05, 'epoch': 1.02}
{'loss': 0.3472, 'learning_rate': 3.6327713382507905e-05, 'epoch': 1.04}
{'loss': 0.3676, 'learning_rate': 3.606427818756586e-05, 'epoch': 1.05}
{'loss': 0.3533, 'learning_rate': 3.5800842992623816e-05, 'epoch': 1.07}
{'loss': 0.3574, 'learning_rate': 3.5537407797681774e-05, 'epoch': 1.08}
{'loss': 0.3606, 'learning_rate': 3.527397260273973e-05, 'epoch': 1.09}
{'loss': 0.348, 'learning_rate': 3.5010537407797684e-05, 'epoch': 1.11}
{'loss': 0.3644, 'learning_rate': 3.4747102212855636e-05, 'epoch': 1.12}
{'loss': 0.3502, 'learning_rate': 3.4483667017913594e-05, 'epoch': 1.14}
{'loss': 0.3625, 'learning_rate': 3.4220231822971546e-05, 'epoch': 1.15}
{'loss': 0.3317, 'learning_r

  0%|          | 0/313 [00:00<?, ?it/s]

{'eval_loss': 0.24906396865844727, 'eval_f1': 0.8916800858829844, 'eval_runtime': 35.1191, 'eval_samples_per_second': 284.745, 'eval_steps_per_second': 8.913, 'epoch': 2.0}
{'loss': 0.3086, 'learning_rate': 1.8414120126448895e-05, 'epoch': 2.0}
{'loss': 0.3016, 'learning_rate': 1.815068493150685e-05, 'epoch': 2.02}
{'loss': 0.3212, 'learning_rate': 1.7887249736564805e-05, 'epoch': 2.03}
{'loss': 0.3007, 'learning_rate': 1.7623814541622764e-05, 'epoch': 2.05}
{'loss': 0.2974, 'learning_rate': 1.7360379346680716e-05, 'epoch': 2.06}
{'loss': 0.3427, 'learning_rate': 1.7096944151738674e-05, 'epoch': 2.08}
{'loss': 0.3064, 'learning_rate': 1.683350895679663e-05, 'epoch': 2.09}
{'loss': 0.2966, 'learning_rate': 1.6570073761854584e-05, 'epoch': 2.1}
{'loss': 0.3136, 'learning_rate': 1.630663856691254e-05, 'epoch': 2.12}
{'loss': 0.3077, 'learning_rate': 1.6043203371970498e-05, 'epoch': 2.13}
{'loss': 0.3009, 'learning_rate': 1.577976817702845e-05, 'epoch': 2.15}
{'loss': 0.2977, 'learning_rat

  0%|          | 0/313 [00:00<?, ?it/s]

{'eval_loss': 0.23767633736133575, 'eval_f1': 0.8985352293381802, 'eval_runtime': 35.1681, 'eval_samples_per_second': 284.349, 'eval_steps_per_second': 8.9, 'epoch': 3.0}
{'train_runtime': 5487.0869, 'train_samples_per_second': 49.206, 'train_steps_per_second': 0.384, 'train_loss': 0.3798611281663109, 'epoch': 3.0}
***** train metrics *****
  epoch                    =        3.0
  train_loss               =     0.3799
  train_runtime            = 1:31:27.08
  train_samples_per_second =     49.206
  train_steps_per_second   =      0.384


In [46]:
metrics = trainer.evaluate()
# some nice to haves:
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)
trainer.push_to_hub()

  0%|          | 0/313 [00:00<?, ?it/s]

***** eval metrics *****
  epoch                   =        3.0
  eval_f1                 =     0.8985
  eval_loss               =     0.2377
  eval_runtime            = 0:00:34.88
  eval_samples_per_second =    286.675
  eval_steps_per_second   =      8.973


LocalTokenNotFoundError: Token is required (`token=True`), but no token found. You need to provide a token or be logged in to Hugging Face with `huggingface-cli login` or `huggingface_hub.login`. See https://huggingface.co/settings/tokens.