# SwinT FineTuning with CIFAR-10

## Import Library

In [23]:
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from datasets import load_dataset,load_metric

from transformers import AutoImageProcessor, SwinForImageClassification, TrainingArguments, Trainer

## Load Dataset (CIFAR10)

In [24]:

dataset = load_dataset('cifar10',split='train')
dataset

Found cached dataset cifar10 (C:/Users/USER/.cache/huggingface/datasets/cifar10/plain_text/1.0.0/447d6ec4733dddd1ce3bb577c7166b986eaa4c538dcd9e805ba61f35674a9de4)


Dataset({
    features: ['img', 'label'],
    num_rows: 50000
})

In [25]:
label2id = {"plane":0, "car":1, "bird":2, "cat":3, "deer":4, "dog":5, "frog":6, "horse":7, "ship":8, "truck":9}
id2label = {0:"plane", 1:"car", 2:"bird", 3:"cat", 4:"deer", 5:"dog", 6:"frog", 7:"horse", 8:"ship", 9:"truck"}

In [26]:
model_name_or_path = 'microsoft/swin-tiny-patch4-window7-224'
feature_extractor = AutoImageProcessor.from_pretrained(model_name_or_path)

loading configuration file preprocessor_config.json from cache at C:\Users\USER/.cache\huggingface\hub\models--microsoft--swin-tiny-patch4-window7-224\snapshots\83d40fb5b9320b349382208d9e7fe998484e99df\preprocessor_config.json
Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.
size should be a dictionary on of the following set of keys: ({'height', 'width'}, {'shortest_edge'}, {'shortest_edge', 'longest_edge'}), got 224. Converted to {'height': 224, 'width': 224}.
Image processor ViTImageProcessor {
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.485,
    0.456,
    0.406
  ],
  "image_processor_type": "ViTImageProcessor",
  "image_std": [
    0.229,
    0.224,
    0.225
  ],
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 224,
    "width": 224
  }
}



In [7]:
def transform(example_batch):
    # Take a list of PIL images and turn them to pixel values
    inputs = feature_extractor([x for x in example_batch['img']], return_tensors='pt')

    # Don't forget to include the labels!
    inputs['label'] = example_batch['label']
    return inputs

In [8]:
prepared_ds = dataset.with_transform(transform)
prepared_ds = prepared_ds.train_test_split(test_size=0.1)
prepared_ds

DatasetDict({
    train: Dataset({
        features: ['img', 'label'],
        num_rows: 45000
    })
    test: Dataset({
        features: ['img', 'label'],
        num_rows: 5000
    })
})

## Training

In [9]:
def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['label'] for x in batch])
    }

In [10]:
metric = load_metric("accuracy")
def compute_metrics(p):
    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)

  metric = load_metric("accuracy")


In [12]:

model = SwinForImageClassification.from_pretrained(
    model_name_or_path,
    num_labels=10,
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes = True)

Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-tiny-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([10, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([10]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
training_args = TrainingArguments(
  output_dir="./",
  per_device_train_batch_size=8,
  evaluation_strategy="steps",
  num_train_epochs=2,
  fp16=True,
  save_steps=10000,
  eval_steps=10000,
  logging_steps=1000,
  learning_rate=2e-4,
  save_total_limit=2,
  remove_unused_columns=False,
  push_to_hub=False,
  report_to='tensorboard',
  load_best_model_at_end=True,
  
)

PyTorch: setting up devices


In [16]:
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=prepared_ds["train"],
    eval_dataset=prepared_ds["test"],
    tokenizer=feature_extractor,
)

Using cuda_amp half precision backend


In [17]:
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

***** Running training *****
  Num examples = 45000
  Num Epochs = 2
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 11250
  Number of trainable parameters = 27527044
  1%|          | 100/11250 [01:12<1:56:14,  1.60it/s]

{'loss': 2.0563, 'learning_rate': 0.00019872000000000002, 'epoch': 0.02}


  2%|▏         | 200/11250 [02:14<1:55:45,  1.59it/s]

{'loss': 1.3852, 'learning_rate': 0.00019694222222222223, 'epoch': 0.04}


  3%|▎         | 300/11250 [03:17<1:54:51,  1.59it/s]

{'loss': 1.1657, 'learning_rate': 0.00019516444444444447, 'epoch': 0.05}


  4%|▎         | 400/11250 [04:21<1:53:53,  1.59it/s]

{'loss': 1.0877, 'learning_rate': 0.00019338666666666668, 'epoch': 0.07}


  4%|▍         | 500/11250 [05:24<1:53:06,  1.58it/s]

{'loss': 1.1257, 'learning_rate': 0.0001916088888888889, 'epoch': 0.09}


  5%|▌         | 600/11250 [06:27<1:51:36,  1.59it/s]

{'loss': 0.9694, 'learning_rate': 0.0001898488888888889, 'epoch': 0.11}


  6%|▌         | 700/11250 [07:30<1:50:26,  1.59it/s]

{'loss': 0.9292, 'learning_rate': 0.0001880711111111111, 'epoch': 0.12}


  7%|▋         | 800/11250 [08:33<1:49:18,  1.59it/s]

{'loss': 0.8547, 'learning_rate': 0.00018629333333333335, 'epoch': 0.14}


  8%|▊         | 900/11250 [09:36<1:48:29,  1.59it/s]

{'loss': 0.7776, 'learning_rate': 0.00018451555555555558, 'epoch': 0.16}


  9%|▉         | 1000/11250 [10:39<1:47:22,  1.59it/s]

{'loss': 0.7655, 'learning_rate': 0.0001827377777777778, 'epoch': 0.18}


 10%|▉         | 1100/11250 [11:42<1:46:15,  1.59it/s]

{'loss': 0.7303, 'learning_rate': 0.00018096000000000003, 'epoch': 0.2}


 11%|█         | 1200/11250 [12:45<1:45:33,  1.59it/s]

{'loss': 0.6834, 'learning_rate': 0.00017918222222222224, 'epoch': 0.21}


 12%|█▏        | 1300/11250 [13:48<1:44:40,  1.58it/s]

{'loss': 0.6377, 'learning_rate': 0.00017740444444444447, 'epoch': 0.23}


 12%|█▏        | 1400/11250 [14:51<1:43:09,  1.59it/s]

{'loss': 0.6422, 'learning_rate': 0.00017562666666666668, 'epoch': 0.25}


 13%|█▎        | 1500/11250 [15:54<1:42:09,  1.59it/s]

{'loss': 0.625, 'learning_rate': 0.0001738488888888889, 'epoch': 0.27}


 14%|█▍        | 1600/11250 [16:57<1:41:32,  1.58it/s]

{'loss': 0.5976, 'learning_rate': 0.00017207111111111113, 'epoch': 0.28}


 15%|█▌        | 1700/11250 [18:00<1:40:34,  1.58it/s]

{'loss': 0.6345, 'learning_rate': 0.00017029333333333334, 'epoch': 0.3}


 16%|█▌        | 1800/11250 [19:03<1:39:51,  1.58it/s]

{'loss': 0.5663, 'learning_rate': 0.00016851555555555555, 'epoch': 0.32}


 17%|█▋        | 1900/11250 [20:06<1:38:18,  1.59it/s]

{'loss': 0.598, 'learning_rate': 0.00016673777777777778, 'epoch': 0.34}


 18%|█▊        | 2000/11250 [21:09<1:36:55,  1.59it/s]

{'loss': 0.5908, 'learning_rate': 0.00016496, 'epoch': 0.36}


 19%|█▊        | 2100/11250 [22:12<1:35:58,  1.59it/s]

{'loss': 0.571, 'learning_rate': 0.00016318222222222223, 'epoch': 0.37}


 20%|█▉        | 2200/11250 [23:15<1:34:47,  1.59it/s]

{'loss': 0.6257, 'learning_rate': 0.00016140444444444444, 'epoch': 0.39}


 20%|██        | 2300/11250 [24:18<1:34:00,  1.59it/s]

{'loss': 0.6574, 'learning_rate': 0.00015962666666666667, 'epoch': 0.41}


 21%|██▏       | 2400/11250 [25:21<1:32:54,  1.59it/s]

{'loss': 0.6243, 'learning_rate': 0.00015784888888888888, 'epoch': 0.43}


 22%|██▏       | 2500/11250 [26:24<1:31:52,  1.59it/s]

{'loss': 0.5526, 'learning_rate': 0.00015607111111111112, 'epoch': 0.44}


 23%|██▎       | 2600/11250 [27:27<1:30:30,  1.59it/s]

{'loss': 0.6594, 'learning_rate': 0.0001543111111111111, 'epoch': 0.46}


 24%|██▍       | 2700/11250 [28:31<1:29:51,  1.59it/s]

{'loss': 0.6458, 'learning_rate': 0.00015253333333333335, 'epoch': 0.48}


 25%|██▍       | 2800/11250 [29:34<1:28:28,  1.59it/s]

{'loss': 0.6361, 'learning_rate': 0.00015075555555555555, 'epoch': 0.5}


 26%|██▌       | 2900/11250 [30:37<1:27:36,  1.59it/s]

{'loss': 0.6819, 'learning_rate': 0.0001489777777777778, 'epoch': 0.52}


 27%|██▋       | 3000/11250 [31:40<1:26:39,  1.59it/s]

{'loss': 0.7158, 'learning_rate': 0.0001472, 'epoch': 0.53}


 28%|██▊       | 3100/11250 [32:43<1:25:39,  1.59it/s]

{'loss': 0.7348, 'learning_rate': 0.00014542222222222224, 'epoch': 0.55}


 28%|██▊       | 3200/11250 [33:46<1:24:40,  1.58it/s]

{'loss': 0.7012, 'learning_rate': 0.00014364444444444445, 'epoch': 0.57}


 29%|██▉       | 3300/11250 [34:49<1:23:28,  1.59it/s]

{'loss': 0.7201, 'learning_rate': 0.00014186666666666668, 'epoch': 0.59}


 30%|███       | 3400/11250 [35:52<1:22:21,  1.59it/s]

{'loss': 0.6529, 'learning_rate': 0.0001400888888888889, 'epoch': 0.6}


 31%|███       | 3500/11250 [36:55<1:21:18,  1.59it/s]

{'loss': 0.718, 'learning_rate': 0.00013831111111111113, 'epoch': 0.62}


 32%|███▏      | 3600/11250 [37:58<1:20:21,  1.59it/s]

{'loss': 1.1469, 'learning_rate': 0.00013653333333333334, 'epoch': 0.64}


 33%|███▎      | 3700/11250 [39:01<1:19:21,  1.59it/s]

{'loss': 0.9117, 'learning_rate': 0.00013475555555555557, 'epoch': 0.66}


 34%|███▍      | 3800/11250 [40:04<1:18:01,  1.59it/s]

{'loss': 0.9806, 'learning_rate': 0.00013297777777777778, 'epoch': 0.68}


 35%|███▍      | 3900/11250 [41:07<1:16:53,  1.59it/s]

{'loss': 0.9532, 'learning_rate': 0.00013120000000000002, 'epoch': 0.69}


 36%|███▌      | 4000/11250 [42:16<1:27:50,  1.38it/s]

{'loss': 0.9618, 'learning_rate': 0.00012942222222222223, 'epoch': 0.71}


 36%|███▋      | 4100/11250 [43:25<1:21:11,  1.47it/s]

{'loss': 0.9733, 'learning_rate': 0.00012764444444444446, 'epoch': 0.73}


 37%|███▋      | 4200/11250 [44:34<1:19:55,  1.47it/s]

{'loss': 0.9104, 'learning_rate': 0.00012586666666666667, 'epoch': 0.75}


 38%|███▊      | 4300/11250 [45:39<1:12:19,  1.60it/s]

{'loss': 0.932, 'learning_rate': 0.0001240888888888889, 'epoch': 0.76}


 39%|███▉      | 4400/11250 [46:42<1:11:32,  1.60it/s]

{'loss': 0.8695, 'learning_rate': 0.00012231111111111112, 'epoch': 0.78}


 40%|████      | 4500/11250 [47:45<1:10:10,  1.60it/s]

{'loss': 0.88, 'learning_rate': 0.00012053333333333334, 'epoch': 0.8}


 41%|████      | 4600/11250 [48:48<1:09:12,  1.60it/s]

{'loss': 1.5663, 'learning_rate': 0.00011875555555555555, 'epoch': 0.82}


 42%|████▏     | 4700/11250 [49:51<1:08:19,  1.60it/s]

{'loss': 1.53, 'learning_rate': 0.00011697777777777779, 'epoch': 0.84}


 43%|████▎     | 4800/11250 [50:55<1:07:04,  1.60it/s]

{'loss': 1.529, 'learning_rate': 0.0001152, 'epoch': 0.85}


 44%|████▎     | 4900/11250 [51:58<1:06:22,  1.59it/s]

{'loss': 1.498, 'learning_rate': 0.00011342222222222223, 'epoch': 0.87}


 44%|████▍     | 5000/11250 [53:01<1:05:00,  1.60it/s]

{'loss': 1.4818, 'learning_rate': 0.00011164444444444444, 'epoch': 0.89}


 45%|████▌     | 5100/11250 [54:04<1:03:55,  1.60it/s]

{'loss': 1.4357, 'learning_rate': 0.00010986666666666668, 'epoch': 0.91}


 46%|████▌     | 5200/11250 [55:07<1:02:55,  1.60it/s]

{'loss': 1.4339, 'learning_rate': 0.00010808888888888889, 'epoch': 0.92}


 47%|████▋     | 5300/11250 [56:10<1:02:02,  1.60it/s]

{'loss': 1.4618, 'learning_rate': 0.00010631111111111112, 'epoch': 0.94}


 48%|████▊     | 5400/11250 [57:13<1:00:48,  1.60it/s]

{'loss': 1.4281, 'learning_rate': 0.00010453333333333333, 'epoch': 0.96}


 49%|████▉     | 5500/11250 [58:16<59:52,  1.60it/s]  

{'loss': 1.4366, 'learning_rate': 0.00010275555555555557, 'epoch': 0.98}


 50%|████▉     | 5600/11250 [59:19<58:49,  1.60it/s]  

{'loss': 1.4121, 'learning_rate': 0.00010097777777777779, 'epoch': 1.0}


 51%|█████     | 5700/11250 [1:00:23<57:45,  1.60it/s]

{'loss': 1.4007, 'learning_rate': 9.92e-05, 'epoch': 1.01}


 52%|█████▏    | 5800/11250 [1:01:26<56:42,  1.60it/s]  

{'loss': 1.3908, 'learning_rate': 9.742222222222222e-05, 'epoch': 1.03}


 52%|█████▏    | 5900/11250 [1:02:29<55:43,  1.60it/s]  

{'loss': 1.3717, 'learning_rate': 9.564444444444444e-05, 'epoch': 1.05}


 53%|█████▎    | 6000/11250 [1:03:32<54:40,  1.60it/s]

{'loss': 1.3882, 'learning_rate': 9.386666666666667e-05, 'epoch': 1.07}


 54%|█████▍    | 6100/11250 [1:04:35<53:43,  1.60it/s]

{'loss': 1.3586, 'learning_rate': 9.208888888888889e-05, 'epoch': 1.08}


 55%|█████▌    | 6200/11250 [1:05:38<52:36,  1.60it/s]

{'loss': 1.3455, 'learning_rate': 9.031111111111111e-05, 'epoch': 1.1}


 56%|█████▌    | 6300/11250 [1:06:41<51:30,  1.60it/s]

{'loss': 1.3508, 'learning_rate': 8.853333333333333e-05, 'epoch': 1.12}


 57%|█████▋    | 6400/11250 [1:07:44<50:25,  1.60it/s]

{'loss': 1.3184, 'learning_rate': 8.675555555555556e-05, 'epoch': 1.14}


 58%|█████▊    | 6500/11250 [1:08:47<49:27,  1.60it/s]

{'loss': 1.3365, 'learning_rate': 8.497777777777778e-05, 'epoch': 1.16}


 59%|█████▊    | 6600/11250 [1:09:50<48:21,  1.60it/s]

{'loss': 1.4316, 'learning_rate': 8.321777777777778e-05, 'epoch': 1.17}


 60%|█████▉    | 6700/11250 [1:10:54<47:21,  1.60it/s]

{'loss': 1.382, 'learning_rate': 8.144e-05, 'epoch': 1.19}


 60%|██████    | 6800/11250 [1:11:57<46:24,  1.60it/s]

{'loss': 1.3625, 'learning_rate': 7.966222222222223e-05, 'epoch': 1.21}


 61%|██████▏   | 6900/11250 [1:13:00<45:22,  1.60it/s]

{'loss': 1.3804, 'learning_rate': 7.788444444444445e-05, 'epoch': 1.23}


 62%|██████▏   | 7000/11250 [1:14:03<44:06,  1.61it/s]

{'loss': 1.3825, 'learning_rate': 7.610666666666667e-05, 'epoch': 1.24}


 63%|██████▎   | 7100/11250 [1:15:06<43:06,  1.60it/s]

{'loss': 1.3117, 'learning_rate': 7.43288888888889e-05, 'epoch': 1.26}


 64%|██████▍   | 7200/11250 [1:16:09<42:11,  1.60it/s]

{'loss': 1.374, 'learning_rate': 7.255111111111112e-05, 'epoch': 1.28}


 65%|██████▍   | 7300/11250 [1:17:12<41:05,  1.60it/s]

{'loss': 1.3357, 'learning_rate': 7.077333333333334e-05, 'epoch': 1.3}


 66%|██████▌   | 7400/11250 [1:18:15<40:05,  1.60it/s]

{'loss': 1.3415, 'learning_rate': 6.899555555555555e-05, 'epoch': 1.32}


 67%|██████▋   | 7500/11250 [1:19:18<39:10,  1.60it/s]

{'loss': 1.3412, 'learning_rate': 6.721777777777777e-05, 'epoch': 1.33}


 68%|██████▊   | 7600/11250 [1:20:22<37:58,  1.60it/s]

{'loss': 1.3315, 'learning_rate': 6.544e-05, 'epoch': 1.35}


 68%|██████▊   | 7700/11250 [1:21:25<36:56,  1.60it/s]

{'loss': 1.3273, 'learning_rate': 6.366222222222222e-05, 'epoch': 1.37}


 69%|██████▉   | 7800/11250 [1:22:28<35:55,  1.60it/s]

{'loss': 1.3196, 'learning_rate': 6.188444444444444e-05, 'epoch': 1.39}


 70%|███████   | 7900/11250 [1:23:31<34:55,  1.60it/s]

{'loss': 1.3312, 'learning_rate': 6.0106666666666665e-05, 'epoch': 1.4}


 71%|███████   | 8000/11250 [1:24:34<33:53,  1.60it/s]

{'loss': 1.3072, 'learning_rate': 5.832888888888889e-05, 'epoch': 1.42}


 72%|███████▏  | 8100/11250 [1:25:37<32:48,  1.60it/s]

{'loss': 1.3156, 'learning_rate': 5.655111111111111e-05, 'epoch': 1.44}


 73%|███████▎  | 8200/11250 [1:26:40<31:50,  1.60it/s]

{'loss': 1.2831, 'learning_rate': 5.477333333333333e-05, 'epoch': 1.46}


 74%|███████▍  | 8300/11250 [1:27:44<30:42,  1.60it/s]

{'loss': 1.2804, 'learning_rate': 5.2995555555555556e-05, 'epoch': 1.48}


 75%|███████▍  | 8400/11250 [1:28:47<29:38,  1.60it/s]

{'loss': 1.3131, 'learning_rate': 5.121777777777778e-05, 'epoch': 1.49}


 76%|███████▌  | 8500/11250 [1:29:50<28:40,  1.60it/s]

{'loss': 1.2933, 'learning_rate': 4.944e-05, 'epoch': 1.51}


 76%|███████▋  | 8600/11250 [1:30:53<27:36,  1.60it/s]

{'loss': 1.3177, 'learning_rate': 4.7680000000000004e-05, 'epoch': 1.53}


 77%|███████▋  | 8700/11250 [1:31:56<26:35,  1.60it/s]

{'loss': 1.2945, 'learning_rate': 4.590222222222223e-05, 'epoch': 1.55}


 78%|███████▊  | 8800/11250 [1:32:59<25:31,  1.60it/s]

{'loss': 1.309, 'learning_rate': 4.412444444444444e-05, 'epoch': 1.56}


 79%|███████▉  | 8900/11250 [1:34:02<24:27,  1.60it/s]

{'loss': 1.3098, 'learning_rate': 4.2346666666666666e-05, 'epoch': 1.58}


 80%|████████  | 9000/11250 [1:35:05<23:24,  1.60it/s]

{'loss': 1.3024, 'learning_rate': 4.056888888888889e-05, 'epoch': 1.6}


 81%|████████  | 9100/11250 [1:36:08<22:28,  1.59it/s]

{'loss': 1.2904, 'learning_rate': 3.879111111111111e-05, 'epoch': 1.62}


 82%|████████▏ | 9200/11250 [1:37:15<26:56,  1.27it/s]

{'loss': 1.2988, 'learning_rate': 3.7031111111111114e-05, 'epoch': 1.64}


 83%|████████▎ | 9300/11250 [1:38:31<24:01,  1.35it/s]

{'loss': 1.2944, 'learning_rate': 3.525333333333333e-05, 'epoch': 1.65}


 84%|████████▎ | 9400/11250 [1:39:39<19:05,  1.61it/s]

{'loss': 1.2921, 'learning_rate': 3.347555555555555e-05, 'epoch': 1.67}


 84%|████████▍ | 9500/11250 [1:40:41<18:09,  1.61it/s]

{'loss': 1.2929, 'learning_rate': 3.169777777777778e-05, 'epoch': 1.69}


 85%|████████▌ | 9600/11250 [1:41:43<17:04,  1.61it/s]

{'loss': 1.2659, 'learning_rate': 2.9920000000000005e-05, 'epoch': 1.71}


 86%|████████▌ | 9700/11250 [1:42:45<16:00,  1.61it/s]

{'loss': 1.2531, 'learning_rate': 2.8142222222222224e-05, 'epoch': 1.72}


 87%|████████▋ | 9800/11250 [1:43:47<14:59,  1.61it/s]

{'loss': 1.2831, 'learning_rate': 2.6364444444444447e-05, 'epoch': 1.74}


 88%|████████▊ | 9900/11250 [1:44:49<13:57,  1.61it/s]

{'loss': 1.285, 'learning_rate': 2.458666666666667e-05, 'epoch': 1.76}


 89%|████████▉ | 10000/11250 [1:45:51<12:57,  1.61it/s]***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 1.2616, 'learning_rate': 2.2808888888888892e-05, 'epoch': 1.78}



 89%|████████▉ | 10000/11250 [1:48:16<12:57,  1.61it/s]Saving model checkpoint to ./checkpoint-10000
Configuration saved in ./checkpoint-10000\config.json
Model weights saved in ./checkpoint-10000\pytorch_model.bin
Image processor saved in ./checkpoint-10000\preprocessor_config.json


{'eval_loss': 1.1885565519332886, 'eval_accuracy': 0.7632, 'eval_runtime': 145.0008, 'eval_samples_per_second': 34.483, 'eval_steps_per_second': 4.31, 'epoch': 1.78}


 90%|████████▉ | 10100/11250 [1:49:19<11:53,  1.61it/s]   

{'loss': 1.2529, 'learning_rate': 2.103111111111111e-05, 'epoch': 1.8}


 91%|█████████ | 10200/11250 [1:50:21<10:50,  1.61it/s]

{'loss': 1.2494, 'learning_rate': 1.9253333333333334e-05, 'epoch': 1.81}


 92%|█████████▏| 10300/11250 [1:51:23<09:48,  1.61it/s]

{'loss': 1.2588, 'learning_rate': 1.7475555555555557e-05, 'epoch': 1.83}


 92%|█████████▏| 10400/11250 [1:52:25<08:46,  1.61it/s]

{'loss': 1.2548, 'learning_rate': 1.569777777777778e-05, 'epoch': 1.85}


 93%|█████████▎| 10500/11250 [1:53:27<07:44,  1.61it/s]

{'loss': 1.2543, 'learning_rate': 1.3919999999999999e-05, 'epoch': 1.87}


 94%|█████████▍| 10600/11250 [1:54:29<06:42,  1.62it/s]

{'loss': 1.2578, 'learning_rate': 1.2142222222222223e-05, 'epoch': 1.88}


 95%|█████████▌| 10700/11250 [1:55:31<05:42,  1.61it/s]

{'loss': 1.2917, 'learning_rate': 1.0364444444444444e-05, 'epoch': 1.9}


 96%|█████████▌| 10800/11250 [1:56:33<04:38,  1.61it/s]

{'loss': 1.2614, 'learning_rate': 8.586666666666667e-06, 'epoch': 1.92}


 97%|█████████▋| 10900/11250 [1:57:35<03:36,  1.61it/s]

{'loss': 1.24, 'learning_rate': 6.80888888888889e-06, 'epoch': 1.94}


 98%|█████████▊| 11000/11250 [1:58:37<02:35,  1.61it/s]

{'loss': 1.2315, 'learning_rate': 5.031111111111111e-06, 'epoch': 1.96}


 99%|█████████▊| 11100/11250 [1:59:39<01:33,  1.61it/s]

{'loss': 1.23, 'learning_rate': 3.2533333333333332e-06, 'epoch': 1.97}


100%|█████████▉| 11200/11250 [2:00:41<00:30,  1.61it/s]

{'loss': 1.2594, 'learning_rate': 1.4755555555555555e-06, 'epoch': 1.99}


100%|██████████| 11250/11250 [2:01:12<00:00,  1.61it/s]

Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./checkpoint-10000 (score: 1.1885565519332886).
100%|██████████| 11250/11250 [2:01:12<00:00,  1.55it/s]
Saving model checkpoint to ./
Configuration saved in ./config.json
Model weights saved in ./pytorch_model.bin
Image processor saved in ./preprocessor_config.json


{'train_runtime': 7272.7674, 'train_samples_per_second': 12.375, 'train_steps_per_second': 1.547, 'train_loss': 1.129531569756402, 'epoch': 2.0}
***** train metrics *****
  epoch                    =        2.0
  train_loss               =     1.1295
  train_runtime            = 2:01:12.76
  train_samples_per_second =     12.375
  train_steps_per_second   =      1.547


In [20]:
dataset_test = load_dataset('cifar10',split='test')
dataset_test

Found cached dataset cifar10 (C:/Users/USER/.cache/huggingface/datasets/cifar10/plain_text/1.0.0/447d6ec4733dddd1ce3bb577c7166b986eaa4c538dcd9e805ba61f35674a9de4)


Dataset({
    features: ['img', 'label'],
    num_rows: 10000
})

## Testing

In [18]:
model = SwinForImageClassification.from_pretrained(
    './checkpoint-10000/',
    num_labels=10,
    id2label=id2label,
    label2id=label2id)

loading configuration file ./checkpoint-10000/config.json
Model config SwinConfig {
  "_name_or_path": "microsoft/swin-tiny-patch4-window7-224",
  "architectures": [
    "SwinForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "depths": [
    2,
    2,
    6,
    2
  ],
  "drop_path_rate": 0.1,
  "embed_dim": 96,
  "encoder_stride": 32,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "plane",
    "1": "car",
    "2": "bird",
    "3": "cat",
    "4": "deer",
    "5": "dog",
    "6": "frog",
    "7": "horse",
    "8": "ship",
    "9": "truck"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "label2id": {
    "bird": 2,
    "car": 1,
    "cat": 3,
    "deer": 4,
    "dog": 5,
    "frog": 6,
    "horse": 7,
    "plane": 0,
    "ship": 8,
    "truck": 9
  },
  "layer_norm_eps": 1e-05,
  "mlp_ratio": 4.0,
  "model_type": "swin",
  "num_channels": 3,
  "num_heads": [
    3,
    6,
    12,
    24
  ],
  "num_layers"

In [21]:
prepared_ds_test = dataset_test.with_transform(transform)

In [22]:
# Predictions
y_test_predict = trainer.predict(prepared_ds_test)

# Take a look at the predictions
y_test_predict

***** Running Prediction *****
  Num examples = 10000
  Batch size = 8
100%|██████████| 1250/1250 [05:01<00:00,  4.14it/s]


PredictionOutput(predictions=array([[-1.347e+00, -1.432e+00, -1.154e+00, ..., -1.906e+00, -1.865e+00,
        -2.371e+00],
       [ 1.724e-03,  1.224e-01, -1.603e+00, ..., -2.043e+00,  8.379e-01,
        -6.997e-01],
       [-6.089e-01,  8.643e-01, -1.550e+00, ..., -1.796e+00,  6.260e-01,
         1.650e-01],
       ...,
       [-1.830e+00, -1.491e+00, -5.156e-01, ..., -1.500e+00, -1.869e+00,
        -2.420e+00],
       [-5.273e-01, -2.466e-01, -7.891e-01, ..., -1.221e+00, -8.560e-01,
        -1.460e+00],
       [-1.527e+00, -1.594e+00, -2.480e-01, ...,  1.045e+00, -1.963e+00,
        -1.500e+00]], dtype=float16), label_ids=array([3, 8, 8, ..., 5, 1, 7], dtype=int64), metrics={'test_loss': 1.1890652179718018, 'test_accuracy': 0.764, 'test_runtime': 302.3716, 'test_samples_per_second': 33.072, 'test_steps_per_second': 4.134})