In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch.utils.data.dataset import random_split 
from transformers import Trainer, TrainingArguments, AutoFeatureExtractor, BeitImageProcessor, BeitForImageClassification
from torch.utils.data import TensorDataset
from datasets import load_dataset, load_from_disk, Dataset
import torch.optim as optim
import torch.nn as nn
import json
import pandas as pd
import numpy as np
import transformers
import evaluate
import huggingface_hub


In [2]:
# Open the interactive Hugging Face Hub login widget for this notebook session.
huggingface_hub.notebook_login() 

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [91]:
# #Initialise Cuda and check that Cuda is available
# device = torch.device("cuda")
# print(device)
# print(torch.cuda.is_available())

In [3]:
# Load the pretrained BEiT image processor, classification model and feature
# extractor, all from the same checkpoint name.
# NOTE(review): `model` is instantiated again later in this notebook with a
# 2-label head, so the load below appears to be redundant — confirm before removing.
processor = BeitImageProcessor.from_pretrained('microsoft/beit-base-patch16-224-pt22k-ft22k')
model = BeitForImageClassification.from_pretrained('microsoft/beit-base-patch16-224-pt22k-ft22k')
feature_extractor = AutoFeatureExtractor.from_pretrained('microsoft/beit-base-patch16-224-pt22k-ft22k')



In [4]:
# Show the input resolution the feature extractor is configured for.
# `feature_extractor` was already loaded from this same checkpoint in the
# previous cell, so it is reused here instead of being fetched a second time.
print(feature_extractor.size)

{'height': 224, 'width': 224}


In [6]:
# Load the "cats_vs_dogs" dataset through the `datasets` library.
ds = load_dataset("cats_vs_dogs")


Found cached dataset cats_vs_dogs (/home/felixmorgan/.cache/huggingface/datasets/cats_vs_dogs/default/1.0.0/d4fe9cf31b294ed8639aa58f7d8ee13fe189011837038ed9a774fde19a911fcb)


  0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
# Sanity check: show one raw record (a PIL image plus its integer label).
print(ds['train'][100])

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=360x268 at 0x7F6DF2C154F0>, 'labels': 0}


In [138]:
from torchvision.transforms import (
    CenterCrop,
    Compose,
    Normalize,
    RandomHorizontalFlip,
    RandomResizedCrop,
    Resize,
    ToTensor,
)

# Per-channel normalisation driven by the statistics stored on the feature extractor.
normalize = Normalize(
    mean=feature_extractor.image_mean,
    std=feature_extractor.image_std,
)

# Training pipeline: random crop and horizontal flip for augmentation,
# followed by tensor conversion and normalisation.
train_transforms = Compose([
    RandomResizedCrop([224, 224]),
    RandomHorizontalFlip(),
    ToTensor(),
    normalize,
])

# Validation pipeline: deterministic resize and centre crop,
# followed by tensor conversion and normalisation.
val_transforms = Compose([
    Resize([224, 224]),
    CenterCrop([224, 224]),
    ToTensor(),
    normalize,
])

def preprocess_train(example_batch):
    """Add a "pixel_values" entry built with the training transforms.

    Each image in ``example_batch["image"]`` is converted to RGB and passed
    through ``train_transforms``. The batch is updated in place and returned.
    """
    augmented = []
    for picture in example_batch["image"]:
        augmented.append(train_transforms(picture.convert("RGB")))
    example_batch["pixel_values"] = augmented
    return example_batch

def preprocess_val(example_batch):
    """Add a "pixel_values" entry built with the validation transforms.

    Each image in ``example_batch["image"]`` is converted to RGB and passed
    through ``val_transforms``. The batch is updated in place and returned.
    """
    processed = []
    for picture in example_batch["image"]:
        processed.append(val_transforms(picture.convert("RGB")))
    example_batch["pixel_values"] = processed
    return example_batch

In [139]:
# Shuffle the dataset with a fixed seed so the ordering is repeatable.
model_ds = ds.shuffle(seed=42)

Loading cached shuffled indices for dataset at /home/felixmorgan/.cache/huggingface/datasets/cats_vs_dogs/default/1.0.0/d4fe9cf31b294ed8639aa58f7d8ee13fe189011837038ed9a774fde19a911fcb/cache-3f8b93d5734254c1.arrow


In [140]:
# Attach the deterministic validation preprocessing as the default on-the-fly
# transform. The name `transform` that used to be passed here is not defined
# anywhere in this notebook, so the call failed with NameError on a fresh
# kernel. The split-specific transforms set after the train/test split
# replace this default.
model_ds = model_ds.with_transform(preprocess_val)

In [141]:
# Hold out 20% of the shuffled data for evaluation. The split is seeded so the
# same examples land in each partition on every run, which keeps reported
# metrics comparable between runs.
model_ds = model_ds['train'].train_test_split(test_size=0.2, seed=42)

In [142]:
# Convenience handles for the two partitions; the "test" partition serves as
# the validation set in this notebook.
train_ds = model_ds['train']
val_ds = model_ds['test']

In [143]:
# Apply augmentation to training samples and deterministic preprocessing to
# validation samples, computed on access.
# NOTE(review): `train_ds` / `val_ds` are the same objects that `model_ds`
# holds, so these transforms also appear to take effect when the splits are
# reached through `model_ds[...]` — confirm.
train_ds.set_transform(preprocess_train)
val_ds.set_transform(preprocess_val)

In [144]:
# Show the resulting splits, their features and their row counts.
print(model_ds)

DatasetDict({
    train: Dataset({
        features: ['image', 'labels'],
        num_rows: 18728
    })
    test: Dataset({
        features: ['image', 'labels'],
        num_rows: 4682
    })
})


In [153]:

def collate_fn(examples):
    """Combine per-sample dicts into a single batch dict.

    Args:
        examples: Sequence of mappings, each providing a "pixel_values"
            tensor and a "labels" value.

    Returns:
        A dict with "pixel_values" (the sample tensors stacked along a new
        leading dimension) and "labels" (a tensor built from the label values).
    """
    images = []
    targets = []
    for sample in examples:
        images.append(sample["pixel_values"])
        targets.append(sample["labels"])
    return {
        "pixel_values": torch.stack(images),
        "labels": torch.tensor(targets),
    }
    

In [154]:
# Accuracy scorer loaded through the `evaluate` library.
metric = evaluate.load("accuracy")


def compute_metrics(p):
    """Score a prediction object with the accuracy metric.

    The predicted class for each row is the arg-max over axis 1 of
    ``p.predictions``; it is compared against ``p.label_ids``.
    """
    predicted_classes = np.argmax(p.predictions, axis=1)
    return metric.compute(predictions=predicted_classes, references=p.label_ids)


In [155]:
training_args = TrainingArguments(
    # Output location and Hub publishing
    output_dir="./beit-base",
    push_to_hub=True,
    # Presumably required so the raw "image" column is still present when the
    # on-the-fly preprocessing functions run — verify against the Trainer docs.
    remove_unused_columns=False,
    # Optimisation schedule
    learning_rate=5e-5,
    warmup_ratio=0.1,
    num_train_epochs=3,
    # Batching: 8 samples per device step, accumulated over 4 steps
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,
    # Evaluate and checkpoint once per epoch, then restore the checkpoint
    # with the best accuracy
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    # Logging cadence
    logging_steps=10,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [156]:
# Derive the label mapping from the dataset so the fine-tuned checkpoint
# carries human-readable class names instead of generic LABEL_0 / LABEL_1.
# Assumes the "labels" feature is a ClassLabel exposing `.names` — verify.
class_names = ds["train"].features["labels"].names
id2label = dict(enumerate(class_names))
label2id = {name: index for index, name in id2label.items()}

# ignore_mismatched_sizes allows the checkpoint's original classification head
# to be swapped for a newly initialised one sized for these classes.
model = BeitForImageClassification.from_pretrained(
    'microsoft/beit-base-patch16-224-pt22k-ft22k',
    num_labels=len(class_names),
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)
               

loading configuration file config.json from cache at /home/felixmorgan/.cache/huggingface/hub/models--microsoft--beit-base-patch16-224-pt22k-ft22k/snapshots/9da301148150e37e533abef672062fa49f6bda4f/config.json
Model config BeitConfig {
  "architectures": [
    "BeitForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "auxiliary_channels": 256,
  "auxiliary_concat_input": false,
  "auxiliary_loss_weight": 0.4,
  "auxiliary_num_convs": 1,
  "drop_path_rate": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "layer_scale_init_value": 0.1,
  "model_type": "beit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "out_indices": [
    3,
    5,
    7,
    11
  ],
  "patch_size": 16,
  "pool_scales": [
    1,
    2,
    3,
    6
  ],
  "semantic_loss_ignore_index": 255,
  "torch_dtype": "float32",
  "transforme

In [157]:
trainer = Trainer(
    # What to train and how
    model=model,
    args=training_args,
    # Data: training split, evaluation split and the batching function
    train_dataset=model_ds["train"],
    eval_dataset=model_ds["test"],
    data_collator=collate_fn,
    # Accuracy is computed at every evaluation pass
    compute_metrics=compute_metrics,
    # The feature extractor is handed over through the `tokenizer` slot
    tokenizer=feature_extractor,
)


/home/felixmorgan/PycharmProjects/pythonProject/./beit-base is already a clone of https://huggingface.co/ChasingMercer/beit-base. Make sure you pull the latest changes with `repo.git_pull()`.


In [158]:
# Run fine-tuning; the returned object exposes the run's metrics.
train_results = trainer.train()
# Save the trained model through the trainer.
trainer.save_model()
# Report the training metrics, then write them out under the "train" name.
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
# Save the trainer's own state as well.
trainer.save_state()

***** Running training *****
  Num examples = 18728
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 4
  Total optimization steps = 1755
  Number of trainable parameters = 85763522


Epoch,Training Loss,Validation Loss,Accuracy
0,0.0303,0.018584,0.994233
1,0.0374,0.015015,0.995515
2,0.0559,0.011637,0.997651


***** Running Evaluation *****
  Num examples = 4682
  Batch size = 8
Saving model checkpoint to ./beit-base/checkpoint-585
Configuration saved in ./beit-base/checkpoint-585/config.json
Model weights saved in ./beit-base/checkpoint-585/pytorch_model.bin
Image processor saved in ./beit-base/checkpoint-585/preprocessor_config.json
Image processor saved in ./beit-base/preprocessor_config.json
Several commits (2) will be pushed upstream.
***** Running Evaluation *****
  Num examples = 4682
  Batch size = 8
Saving model checkpoint to ./beit-base/checkpoint-1170
Configuration saved in ./beit-base/checkpoint-1170/config.json
Model weights saved in ./beit-base/checkpoint-1170/pytorch_model.bin
Image processor saved in ./beit-base/checkpoint-1170/preprocessor_config.json
Image processor saved in ./beit-base/preprocessor_config.json
***** Running Evaluation *****
  Num examples = 4682
  Batch size = 8
Saving model checkpoint to ./beit-base/checkpoint-1755
Configuration saved in ./beit-base/check

Upload file pytorch_model.bin:   0%|          | 32.0k/331M [00:11<?, ?B/s]

Upload file runs/Mar06_16-20-32_pop-os/events.out.tfevents.1678119648.pop-os.164622.12:  99%|#########9| 32.0k…

remote: Scanning LFS files of refs/heads/main for validity...        
remote: LFS file scan complete.        
To https://huggingface.co/ChasingMercer/beit-base
   66bba7d..0d176a1  main -> main

To https://huggingface.co/ChasingMercer/beit-base
   0d176a1..7e9bfa9  main -> main



***** train metrics *****
  epoch                    =          3.0
  total_flos               = 4052491304GF
  train_loss               =       0.0804
  train_runtime            =   1:24:51.75
  train_samples_per_second =       11.034
  train_steps_per_second   =        0.345


In [116]:
# Sanity check: shape of one preprocessed training sample.
print(model_ds['train'][45]['pixel_values'].shape)

torch.Size([3, 224, 224])


In [None]:
# The split dictionary only has "train" and "test" entries (there is no
# "valid" key), so the held-out "test" split is used both for the metric
# summary and for the raw predictions.
evaluator = trainer.evaluate(model_ds['test'])
predictor = trainer.predict(model_ds['test'])

# Predicted class = index of the highest score in each row of the predictions.
predictions = np.argmax(predictor.predictions, axis=1).tolist()
print(evaluator)
print(predictions)


# trainer.log_metrics("train", metrics)
# trainer.save_metrics("train", metrics)
# trainer.save_metrics("eval", metrics["eval_f1", "eval_accuracy"])

