<a href="https://colab.research.google.com/github/MuhammadIrzam447/MultiModel/blob/master/Train_20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers evaluate datasets

In [None]:
import requests
import torch
from PIL import Image
from transformers import *
from tqdm import tqdm

device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
model_name = "google/vit-base-patch16-224"
image_processor = ViTImageProcessor.from_pretrained(model_name)
model = ViTForImageClassification.from_pretrained(model_name).to(device)

## Loading a Custom Dataset using `ImageFolder`


In [None]:
from datasets import load_dataset

ds_train = load_dataset("imagefolder", data_dir="/content/Dataset(s)/jointHF-train+test_unseen/train", split="train")

Resolving data files:   0%|          | 0/19000 [00:00<?, ?it/s]

In [None]:
ds_train

Dataset({
    features: ['image', 'label'],
    num_rows: 19000
})

In [None]:
ds_val = load_dataset("imagefolder", data_dir="/content/Dataset(s)/jointHF-train+test_unseen/test", split="train")

Resolving data files:   0%|          | 0/4000 [00:00<?, ?it/s]

In [None]:
ds_val

Dataset({
    features: ['image', 'label'],
    num_rows: 4000
})

# New Section

In [None]:
# Use _3.png for Filtering out Encoded Images
# Use _4.png for Filtering out Just Images

import os
def filter_funtion(example):
    img = example["image"]
    filename = os.path.basename(img.filename)

    return filename.endswith("_4.png")

In [None]:
ds_train = ds_train.filter(filter_funtion)

In [None]:
ds_val = ds_val.filter(filter_funtion)

# Exploring the Data

In [None]:
labels = ds_train.features["label"]
labels

ClassLabel(names=['0', '1'], id=None)

In [None]:
labels.int2str(ds_train[532]["label"])

'0'

# Preprocessing the Data

In [None]:
def transform(examples):
  # convert all images to RGB format, then preprocessing it
  # using our image processor
  inputs = image_processor([img.convert("RGB") for img in examples["image"]], return_tensors="pt")
  # we also shouldn't forget about the labels
  inputs["labels"] = examples["label"]
  return inputs

In [None]:
# use the with_transform() method to apply the transform to the dataset on the fly during training
train_dataset = ds_train.with_transform(transform)
val_dataset = ds_val.with_transform(transform)

In [None]:
for item in train_dataset:
  print(item["pixel_values"].shape)
  print(item["labels"])
  break

torch.Size([3, 224, 224])
0


In [None]:
# extract the labels for our dataset
labels = ds_train.features["label"].names
labels

['0', '1']

In [None]:
import torch

def collate_fn(batch):
  return {
      "pixel_values": torch.stack([x["pixel_values"] for x in batch]),
      "labels": torch.tensor([x["labels"] for x in batch]),
  }

In [None]:
train_dataset

Dataset({
    features: ['image', 'label'],
    num_rows: 9500
})

In [None]:
val_dataset

Dataset({
    features: ['image', 'label'],
    num_rows: 2000
})

# Defining the Metrics

In [None]:
from evaluate import load
import numpy as np
from sklearn.metrics import roc_auc_score

# load the accuracy and f1 metrics from the evaluate module
accuracy = load("accuracy")
f1 = load("f1")

def compute_metrics(eval_pred):
  # compute the accuracy and f1 scores & return them
  accuracy_score = accuracy.compute(predictions=np.argmax(eval_pred.predictions, axis=1), references=eval_pred.label_ids)
  f1_score = f1.compute(predictions=np.argmax(eval_pred.predictions, axis=1), references=eval_pred.label_ids, average="macro")

  auroc_score = roc_auc_score(eval_pred.label_ids, np.argmax(eval_pred.predictions, axis=1))
  print(f"AUROC Score: {auroc_score:.4f}")

  return {**accuracy_score, **f1_score}

# Training the Model

In [None]:
# load the ViT model
model = ViTForImageClassification.from_pretrained(
    model_name,
    num_labels=len(labels),
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)},
    ignore_mismatched_sizes=True,
)

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--google--vit-base-patch16-224/snapshots/3f49326eb077187dfe1c2a2bb15fbd74e6ab91e3/config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224",
  "architectures": [
    "ViTForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "0",
    "1": "1"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "0": "0",
    "1": "1"
  },
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "patch_size": 16,
  "qkv_bias": true,
  "transformers_version": "4.32.1"
}

loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--google--vit-base-patch16-224/snapshots/3f49326eb077187dfe1c2a2bb15fbd74e6ab9

In [None]:
# !pip install accelerate -U

In [None]:
# !pip install transformers[torch]

In [None]:
from transformers import TrainingArguments

training_args = TrainingArguments(
  output_dir="/content/Model/Models-Train-20", # output directory
  per_device_train_batch_size=32, # batch size per device during training
  evaluation_strategy="steps",    # evaluation strategy to adopt during training
  num_train_epochs=20,             # total number of training epochs
  # fp16=True,                    # use mixed precision
  save_steps=1180,                # number of update steps before saving checkpoint
  eval_steps=1180,                # number of update steps before evaluating
  logging_steps=1180,             # number of update steps before logging
  # save_steps=50,
  # eval_steps=50,
  # logging_steps=50,
  save_total_limit=4,             # limit the total amount of checkpoints on disk
  remove_unused_columns=False,    # remove unused columns from the dataset
  push_to_hub=False,              # do not push the model to the hub
  report_to='tensorboard',        # report metrics to tensorboard
  load_best_model_at_end=True,    # load the best model at the end of training
)


Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!
PyTorch: setting up devices


In [None]:
train_dataset

Dataset({
    features: ['image', 'label'],
    num_rows: 9500
})

In [None]:
val_dataset

Dataset({
    features: ['image', 'label'],
    num_rows: 2000
})

In [None]:
from transformers import Trainer

trainer = Trainer(
    model=model,                        # the instantiated 🤗 Transformers model to be trained
    args=training_args,                 # training arguments, defined above
    data_collator=collate_fn,           # the data collator that will be used for batching
    compute_metrics=compute_metrics,    # the metrics function that will be used for evaluation
    train_dataset=train_dataset,        # training dataset
    eval_dataset=val_dataset,           # evaluation dataset
    tokenizer=image_processor,          # the processor that will be used for preprocessing the images
)

In [26]:
# start training
trainer.train()

***** Running training *****
  Num examples = 9,500
  Num Epochs = 20
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 5,940
  Number of trainable parameters = 85,800,194


Step,Training Loss,Validation Loss,Accuracy,F1
1180,0.4888,0.66221,0.679,0.670567
2360,0.0872,1.274229,0.7265,0.715515
3540,0.0062,1.649989,0.7285,0.718387
4720,0.0003,1.821757,0.7255,0.715163
5900,0.0,1.860298,0.7255,0.715163


***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8


AUROC Score: 0.6816


Saving model checkpoint to /content/Model/Models-Train-20/checkpoint-1180
Configuration saved in /content/Model/Models-Train-20/checkpoint-1180/config.json
Model weights saved in /content/Model/Models-Train-20/checkpoint-1180/pytorch_model.bin
Image processor saved in /content/Model/Models-Train-20/checkpoint-1180/preprocessor_config.json
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8


AUROC Score: 0.7225


Saving model checkpoint to /content/Model/Models-Train-20/checkpoint-2360
Configuration saved in /content/Model/Models-Train-20/checkpoint-2360/config.json
Model weights saved in /content/Model/Models-Train-20/checkpoint-2360/pytorch_model.bin
Image processor saved in /content/Model/Models-Train-20/checkpoint-2360/preprocessor_config.json
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8


AUROC Score: 0.7265


Saving model checkpoint to /content/Model/Models-Train-20/checkpoint-3540
Configuration saved in /content/Model/Models-Train-20/checkpoint-3540/config.json
Model weights saved in /content/Model/Models-Train-20/checkpoint-3540/pytorch_model.bin
Image processor saved in /content/Model/Models-Train-20/checkpoint-3540/preprocessor_config.json
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8


AUROC Score: 0.7231


Saving model checkpoint to /content/Model/Models-Train-20/checkpoint-4720
Configuration saved in /content/Model/Models-Train-20/checkpoint-4720/config.json
Model weights saved in /content/Model/Models-Train-20/checkpoint-4720/pytorch_model.bin
Image processor saved in /content/Model/Models-Train-20/checkpoint-4720/preprocessor_config.json
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8


AUROC Score: 0.7231


Saving model checkpoint to /content/Model/Models-Train-20/checkpoint-5900
Configuration saved in /content/Model/Models-Train-20/checkpoint-5900/config.json
Model weights saved in /content/Model/Models-Train-20/checkpoint-5900/pytorch_model.bin
Image processor saved in /content/Model/Models-Train-20/checkpoint-5900/preprocessor_config.json
Deleting older checkpoint [/content/Model/Models-Train-20/checkpoint-2360] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/Model/Models-Train-20/checkpoint-1180 (score: 0.66221022605896).


TrainOutput(global_step=5940, training_loss=0.1157257354892759, metrics={'train_runtime': 8848.338, 'train_samples_per_second': 21.473, 'train_steps_per_second': 0.671, 'total_flos': 1.472347802677248e+19, 'train_loss': 0.1157257354892759, 'epoch': 20.0})

In [None]:
# trainer.evaluate(dataset["test"])
trainer.evaluate()

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# import shutil, os

# # Define the source folder path (in Colab)
# source_folder_path = '/content/output/checkpoint-7000'

# # Define the destination folder path (in Google Drive)
# destination_folder_path = "/content/drive/MyDrive/Colab Notebooks/Hateful-Memes/Vit/checkpoint-7000"

# # Remove the existing destination folder (if it exists)
# if os.path.exists(destination_folder_path):
#     shutil.rmtree(destination_folder_path)

# # Copy the folder
# shutil.copytree(source_folder_path, destination_folder_path)

In [None]:
# # start tensorboard
# # %load_ext tensorboard
# %reload_ext tensorboard
# %tensorboard --logdir /content/Model/Models-Train-15/runs

## Alternatively: Training using PyTorch Loop
Run the two below cells to fine-tune using a regular PyTorch loop if you want.

In [None]:
# Training loop
from torch.utils.tensorboard import SummaryWriter
from torch.optim import AdamW
from torch.utils.data import DataLoader

batch_size = 32

train_dataset_loader = DataLoader(dataset["train"], collate_fn=collate_fn, batch_size=batch_size, shuffle=True)
valid_dataset_loader = DataLoader(dataset["validation"], collate_fn=collate_fn, batch_size=batch_size, shuffle=True)

# define the optimizer
optimizer = AdamW(model.parameters(), lr=1e-5)

log_dir = "./image-classification/tensorboard"
summary_writer = SummaryWriter(log_dir=log_dir)

num_epochs = 3
model = model.to(device)
# print some statistics before training
# number of training steps
n_train_steps = num_epochs * len(train_dataset_loader)
# number of validation steps
n_valid_steps = len(valid_dataset_loader)
# current training step
current_step = 0
# logging, eval & save steps
save_steps = 1000

def compute_metrics(eval_pred):
  accuracy_score = accuracy.compute(predictions=eval_pred.predictions, references=eval_pred.label_ids)
  f1_score = f1.compute(predictions=eval_pred.predictions, references=eval_pred.label_ids, average="macro")
  return {**accuracy_score, **f1_score}

In [None]:
for epoch in range(num_epochs):
    # set the model to training mode
    model.train()
    # initialize the training loss
    train_loss = 0
    # initialize the progress bar
    progress_bar = tqdm(range(current_step, n_train_steps), "Training", dynamic_ncols=True, ncols=80)
    for batch in train_dataset_loader:
      if (current_step+1) % save_steps == 0:
        ### evaluation code ###
        # evaluate on the validation set
        # if the current step is a multiple of the save steps
        print()
        print(f"Validation at step {current_step}...")
        print()
        # set the model to evaluation mode
        model.eval()
        # initialize our lists that store the predictions and the labels
        predictions, labels = [], []
        # initialize the validation loss
        valid_loss = 0
        for batch in valid_dataset_loader:
            # get the batch
            pixel_values = batch["pixel_values"].to(device)
            label_ids = batch["labels"].to(device)
            # forward pass
            outputs = model(pixel_values=pixel_values, labels=label_ids)
            # get the loss
            loss = outputs.loss
            valid_loss += loss.item()
            # free the GPU memory
            logits = outputs.logits.detach().cpu()
            # add the predictions to the list
            predictions.extend(logits.argmax(dim=-1).tolist())
            # add the labels to the list
            labels.extend(label_ids.tolist())
        # make the EvalPrediction object that the compute_metrics function expects
        eval_prediction = EvalPrediction(predictions=predictions, label_ids=labels)
        # compute the metrics
        metrics = compute_metrics(eval_prediction)
        # print the stats
        print()
        print(f"Epoch: {epoch}, Step: {current_step}, Train Loss: {train_loss / save_steps:.4f}, " +
              f"Valid Loss: {valid_loss / n_valid_steps:.4f}, Accuracy: {metrics['accuracy']}, " +
              f"F1 Score: {metrics['f1']}")
        print()
        # log the metrics
        summary_writer.add_scalar("valid_loss", valid_loss / n_valid_steps, global_step=current_step)
        summary_writer.add_scalar("accuracy", metrics["accuracy"], global_step=current_step)
        summary_writer.add_scalar("f1", metrics["f1"], global_step=current_step)
        # save the model
        model.save_pretrained(f"./vit-base-food/checkpoint-{current_step}")
        image_processor.save_pretrained(f"./vit-base-food/checkpoint-{current_step}")
        # get the model back to train mode
        model.train()
        # reset the train and valid loss
        train_loss, valid_loss = 0, 0
      ### training code below ###
      # get the batch & convert to tensor
      pixel_values = batch["pixel_values"].to(device)
      labels = batch["labels"].to(device)
      # forward pass
      outputs = model(pixel_values=pixel_values, labels=labels)
      # get the loss
      loss = outputs.loss
      # backward pass
      loss.backward()
      # update the weights
      optimizer.step()
      # zero the gradients
      optimizer.zero_grad()
      # log the loss
      loss_v = loss.item()
      train_loss += loss_v
      # increment the step
      current_step += 1
      progress_bar.update(1)
      # log the training loss
      summary_writer.add_scalar("train_loss", loss_v, global_step=current_step)
