<a href="https://colab.research.google.com/github/MuhammadIrzam447/MultiModel/blob/master/Train_13.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# !gdown https://drive.google.com/uc?id=1wgl3QGXZ4m2aLg3T-1TDXQqSP31RuXgL

In [None]:
# !unzip /content/hateful_train+test_unseen.zip

In [None]:
!pip install transformers evaluate datasets

In [None]:
import requests
import torch
from PIL import Image
from transformers import *
from tqdm import tqdm

device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# the model name
model_name = "google/vit-base-patch16-224"
# load the image processor
image_processor = ViTImageProcessor.from_pretrained(model_name)
# loading the pre-trained model
model = ViTForImageClassification.from_pretrained(model_name).to(device)

In [None]:
import urllib.parse as parse
import os

# a function to determine whether a string is a URL or not
def is_url(string):
    """Return True when ``string`` parses as a URL with a scheme, a host and a path.

    Args:
        string: Candidate value; anything that is not ``str``/``bytes`` is
            rejected without raising.

    Returns:
        bool: True only if the parsed scheme, netloc and path are all non-empty.
    """
    if not isinstance(string, (str, bytes)):
        return False
    try:
        result = parse.urlparse(string)
    except ValueError:
        # urlparse raises ValueError on malformed input (e.g. a broken IPv6 host)
        return False
    return all([result.scheme, result.netloc, result.path])

# a function to load an image
def load_image(image_path):
    """Load an image from a URL or from a local file path.

    Args:
        image_path: Either an http(s) URL or a path on the local filesystem.

    Returns:
        The object returned by ``Image.open`` for the requested image.

    Raises:
        requests.HTTPError: If the URL responds with a 4xx/5xx status.
        FileNotFoundError: If ``image_path`` is neither a URL nor an existing path.
    """
    if is_url(image_path):
        response = requests.get(image_path, stream=True, timeout=30)
        # fail loudly on HTTP errors instead of handing an error page to PIL
        response.raise_for_status()
        return Image.open(response.raw)
    if os.path.exists(image_path):
        return Image.open(image_path)
    # previously this fell through and returned None, which only failed later
    # inside the image processor with an unrelated error message
    raise FileNotFoundError(f"Not a URL and not an existing file: {image_path!r}")

In [None]:
def get_prediction(model, url_or_path):
  """Classify a single image and return the predicted class name.

  Args:
    model: Image-classification model exposing ``config.id2label`` and
      returning an output with a ``logits`` attribute.
    url_or_path: URL or local path passed to ``load_image``.

  Returns:
    The ``id2label`` entry for the highest-scoring class.
  """
  # convert to RGB so grayscale / RGBA / palette images are handled the same
  # way as in the training-time `transform`
  img = load_image(url_or_path).convert("RGB")
  # preprocessing the image
  pixel_values = image_processor(img, return_tensors="pt")["pixel_values"].to(device)
  # perform inference without recording an autograd graph
  with torch.no_grad():
    output = model(pixel_values)
  # argmax of the logits equals argmax of the softmax, so softmax is skipped
  return model.config.id2label[int(output.logits.argmax())]

In [None]:
get_prediction(model, "http://images.cocodataset.org/test-stuff2017/000000000128.jpg")

'Indian elephant, Elephas maximus'

# Loading our Dataset

In [None]:
# from datasets import load_dataset

# # download & load the dataset
# ds = load_dataset("food101")

## Loading a Custom Dataset using `ImageFolder`
Run the three cells below to load a custom dataset (one that is not on the Hub) using `ImageFolder`.

In [None]:
# import requests
# from tqdm import tqdm

# def get_file(url):
#   response = requests.get(url, stream=True)
#   total_size = int(response.headers.get('content-length', 0))
#   filename = None
#   content_disposition = response.headers.get('content-disposition')
#   if content_disposition:
#       parts = content_disposition.split(';')
#       for part in parts:
#           if 'filename' in part:
#               filename = part.split('=')[1].strip('"')
#   if not filename:
#       filename = os.path.basename(url)
#   block_size = 1024 # 1 Kibibyte
#   tqdm_bar = tqdm(total=total_size, unit='iB', unit_scale=True)
#   with open(filename, 'wb') as file:
#       for data in response.iter_content(block_size):
#           tqdm_bar.update(len(data))
#           file.write(data)
#   tqdm_bar.close()
#   print(f"Downloaded {filename} ({total_size} bytes)")
#   return filename

In [None]:
# import zipfile
# import os

# def download_and_extract_dataset():
#   # dataset from https://github.com/udacity/dermatologist-ai
#   # 5.3GB
#   train_url = "https://s3-us-west-1.amazonaws.com/udacity-dlnfd/datasets/skin-cancer/train.zip"
#   # 824.5MB
#   valid_url = "https://s3-us-west-1.amazonaws.com/udacity-dlnfd/datasets/skin-cancer/valid.zip"
#   # 5.1GB
#   test_url  = "https://s3-us-west-1.amazonaws.com/udacity-dlnfd/datasets/skin-cancer/test.zip"
#   for i, download_link in enumerate([valid_url, train_url, test_url]):
#     data_dir = get_file(download_link)
#     print("Extracting", download_link)
#     with zipfile.ZipFile(data_dir, "r") as z:
#       z.extractall("data")
#     # remove the temp file
#     os.remove(data_dir)

# # comment the below line if you already downloaded the dataset
# download_and_extract_dataset()

In [None]:
from datasets import load_dataset

# load the custom dataset
ds = load_dataset("imagefolder", data_dir="/content/hateful_ViT1")

Resolving data files:   0%|          | 0/26500 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/2000 [00:00<?, ?it/s]

# 100% Missing Modality Dataset

In [None]:
from datasets import load_dataset

ds = load_dataset("imagefolder", data_dir="/content/hateful_ViT1")

In [None]:
def filename_ends_with_3(example):
    """Filter predicate: keep examples whose file name ends with ``_3.png``.

    Uses the ``file_name`` column when the example has one. Otherwise it falls
    back to the ``filename`` attribute of the object under ``image``
    (presumably the decoded PIL image keeps its source path - TODO confirm for
    the installed ``datasets`` version). Examples with no recoverable name are
    rejected instead of raising ``KeyError``.

    Args:
        example: Mapping for a single dataset row.

    Returns:
        bool: True if the recovered file name ends with ``_3.png``.
    """
    if "file_name" in example:
        name = example["file_name"]
    else:
        name = getattr(example.get("image"), "filename", "")
    return str(name or "").endswith("_3.png")

In [None]:
ds = ds.filter(filename_ends_with_3)
print(ds["train"][0:5])

# Exploring the Data

In [None]:
ds

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 26500
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 2000
    })
})

In [None]:
labels = ds["train"].features["label"]
labels

ClassLabel(names=['0', '1'], id=None)

In [None]:
labels.int2str(ds["train"][532]["label"])

'0'

In [None]:
import random
import matplotlib.pyplot as plt

def show_image_grid(dataset, split, grid_size=(4,4)):
    """Display a grid of randomly chosen images from ``dataset[split]``.

    Args:
        dataset: Mapping of split name to a dataset whose rows have ``image``
            and ``label`` entries and whose ``features["label"]`` offers
            ``int2str``.
        split: Name of the split to sample from.
        grid_size: ``(rows, cols)`` of the grid. If the split has fewer rows
            than grid cells, the remaining cells are left empty.
    """
    n_rows, n_cols = grid_size
    split_data = dataset[split]
    # never ask random.sample for more items than the split contains
    n_shown = min(n_rows * n_cols, len(split_data))
    indices = random.sample(range(len(split_data)), n_shown)
    # fetch each row once so every image is decoded only one time
    samples = [split_data[i] for i in indices]
    # label names come from the split being shown, not from a global dataset
    label_feature = split_data.features["label"]

    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(8,8), squeeze=False)
    for i, ax in enumerate(axes.flat):
        if i < n_shown:
            ax.imshow(samples[i]["image"])
            ax.set_title(label_feature.int2str(samples[i]["label"]))
        ax.axis('off')

    plt.show()

In [None]:
# show_image_grid(ds, "train")

# Preprocessing the Data

In [None]:
def transform(examples):
  """Batch transform: run the image processor on RGB copies and attach labels.

  Args:
    examples: Batch mapping with an ``image`` sequence and a ``label`` sequence.

  Returns:
    The image processor output with an extra ``labels`` entry.
  """
  # force every image into RGB before handing the batch to the processor
  rgb_images = []
  for picture in examples["image"]:
    rgb_images.append(picture.convert("RGB"))
  encoded = image_processor(rgb_images, return_tensors="pt")
  # carry the targets along under the key the model expects
  encoded["labels"] = examples["label"]
  return encoded

In [None]:
# use the with_transform() method to apply the transform to the dataset on the fly during training
dataset = ds.with_transform(transform)

In [None]:
for item in dataset["train"]:
  print(item["pixel_values"].shape)
  print(item["labels"])
  break

torch.Size([3, 224, 224])
0


In [None]:
# extract the labels for our dataset
labels = ds["train"].features["label"].names
labels

['0', '1']

In [None]:
import torch

def collate_fn(batch):
  """Merge a list of per-example dicts into one batch dict.

  Args:
    batch: Sequence of dicts, each with a ``pixel_values`` tensor and a
      ``labels`` value.

  Returns:
    Dict with stacked ``pixel_values`` and a ``labels`` tensor.
  """
  images = [sample["pixel_values"] for sample in batch]
  targets = [sample["labels"] for sample in batch]
  collated = {}
  collated["pixel_values"] = torch.stack(images)
  collated["labels"] = torch.tensor(targets)
  return collated

# Defining the Metrics

In [None]:
from evaluate import load
import numpy as np
from sklearn.metrics import roc_auc_score

# load the accuracy and f1 metrics from the evaluate module
accuracy = load("accuracy")
f1 = load("f1")

def compute_metrics(eval_pred):
  """Compute accuracy, macro F1 and AUROC for a Trainer evaluation pass.

  Args:
    eval_pred: Object with ``predictions`` (per-class logits, one row per
      example) and ``label_ids`` (integer targets).

  Returns:
    Dict with the accuracy and F1 results plus an ``auroc`` entry. ``auroc``
    is NaN when it is undefined (e.g. only one class in ``label_ids``).
  """
  logits = np.asarray(eval_pred.predictions)
  references = eval_pred.label_ids
  # hard class decisions, computed once and shared by accuracy and F1
  predicted_ids = np.argmax(logits, axis=1)

  accuracy_score = accuracy.compute(predictions=predicted_ids, references=references)
  f1_score = f1.compute(predictions=predicted_ids, references=references, average="macro")

  # AUROC needs ranking scores, not hard 0/1 decisions: use softmax
  # probabilities (shifted by the row max for numerical stability)
  shifted = logits - logits.max(axis=1, keepdims=True)
  exp_scores = np.exp(shifted)
  probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)
  try:
    if probs.shape[1] == 2:
      # binary case: score of the positive class (label 1)
      auroc_score = float(roc_auc_score(references, probs[:, 1]))
    else:
      auroc_score = float(roc_auc_score(references, probs, multi_class="ovr"))
  except ValueError:
    # roc_auc_score is undefined when a class is missing from the references
    auroc_score = float("nan")
  print(f"AUROC Score: {auroc_score:.4f}")

  return {**accuracy_score, **f1_score, "auroc": auroc_score}

# Training the Model

In [None]:
# load the ViT model
model = ViTForImageClassification.from_pretrained(
    model_name,
    num_labels=len(labels),
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)},
    ignore_mismatched_sizes=True,
)

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--google--vit-base-patch16-224/snapshots/3f49326eb077187dfe1c2a2bb15fbd74e6ab91e3/config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224",
  "architectures": [
    "ViTForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "0",
    "1": "1"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "0": "0",
    "1": "1"
  },
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "patch_size": 16,
  "qkv_bias": true,
  "transformers_version": "4.33.1"
}

loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--google--vit-base-patch16-224/snapshots/3f49326eb077187dfe1c2a2bb15fbd74e6ab9

In [None]:
# !pip install accelerate -U

In [None]:
# !pip install transformers[torch]

In [None]:
from transformers import TrainingArguments

# Trainer configuration. Checkpoints, evaluation and logging all happen every
# 1000 optimizer steps so that `load_best_model_at_end` can compare checkpoints.
training_args = TrainingArguments(
  output_dir="/content/output", # output directory
  per_device_train_batch_size=32, # batch size per device during training
  evaluation_strategy="steps",    # evaluation strategy to adopt during training
  num_train_epochs=10,             # total number of training epochs
  # fp16=True,                    # use mixed precision
  save_steps=1000,                # number of update steps before saving checkpoint
  eval_steps=1000,                # number of update steps before evaluating
  logging_steps=1000,             # number of update steps before logging
  # save_steps=50,
  # eval_steps=50,
  # logging_steps=50,
  save_total_limit=2,             # limit the total amount of checkpoints on disk
  remove_unused_columns=False,    # keep the raw 'image' column; the on-the-fly transform needs it
  push_to_hub=False,              # do not push the model to the hub
  report_to='tensorboard',        # report metrics to tensorboard
  load_best_model_at_end=True,    # load the best model at the end of training
  # Without these two settings "best" means lowest eval loss, which selects the
  # first checkpoint once the model starts to overfit. Select on macro F1 instead.
  metric_for_best_model="f1",
  greater_is_better=True,
)


Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!
PyTorch: setting up devices


In [None]:
dataset

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 26500
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 2000
    })
})

In [None]:
from transformers import Trainer

trainer = Trainer(
    model=model,                        # the instantiated 🤗 Transformers model to be trained
    args=training_args,                 # training arguments, defined above
    data_collator=collate_fn,           # the data collator that will be used for batching
    compute_metrics=compute_metrics,    # the metrics function that will be used for evaluation
    train_dataset=dataset["train"],     # training dataset
    eval_dataset=dataset["test"],       # evaluation dataset
    tokenizer=image_processor,          # the processor that will be used for preprocessing the images
)

In [None]:
# start training
trainer.train()

***** Running training *****
  Num examples = 26,500
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 8,290
  Number of trainable parameters = 85,800,194


Step,Training Loss,Validation Loss,Accuracy,F1
1000,0.5355,0.730685,0.669,0.564416
2000,0.2847,0.775324,0.708,0.665622
3000,0.1241,0.949491,0.725,0.702146
4000,0.0484,1.274296,0.723,0.706515
5000,0.0178,1.512437,0.7535,0.732612
6000,0.0046,1.561816,0.7695,0.743659
7000,0.0005,1.709508,0.754,0.734656
8000,0.0002,1.690851,0.754,0.735117


***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8


AUROC Score: 0.5829


Saving model checkpoint to /content/output/checkpoint-1000
Configuration saved in /content/output/checkpoint-1000/config.json
Model weights saved in /content/output/checkpoint-1000/pytorch_model.bin
Image processor saved in /content/output/checkpoint-1000/preprocessor_config.json
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8


AUROC Score: 0.6603


Saving model checkpoint to /content/output/checkpoint-2000
Configuration saved in /content/output/checkpoint-2000/config.json
Model weights saved in /content/output/checkpoint-2000/pytorch_model.bin
Image processor saved in /content/output/checkpoint-2000/preprocessor_config.json
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8


AUROC Score: 0.6995


Saving model checkpoint to /content/output/checkpoint-3000
Configuration saved in /content/output/checkpoint-3000/config.json
Model weights saved in /content/output/checkpoint-3000/pytorch_model.bin
Image processor saved in /content/output/checkpoint-3000/preprocessor_config.json
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8


AUROC Score: 0.7080


Saving model checkpoint to /content/output/checkpoint-4000
Configuration saved in /content/output/checkpoint-4000/config.json
Model weights saved in /content/output/checkpoint-4000/pytorch_model.bin
Image processor saved in /content/output/checkpoint-4000/preprocessor_config.json
Deleting older checkpoint [/content/output/checkpoint-2000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8


AUROC Score: 0.7292


Saving model checkpoint to /content/output/checkpoint-5000
Configuration saved in /content/output/checkpoint-5000/config.json
Model weights saved in /content/output/checkpoint-5000/pytorch_model.bin
Image processor saved in /content/output/checkpoint-5000/preprocessor_config.json
Deleting older checkpoint [/content/output/checkpoint-3000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8


AUROC Score: 0.7361


Saving model checkpoint to /content/output/checkpoint-6000
Configuration saved in /content/output/checkpoint-6000/config.json
Model weights saved in /content/output/checkpoint-6000/pytorch_model.bin
Image processor saved in /content/output/checkpoint-6000/preprocessor_config.json
Deleting older checkpoint [/content/output/checkpoint-4000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8


AUROC Score: 0.7323


Saving model checkpoint to /content/output/checkpoint-7000
Configuration saved in /content/output/checkpoint-7000/config.json
Model weights saved in /content/output/checkpoint-7000/pytorch_model.bin
Image processor saved in /content/output/checkpoint-7000/preprocessor_config.json
Deleting older checkpoint [/content/output/checkpoint-5000] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 2000
  Batch size = 8


AUROC Score: 0.7331


Saving model checkpoint to /content/output/checkpoint-8000
Configuration saved in /content/output/checkpoint-8000/config.json
Model weights saved in /content/output/checkpoint-8000/pytorch_model.bin
Image processor saved in /content/output/checkpoint-8000/preprocessor_config.json
Deleting older checkpoint [/content/output/checkpoint-6000] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/output/checkpoint-1000 (score: 0.7306849360466003).


TrainOutput(global_step=8290, training_loss=0.12253846677449343, metrics={'train_runtime': 11481.0891, 'train_samples_per_second': 23.081, 'train_steps_per_second': 0.722, 'total_flos': 2.053537724786688e+19, 'train_loss': 0.12253846677449343, 'epoch': 10.0})

In [None]:
# trainer.evaluate(dataset["test"])
trainer.evaluate()

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import shutil

# Define the source folder path (in Colab)
source_folder_path = '/content/output/checkpoint-7000'

# Define the destination folder path (in Google Drive)
destination_folder_path = "/content/drive/MyDrive/Colab Notebooks/Hateful-Memes/Vit/checkpoint-7000"

# Remove the existing destination folder (if it exists)
if os.path.exists(destination_folder_path):
    shutil.rmtree(destination_folder_path)

# Copy the folder
shutil.copytree(source_folder_path, destination_folder_path)

'/content/drive/MyDrive/Colab Notebooks/Hateful-Memes/Vit/checkpoint-7000'

In [None]:
# start tensorboard
# %load_ext tensorboard
%reload_ext tensorboard
%tensorboard --logdir ./vit-base-food/runs

## Alternatively: Training using PyTorch Loop
Run the two cells below to fine-tune with a regular PyTorch loop instead of the `Trainer`, if you prefer.

In [None]:
# Training loop
from torch.utils.tensorboard import SummaryWriter
from torch.optim import AdamW
from torch.utils.data import DataLoader

batch_size = 32

train_dataset_loader = DataLoader(dataset["train"], collate_fn=collate_fn, batch_size=batch_size, shuffle=True)
# `dataset` only has "train" and "test" splits, so evaluate on "test";
# shuffling is unnecessary for evaluation
valid_dataset_loader = DataLoader(dataset["test"], collate_fn=collate_fn, batch_size=batch_size, shuffle=False)

# define the optimizer
optimizer = AdamW(model.parameters(), lr=1e-5)

log_dir = "./image-classification/tensorboard"
summary_writer = SummaryWriter(log_dir=log_dir)

num_epochs = 3
model = model.to(device)
# print some statistics before training
# number of training steps
n_train_steps = num_epochs * len(train_dataset_loader)
# number of validation steps
n_valid_steps = len(valid_dataset_loader)
# current training step
current_step = 0
# logging, eval & save steps
save_steps = 1000

def compute_metrics(eval_pred):
  """Accuracy and macro F1 for the manual PyTorch loop.

  Unlike the earlier Trainer-oriented definition of the same name, this one
  passes ``eval_pred.predictions`` to the metrics unchanged (no argmax here).
  Defining it replaces that earlier function in the notebook namespace.

  Args:
    eval_pred: Object with ``predictions`` and ``label_ids``.

  Returns:
    Dict merging the accuracy result with the F1 result.
  """
  predicted_ids, true_ids = eval_pred.predictions, eval_pred.label_ids
  results = {}
  results.update(accuracy.compute(predictions=predicted_ids, references=true_ids))
  results.update(f1.compute(predictions=predicted_ids, references=true_ids, average="macro"))
  return results

In [None]:
# Manual fine-tuning loop: trains for `num_epochs` epochs and, every
# `save_steps` optimizer steps, evaluates on the validation loader, logs the
# metrics to TensorBoard and saves a checkpoint.
for epoch in range(num_epochs):
    # set the model to training mode
    model.train()
    # initialize the training loss
    train_loss = 0
    # initialize the progress bar
    progress_bar = tqdm(range(current_step, n_train_steps), "Training", dynamic_ncols=True, ncols=80)
    for batch in train_dataset_loader:
        if (current_step+1) % save_steps == 0:
            ### evaluation code ###
            # evaluate on the validation set
            # if the current step is a multiple of the save steps
            print()
            print(f"Validation at step {current_step}...")
            print()
            # set the model to evaluation mode
            model.eval()
            # initialize our lists that store the predictions and the references
            predictions, references = [], []
            # initialize the validation loss
            valid_loss = 0
            # no gradients are needed for evaluation
            with torch.no_grad():
                # NOTE: the loop variable must not be called `batch`, otherwise the
                # training step below would run on the last validation batch
                for valid_batch in valid_dataset_loader:
                    # get the batch
                    valid_pixel_values = valid_batch["pixel_values"].to(device)
                    valid_label_ids = valid_batch["labels"].to(device)
                    # forward pass
                    valid_outputs = model(pixel_values=valid_pixel_values, labels=valid_label_ids)
                    # accumulate the loss
                    valid_loss += valid_outputs.loss.item()
                    # move the logits off the GPU
                    logits = valid_outputs.logits.detach().cpu()
                    # add the predictions to the list
                    predictions.extend(logits.argmax(dim=-1).tolist())
                    # add the references to the list
                    references.extend(valid_label_ids.tolist())
            # make the EvalPrediction object that the compute_metrics function expects
            eval_prediction = EvalPrediction(predictions=predictions, label_ids=references)
            # compute the metrics
            metrics = compute_metrics(eval_prediction)
            # print the stats
            print()
            print(f"Epoch: {epoch}, Step: {current_step}, Train Loss: {train_loss / save_steps:.4f}, " +
                  f"Valid Loss: {valid_loss / n_valid_steps:.4f}, Accuracy: {metrics['accuracy']}, " +
                  f"F1 Score: {metrics['f1']}")
            print()
            # log the metrics
            summary_writer.add_scalar("valid_loss", valid_loss / n_valid_steps, global_step=current_step)
            summary_writer.add_scalar("accuracy", metrics["accuracy"], global_step=current_step)
            summary_writer.add_scalar("f1", metrics["f1"], global_step=current_step)
            # save the model
            model.save_pretrained(f"./vit-base-food/checkpoint-{current_step}")
            image_processor.save_pretrained(f"./vit-base-food/checkpoint-{current_step}")
            # get the model back to train mode
            model.train()
            # reset the train and valid loss
            train_loss, valid_loss = 0, 0
        ### training code below ###
        # get the batch & move it to the device
        pixel_values = batch["pixel_values"].to(device)
        # named `label_ids` so the class-name list `labels` defined earlier is not overwritten
        label_ids = batch["labels"].to(device)
        # forward pass
        outputs = model(pixel_values=pixel_values, labels=label_ids)
        # get the loss
        loss = outputs.loss
        # backward pass
        loss.backward()
        # update the weights
        optimizer.step()
        # zero the gradients
        optimizer.zero_grad()
        # log the loss
        loss_v = loss.item()
        train_loss += loss_v
        # increment the step
        current_step += 1
        progress_bar.update(1)
        # log the training loss
        summary_writer.add_scalar("train_loss", loss_v, global_step=current_step)
    # a new bar is created every epoch, so close the finished one
    progress_bar.close()


# Performing Inference

In [None]:
# load the best model, change the checkpoint number to the best checkpoint
# if the last checkpoint is the best, then ignore this cell
best_checkpoint = 7000
# best_checkpoint = 150
# the Trainer run above writes its checkpoints under output_dir="/content/output";
# use "./vit-base-food/checkpoint-<step>" only if you trained with the manual PyTorch loop
model = ViTForImageClassification.from_pretrained(f"/content/output/checkpoint-{best_checkpoint}").to(device)
# model = ViTForImageClassification.from_pretrained(f"./vit-base-skin-cancer/checkpoint-{best_checkpoint}").to(device)

In [None]:
get_prediction(model, "https://images.pexels.com/photos/858496/pexels-photo-858496.jpeg?auto=compress&cs=tinysrgb&w=600&lazy=load")

'sushi'

In [None]:
def get_prediction_probs(model, url_or_path, num_classes=3):
    """Return the top class names and their probabilities for one image.

    Args:
        model: Image-classification model exposing ``config.id2label`` and
            returning an output with a ``logits`` attribute.
        url_or_path: URL or local path passed to ``load_image``.
        num_classes: Number of top classes requested. It is capped at the
            number of classes the model outputs, so asking for more than the
            model has no longer makes ``torch.topk`` raise.

    Returns:
        dict: Class name -> probability, ordered from most to least likely.
    """
    # convert to RGB so non-RGB images are handled like the training data
    img = load_image(url_or_path).convert("RGB")
    # preprocessing the image
    pixel_values = image_processor(img, return_tensors="pt")["pixel_values"].to(device)
    # perform inference without recording an autograd graph
    with torch.no_grad():
        output = model(pixel_values)
    probabilities = output.logits.softmax(dim=1)
    # never request more classes than the model actually predicts
    k = min(num_classes, probabilities.shape[1])
    probs, indices = torch.topk(probabilities, k=k)
    # get the class labels
    id2label = model.config.id2label
    classes = [id2label[idx.item()] for idx in indices[0]]
    # index the single batch row instead of squeeze(), which collapses k == 1
    # to a bare float and breaks the zip below
    probs = probs[0].tolist()
    # create a dictionary with the class names and probabilities
    return dict(zip(classes, probs))

In [None]:
# example 1
get_prediction_probs(model, "https://images.pexels.com/photos/406152/pexels-photo-406152.jpeg?auto=compress&cs=tinysrgb&w=600")

In [None]:
# example 2
get_prediction_probs(model, "https://images.pexels.com/photos/920220/pexels-photo-920220.jpeg?auto=compress&cs=tinysrgb&w=600")

In [None]:
# example 3
get_prediction_probs(model, "https://images.pexels.com/photos/3338681/pexels-photo-3338681.jpeg?auto=compress&cs=tinysrgb&w=600")

In [None]:
# example 4
get_prediction_probs(model, "https://images.pexels.com/photos/806457/pexels-photo-806457.jpeg?auto=compress&cs=tinysrgb&w=600", num_classes=10)

In [None]:
get_prediction_probs(model, "https://images.pexels.com/photos/1624487/pexels-photo-1624487.jpeg?auto=compress&cs=tinysrgb&w=600")