<a href="https://colab.research.google.com/github/MuhammadIrzam447/MultiModel/blob/master/Train_26.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# This notebook fine-tunes a ViT model for multi-label classification.

In [None]:
!gdown https://drive.google.com/uc?id=1Yva4FLcRiSbcf3SAENVQFPWkfNG-GUmt

In [None]:
!gdown https://drive.google.com/uc?id=1X4cmMYRjxXFomCJ1adMhPNMYtd4WeDHP

In [None]:
!gdown https://drive.google.com/uc?id=1dxd2pySfCIDJYG7qMtuJre8ph068xc1X

In [None]:
!gdown https://drive.google.com/uc?id=1sYR9EgHkM0oiGRQVlFQCyHO8kMRJ4ibQ

In [None]:
!unzip /content/fused_test.zip

In [None]:
!unzip /content/fused_train.zip

In [None]:
!pip install transformers evaluate datasets

In [None]:
import requests
import torch
from PIL import Image
from transformers import *
from tqdm import tqdm

device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
model_name = "google/vit-base-patch16-224"
# The image processor turns PIL images into the "pixel_values" tensors used by `transform`.
image_processor = ViTImageProcessor.from_pretrained(model_name)
# NOTE(review): `model` is re-created in the "Training the Model" section with a
# multi-label head, so this instance is replaced before training starts.
model = ViTForImageClassification.from_pretrained(model_name).to(device)

# Loading a Custom Dataset

Training Dataset

In [None]:
import os
image_file_paths = []
genre_labels = []

image_folder_add = "/content/Dataset(s)/mm-imdb/fused/train"
labels_file = "/content/Dataset(s)/mm-imdb/fused/train_label.txt"

# Each non-empty line is parsed as "<filename> | <genre>, <genre>, ...".
with open(labels_file, 'r') as file:
    for line in file:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at the end of the file).
            continue
        parts = line.split('|')
        filename = parts[0].strip()
        # Split on commas and trim every genre so "A, B" and "A,B" parse identically.
        labels = [genre.strip() for genre in parts[1].split(',') if genre.strip()]
        image_path = os.path.join(image_folder_add, filename)
        image_file_paths.append(image_path)
        genre_labels.append(labels)

In [None]:
from collections import defaultdict

# Count, over the whole training set, how many samples mention each genre.
label_counts = defaultdict(int)
for labels in genre_labels:
    for label in labels:
        label_counts[label] = label_counts[label] + 1

# (genre, count) pairs: first in insertion order, then by descending count.
label_count_list = list(label_counts.items())
sorted_label_count_list = sorted(label_count_list, key=lambda pair: pair[1], reverse=True)

for label, count in sorted_label_count_list:
    print(f"{label}: {count}")

print("Total Labels: ", len(label_count_list))

Drama: 25272
Comedy: 15324
Romance: 9678
Thriller: 9339
Crime: 6879
Action: 6465
Adventure: 4833
Horror: 4809
Documentary: 3702
Mystery: 3693
Sci-Fi: 3636
Fantasy: 3486
Family: 2934
War: 2418
Biography: 2364
History: 2040
Music: 1902
Animation: 1758
Musical: 1509
Western: 1269
Sport: 1137
Short: 843
Film-Noir: 606
News: 117
Talk-Show: 6
Reality-TV: 3
Total Labels:  26


In [None]:
min_label_count = 500
# Keep only the genres seen at least `min_label_count` times, in alphabetical order.
valid_labels = sorted(
    label for label, count in label_counts.items() if count >= min_label_count
)

In [None]:
valid_labels, len(valid_labels)

(['Action',
  'Adventure',
  'Animation',
  'Biography',
  'Comedy',
  'Crime',
  'Documentary',
  'Drama',
  'Family',
  'Fantasy',
  'Film-Noir',
  'History',
  'Horror',
  'Music',
  'Musical',
  'Mystery',
  'Romance',
  'Sci-Fi',
  'Short',
  'Sport',
  'Thriller',
  'War',
  'Western'],
 23)

In [None]:
multi_hot_labels = []

# One row per training image, one column per entry of `valid_labels`.
for labels in genre_labels:
    # Use floats for both states so every row has a single, consistent element type.
    multi_hot = [1.0 if label in labels else 0.0 for label in valid_labels]
    multi_hot_labels.append(multi_hot)

In [None]:
from datasets import Dataset

# 'image' holds file paths (strings), not decoded images; 'label' holds the multi-hot genre vectors.
train_data = {'image': image_file_paths, 'label': multi_hot_labels}
ds_train = Dataset.from_dict(train_data)

Validation Dataset (built from the test split)

In [None]:
import os

test_image_file_paths = []
test_genre_labels = []

image_folder_add = "/content/Dataset(s)/mm-imdb/fused/test"
labels_file = "/content/Dataset(s)/mm-imdb/fused/test_label.txt"

# Each non-empty line is parsed as "<filename> | <genre>, <genre>, ...".
with open(labels_file, 'r') as file:
    for line in file:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at the end of the file).
            continue
        parts = line.split('|')
        filename = parts[0].strip()
        # Split on commas and trim every genre so "A, B" and "A,B" parse identically.
        labels = [genre.strip() for genre in parts[1].split(',') if genre.strip()]

        # Skip the "_1.png" / "_2.png" variants; every other file is kept.
        if not filename.endswith(("_1.png", "_2.png")):
            image_path = os.path.join(image_folder_add, filename)
            test_image_file_paths.append(image_path)
            test_genre_labels.append(labels)

In [None]:
len(test_image_file_paths)

7799

In [None]:
test_image_file_paths[0:10]

['/content/Dataset(s)/mm-imdb/fused/test/0078718.png',
 '/content/Dataset(s)/mm-imdb/fused/test/0089003.png',
 '/content/Dataset(s)/mm-imdb/fused/test/0098136.png',
 '/content/Dataset(s)/mm-imdb/fused/test/0057693.png',
 '/content/Dataset(s)/mm-imdb/fused/test/0385330.png',
 '/content/Dataset(s)/mm-imdb/fused/test/0096487.png',
 '/content/Dataset(s)/mm-imdb/fused/test/1220553.png',
 '/content/Dataset(s)/mm-imdb/fused/test/1341764.png',
 '/content/Dataset(s)/mm-imdb/fused/test/0882969.png',
 '/content/Dataset(s)/mm-imdb/fused/test/0119918.png']

In [None]:
test_multi_hot_labels = []

# One row per evaluation image, encoded against the genres kept from the training set.
for labels in test_genre_labels:
    # Use floats for both states so every row has a single, consistent element type.
    multi_hot = [1.0 if label in labels else 0.0 for label in valid_labels]
    test_multi_hot_labels.append(multi_hot)

In [None]:
# Same layout as the training set: 'image' holds file paths, 'label' the multi-hot genre vectors.
val_data = {'image': test_image_file_paths, 'label': test_multi_hot_labels}
ds_val = Dataset.from_dict(val_data)

In [None]:
ds_val

Dataset({
    features: ['image', 'label'],
    num_rows: 7799
})

# Just Image Training

In [None]:
# Suffix "_3.png" selects the encoded images
# Suffix "_4.png" selects the plain ("just image") files

import os
def filter_funtion(example, suffix="_4.png"):
    """Return True when the example's image file name ends with `suffix`.

    `example["image"]` may be a path string (what this notebook stores in its
    datasets) or an object exposing a `.filename` attribute, such as a PIL image.
    A different suffix can be supplied, e.g. via `Dataset.filter(..., fn_kwargs=...)`.

    NOTE(review): the validation paths printed earlier in the notebook carry no
    "_4" suffix — confirm this filter is really meant to be applied to `ds_val`.
    """
    img = example["image"]
    if isinstance(img, (str, os.PathLike)):
        path = os.fspath(img)
    else:
        # Decoded image objects: fall back to the recorded source file name, if any.
        path = getattr(img, "filename", "") or ""
    return os.path.basename(path).endswith(suffix)

In [None]:
ds_train = ds_train.filter(filter_funtion)

In [None]:
ds_val = ds_val.filter(filter_funtion)

# Exploring the Data

In [None]:
# labels = ds_train.features["label"]
# labels
labels = valid_labels
labels

['Action',
 'Adventure',
 'Animation',
 'Biography',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Family',
 'Fantasy',
 'Film-Noir',
 'History',
 'Horror',
 'Music',
 'Musical',
 'Mystery',
 'Romance',
 'Sci-Fi',
 'Short',
 'Sport',
 'Thriller',
 'War',
 'Western']

In [None]:
# labels.int2str(ds_train[532]["label"])

# Preprocessing the Data

In [None]:
import PIL.Image as pil

def transform(examples):
  """Turn a batch of image paths into model inputs, on the fly.

  Reads the image files listed in `examples["image"]`, converts each to RGB, runs
  them through `image_processor` and attaches `examples["label"]` under "labels".
  Returns the processor output with that extra "labels" entry.
  """
  rgb_images = []
  for path in examples["image"]:
    # Close each file deterministically; convert() returns a separate, fully
    # loaded image, so it stays usable after the file is closed.
    with pil.open(path) as img:
      rgb_images.append(img.convert("RGB"))
  inputs = image_processor(rgb_images, return_tensors="pt")
  inputs["labels"] = examples["label"]
  return inputs

In [None]:
# use the with_transform() method to apply the transform to the dataset on the fly during training
train_dataset = ds_train.with_transform(transform)
val_dataset = ds_val.with_transform(transform)

In [None]:
for item in train_dataset:
  print(item["pixel_values"].shape)
  print(item["labels"])
  break

torch.Size([3, 224, 224])
[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]


In [None]:
# # extract the labels for our dataset
# labels = ds_train.features["label"].names
# labels

In [None]:
import torch

def collate_fn(batch):
  """Assemble a list of per-example dicts into a single batch dict.

  Stacks every example's "pixel_values" tensor along a new leading dimension and
  builds one tensor out of the per-example "labels" sequences.
  """
  images, targets = [], []
  for example in batch:
    images.append(example["pixel_values"])
    targets.append(example["labels"])
  return {"pixel_values": torch.stack(images), "labels": torch.tensor(targets)}

In [None]:
train_dataset

Dataset({
    features: ['image', 'label'],
    num_rows: 46656
})

In [None]:
val_dataset

Dataset({
    features: ['image', 'label'],
    num_rows: 7799
})

# Defining the Metrics

In [None]:
# from evaluate import load
# import numpy as np
# from sklearn.metrics import roc_auc_score

# # load the accuracy and f1 metrics from the evaluate module
# accuracy = load("accuracy")
# f1 = load("f1")

# def compute_metrics(eval_pred):
#   # compute the accuracy and f1 scores & return them
#   accuracy_score = accuracy.compute(predictions=np.argmax(eval_pred.predictions, axis=1), references=eval_pred.label_ids)
#   f1_score = f1.compute(predictions=np.argmax(eval_pred.predictions, axis=1), references=eval_pred.label_ids, average="macro")

#   # auroc_score = roc_auc_score(eval_pred.label_ids, np.argmax(eval_pred.predictions, axis=1))
#   # print(f"AUROC Score: {auroc_score:.4f}")

#   return {**accuracy_score, **f1_score}

In [None]:
import torch
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(eval_pred):
    """Score multi-label predictions for the Trainer.

    Applies a sigmoid to `eval_pred.predictions` (raw logits), marks every label whose
    probability exceeds 0.5 as predicted, and compares the result with
    `eval_pred.label_ids` using scikit-learn's `accuracy_score` and macro-averaged
    `f1_score`. Returns a dict with the keys "accuracy" and "f1".
    """
    decision_threshold = 0.5
    probabilities = torch.sigmoid(torch.tensor(eval_pred.predictions))
    binary_predictions = (probabilities > decision_threshold).cpu().numpy().astype(int)
    references = eval_pred.label_ids

    return {
        "accuracy": accuracy_score(references, binary_predictions),
        "f1": f1_score(references, binary_predictions, average="macro"),
    }


# Training the Model

In [None]:
# Reload the ViT checkpoint with a classification head sized for the kept genres.
# NOTE(review): this replaces the `model` instance created right after the imports.
model = ViTForImageClassification.from_pretrained(
    model_name,
    num_labels= len(valid_labels),  # one output per kept genre
    label2id = {label: str(i) for i, label in enumerate(valid_labels)},  # genre name -> index (as a string)
    id2label = {str(i): label for i, label in enumerate(valid_labels)},  # index (as a string) -> genre name
    problem_type = "multi_label_classification",  # presumably selects a per-label loss — confirm in the transformers docs
    ignore_mismatched_sizes=True,  # presumably needed because the checkpoint's head has a different size — TODO confirm
)

In [None]:
# !pip install accelerate -U

In [None]:
# !pip install transformers[torch]

In [None]:
from transformers import TrainingArguments

# Configuration for the Trainer run below. Saving, evaluating and logging all use
# the same step interval (4000).
training_args = TrainingArguments(
  output_dir="/content/Model/Models-Train-26", # output directory
  per_device_train_batch_size=32, # batch size per device during training
  evaluation_strategy="steps",    # evaluate every `eval_steps` update steps
  num_train_epochs=25,             # total number of training epochs
  # fp16=True,                    # use mixed precision (currently disabled)
  save_steps=4000,                # number of update steps before saving checkpoint
  eval_steps=4000,                # number of update steps before evaluating
  logging_steps=4000,             # number of update steps before logging
  # Shorter intervals, handy for a quick smoke test:
  # save_steps=50,
  # eval_steps=50,
  # logging_steps=50,
  save_total_limit=4,             # limit the total amount of checkpoints on disk
  remove_unused_columns=False,    # keep all dataset columns: `transform` reads the 'image' column at batch time
  push_to_hub=False,              # do not push the model to the hub
  report_to='tensorboard',        # report metrics to tensorboard
  load_best_model_at_end=True,    # load the best model at the end of training
                                  # NOTE(review): no metric_for_best_model is set, so "best" uses the library default — confirm
)


Found safetensors installation, but --save_safetensors=False. Safetensors should be a preferred weights saving format due to security and performance reasons. If your model cannot be saved by safetensors please feel free to open an issue at https://github.com/huggingface/safetensors!
PyTorch: setting up devices


In [None]:
train_dataset

Dataset({
    features: ['image', 'label'],
    num_rows: 46656
})

In [None]:
val_dataset

Dataset({
    features: ['image', 'label'],
    num_rows: 7799
})

In [None]:
from transformers import Trainer

# Wire the model, datasets, batching and metrics together for the Trainer-based run.
trainer = Trainer(
    model=model,                        # the instantiated 🤗 Transformers model to be trained
    args=training_args,                 # training arguments, defined above
    data_collator=collate_fn,           # the data collator that will be used for batching
    compute_metrics=compute_metrics,    # the metrics function that will be used for evaluation
    train_dataset=train_dataset,        # training dataset
    eval_dataset=val_dataset,           # evaluation dataset
    tokenizer=image_processor,          # images are preprocessed in `transform`; NOTE(review): presumably passed so the processor is saved with checkpoints — confirm
)

In [None]:
# start training
trainer.train()

***** Running training *****
  Num examples = 46,656
  Num Epochs = 25
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 36,450
  Number of trainable parameters = 85,816,343


Step,Training Loss,Validation Loss


In [None]:
# trainer.evaluate(dataset["test"])
trainer.evaluate()

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# import shutil, os

# # Define the source folder path (in Colab)
# source_folder_path = '/content/output/checkpoint-7000'

# # Define the destination folder path (in Google Drive)
# destination_folder_path = "/content/drive/MyDrive/Colab Notebooks/Hateful-Memes/Vit/checkpoint-7000"

# # Remove the existing destination folder (if it exists)
# if os.path.exists(destination_folder_path):
#     shutil.rmtree(destination_folder_path)

# # Copy the folder
# shutil.copytree(source_folder_path, destination_folder_path)

In [None]:
# # start tensorboard
# # %load_ext tensorboard
# %reload_ext tensorboard
# %tensorboard --logdir /content/Model/Models-Train-15/runs

## Alternatively: Training using PyTorch Loop
Run the two cells below to fine-tune with a plain PyTorch loop instead of the Trainer.

In [None]:
# Training loop setup
from torch.utils.tensorboard import SummaryWriter
from torch.optim import AdamW
from torch.utils.data import DataLoader

batch_size = 32

# Build the loaders from the transformed datasets created in "Preprocessing the Data";
# this notebook never defines a `dataset` dict.
train_dataset_loader = DataLoader(train_dataset, collate_fn=collate_fn, batch_size=batch_size, shuffle=True)
# Evaluation does not depend on sample order, so the validation loader is not shuffled.
valid_dataset_loader = DataLoader(val_dataset, collate_fn=collate_fn, batch_size=batch_size, shuffle=False)

# move the model to the target device before the optimizer is created
model = model.to(device)

# define the optimizer
optimizer = AdamW(model.parameters(), lr=1e-5)

log_dir = "./image-classification/tensorboard"
summary_writer = SummaryWriter(log_dir=log_dir)

num_epochs = 3
# print some statistics before training
# number of training steps
n_train_steps = num_epochs * len(train_dataset_loader)
# number of validation steps
n_valid_steps = len(valid_dataset_loader)
# current training step
current_step = 0
# logging, eval & save steps
save_steps = 1000

def compute_metrics(eval_pred):
  """Compute subset accuracy and macro F1 for multi-label predictions.

  This definition shadows the `compute_metrics` from "Defining the Metrics" and is
  intended to follow the same contract: `eval_pred.predictions` holds raw logits and
  `eval_pred.label_ids` holds multi-hot targets, both shaped (num_samples, num_labels).
  A label counts as predicted when sigmoid(logit) > 0.5.

  Returns:
    dict with "accuracy" (fraction of samples whose whole label vector is predicted
    exactly) and "f1" (unweighted mean of the per-label F1 scores; a label with no
    positive target and no positive prediction scores 0).

  Raises:
    ValueError: if predictions and labels are not 2-D arrays of the same shape.
  """
  logits = torch.as_tensor(eval_pred.predictions, dtype=torch.float32)
  targets = torch.as_tensor(eval_pred.label_ids) > 0.5
  if logits.dim() != 2 or logits.shape != targets.shape:
    raise ValueError(
        "compute_metrics expects logits and multi-hot labels of identical 2-D shape, "
        f"got {tuple(logits.shape)} and {tuple(targets.shape)}"
    )
  preds = torch.sigmoid(logits) > 0.5

  # subset accuracy: a sample is correct only if every label matches
  accuracy = (preds == targets).all(dim=1).double().mean().item()

  # per-label confusion counts, summed over samples
  tp = (preds & targets).sum(dim=0).double()
  fp = (preds & ~targets).sum(dim=0).double()
  fn = (~preds & targets).sum(dim=0).double()
  denom = 2 * tp + fp + fn
  # F1 = 2TP / (2TP + FP + FN); defined as 0 where the denominator is 0
  per_label_f1 = torch.where(denom > 0, 2 * tp / denom.clamp(min=1), torch.zeros_like(denom))

  return {"accuracy": accuracy, "f1": per_label_f1.mean().item()}

In [None]:
for epoch in range(num_epochs):
    # set the model to training mode
    model.train()
    # running training loss and number of training steps since the last report
    train_loss = 0
    steps_since_report = 0
    # initialize the progress bar
    progress_bar = tqdm(range(current_step, n_train_steps), "Training", dynamic_ncols=True, ncols=80)
    for batch in train_dataset_loader:
      if (current_step+1) % save_steps == 0:
        ### evaluation code ###
        # evaluate on the validation set
        # if the current step is a multiple of the save steps
        print()
        print(f"Validation at step {current_step}...")
        print()
        # set the model to evaluation mode
        model.eval()
        # raw logits and multi-hot targets of every validation batch
        eval_logits, eval_targets = [], []
        # initialize the validation loss
        valid_loss = 0
        # evaluation needs no autograd graph
        with torch.no_grad():
            # a dedicated loop variable keeps the pending training `batch` intact
            for valid_batch in valid_dataset_loader:
                # get the batch
                pixel_values = valid_batch["pixel_values"].to(device)
                label_ids = valid_batch["labels"].to(device)
                # forward pass
                outputs = model(pixel_values=pixel_values, labels=label_ids)
                # accumulate the loss
                valid_loss += outputs.loss.item()
                # keep the raw logits: this is a multi-label task, so argmax would
                # collapse every sample to a single class
                eval_logits.append(outputs.logits.cpu())
                eval_targets.append(label_ids.cpu())
        # make the EvalPrediction object (raw logits + multi-hot targets) for compute_metrics
        eval_prediction = EvalPrediction(
            predictions=torch.cat(eval_logits).numpy(),
            label_ids=torch.cat(eval_targets).numpy(),
        )
        # compute the metrics
        metrics = compute_metrics(eval_prediction)
        # print the stats; the training loss is averaged over the steps actually accumulated
        print()
        print(f"Epoch: {epoch}, Step: {current_step}, Train Loss: {train_loss / max(steps_since_report, 1):.4f}, " +
              f"Valid Loss: {valid_loss / n_valid_steps:.4f}, Accuracy: {metrics['accuracy']}, " +
              f"F1 Score: {metrics['f1']}")
        print()
        # log the metrics
        summary_writer.add_scalar("valid_loss", valid_loss / n_valid_steps, global_step=current_step)
        summary_writer.add_scalar("accuracy", metrics["accuracy"], global_step=current_step)
        summary_writer.add_scalar("f1", metrics["f1"], global_step=current_step)
        # save the model
        model.save_pretrained(f"./vit-base-food/checkpoint-{current_step}")
        image_processor.save_pretrained(f"./vit-base-food/checkpoint-{current_step}")
        # get the model back to train mode
        model.train()
        # reset the train and valid loss
        train_loss, valid_loss = 0, 0
        steps_since_report = 0
      ### training code below ###
      # get the batch & move it to the device
      pixel_values = batch["pixel_values"].to(device)
      target_ids = batch["labels"].to(device)
      # forward pass
      outputs = model(pixel_values=pixel_values, labels=target_ids)
      # get the loss
      loss = outputs.loss
      # backward pass
      loss.backward()
      # update the weights
      optimizer.step()
      # zero the gradients
      optimizer.zero_grad()
      # log the loss
      loss_v = loss.item()
      train_loss += loss_v
      steps_since_report += 1
      # increment the step
      current_step += 1
      progress_bar.update(1)
      # log the training loss
      summary_writer.add_scalar("train_loss", loss_v, global_step=current_step)
    # finish this epoch's progress bar before the next one is created
    progress_bar.close()
