<a href="https://colab.research.google.com/github/MuhammadIrzam447/NewEncodings/blob/main/Valid_11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Load Model


In [1]:
# !pip install transformers evaluate datasets
import requests
import torch
from PIL import Image
from transformers import ViTForImageClassification, ViTImageProcessor
from tqdm import tqdm

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cuda


In [3]:
model = ViTForImageClassification.from_pretrained(f"/content/MMLearning/data/Models/Model-02/checkpoint-54000").to(device)
image_processor = ViTImageProcessor.from_pretrained(f"/content/MMLearning/data/Models/Model-02/checkpoint-54000")

In [4]:
# image_processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")

In [5]:
model.config

ViTConfig {
  "_name_or_path": "/content/MMLearning/data/Models/Model-02/checkpoint-54000",
  "architectures": [
    "ViTForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "apple_pie",
    "1": "baby_back_ribs",
    "2": "baklava",
    "3": "beef_carpaccio",
    "4": "beef_tartare",
    "5": "beet_salad",
    "6": "beignets",
    "7": "bibimbap",
    "8": "bread_pudding",
    "9": "breakfast_burrito",
    "10": "bruschetta",
    "11": "caesar_salad",
    "12": "cannoli",
    "13": "caprese_salad",
    "14": "carrot_cake",
    "15": "ceviche",
    "16": "cheese_plate",
    "17": "cheesecake",
    "18": "chicken_curry",
    "19": "chicken_quesadilla",
    "20": "chicken_wings",
    "21": "chocolate_cake",
    "22": "chocolate_mousse",
    "23": "churros",
    "24": "clam_chowder",
    "25": "club_sandwich",
    "26": "crab_cakes",
    "27": "creme_

# Standard Experiment

In [None]:
from datasets import load_dataset

ds = load_dataset("imagefolder", data_dir="/content/Dataset(s)/joint-food-101/test", split="train")

In [None]:
ds

# 100% Missing Modality Experiment

In [None]:
from datasets import load_dataset

ds = load_dataset("imagefolder", data_dir="/content/MMLearning/data/food-101/flip/multimodal_img_et_flip/test", split="train")

In [None]:
ds

In [None]:
# Use _3.png for Filtering out Encoded Images
# Use _4.png for Filtering out Just Images

import os
def filter_funtion(example):
    img = example["image"]
    filename = os.path.basename(img.filename)

    return filename.endswith("_4.jpg")

In [None]:
ds = ds.filter(filter_funtion)

In [None]:
ds

In [None]:
for i in range(0,10):
    sample = ds[i]
    img = sample["image"]
    filename = os.path.basename(img.filename)
    print(f"File Name: {filename}")

# Partial Missing Modality Experiment

In [None]:
from datasets import load_dataset
from datasets import concatenate_datasets

In [None]:
# Load the Fused Dataset
ds_f = load_dataset("imagefolder", data_dir="/content/MMLearning/data/food-101/multimodal/test", split="train")

In [None]:
ds_f

In [None]:
# Desired Percentage = Total - Missing
import math
desired_percentage = 0.1

In [None]:
from collections import Counter

class_counts = Counter(ds_f['label'])
print(class_counts)

In [None]:
selected_indices = {label: [] for label in class_counts.keys()}
print(selected_indices)

In [None]:
# Iterate through the dataset to select indices for each class
for i, label in enumerate(ds_f["label"]):
    if len(selected_indices[label]) < math.ceil(class_counts[label] * desired_percentage):
        selected_indices[label].append(i)

In [None]:
# selected_indices

In [None]:
# Flatten the selected indices list
selected_indices = [idx for indices in selected_indices.values() for idx in indices]

In [None]:
# selected_indices

In [None]:
# Filter the dataset to select the desired samples
fused_ds = ds_f.select(selected_indices)

In [None]:
fused_ds

In [None]:
# Load the Joint Dataset and Filter out Just the Images
ds_joint = load_dataset("imagefolder", data_dir="/content/MMLearning/data/food-101/flip/multimodal_img_et_flip/test", split="train")

In [None]:
# Use _4.png for Filtering out Just Images

import os
def filter_funtion(example):
    img = example["image"]
    filename = os.path.basename(img.filename)

    return filename.endswith("_3.jpg")

In [None]:
ds_imgs = ds_joint.filter(filter_funtion)

In [None]:
ds_imgs

In [None]:
for i in range(0,5):
    sample = ds_imgs[i]
    img = sample["image"]
    filename = os.path.basename(img.filename)
    print(f"File Name: {filename}")

In [None]:
all_indices = range(len(ds_imgs))
print(all_indices)

In [None]:
remaining_indices = [idx for idx in all_indices if idx not in selected_indices]

In [None]:
# remaining_indices

In [None]:
missing_ds = ds_imgs.select(remaining_indices)

In [None]:
missing_ds

In [None]:
# Combine Both Datasets

In [None]:
ds = concatenate_datasets([fused_ds, missing_ds])

In [None]:
ds

# Pre-Processing and Final Validation Loop

In [None]:
labels = ds.features["label"]
labels

In [None]:
labels.int2str(ds[532]["label"])

In [None]:
def transform(examples):
  inputs = image_processor([img.convert("RGB") for img in examples["image"]], return_tensors="pt")
  inputs["labels"] = examples["label"]

  return inputs

In [None]:
dataset = ds.with_transform(transform)

In [None]:
dataset

In [None]:
for item in dataset:
  print(item["pixel_values"].shape)
  print(item["labels"])
  break

In [None]:
labels = ds.features["label"].names
labels

In [None]:
import torch

def collate_fn(batch):
  return {
      "pixel_values": torch.stack([x["pixel_values"] for x in batch]),
      "labels": torch.tensor([x["labels"] for x in batch]),
  }

In [None]:
dataset

In [None]:
from torch.utils.tensorboard import SummaryWriter
from torch.optim import AdamW
from torch.utils.data import DataLoader

In [None]:
batch_size = 32

In [None]:
valid_dataset_loader = DataLoader(dataset, collate_fn=collate_fn, batch_size=batch_size, shuffle=False)

In [None]:
model = model.to(device)
model.eval()
predictions, labels = [], []
# valid_loss = 0
for batch in valid_dataset_loader:
    pixel_values = batch["pixel_values"].to(device)
    label_ids = batch["labels"].to(device)

    outputs = model(pixel_values=pixel_values, labels=label_ids)

    # loss = outputs.loss
    # valid_loss += loss.item()

    logits = outputs.logits.detach().cpu()

    predictions.extend(logits.argmax(dim=-1).tolist())
    labels.extend(label_ids.tolist())

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report

accuracy = accuracy_score(labels, predictions)
precision = precision_score(labels, predictions, average='weighted')
recall = recall_score(labels, predictions, average='weighted')
f1 = f1_score(labels, predictions, average='weighted')
# AUROC_score = roc_auc_score(labels, predictions)

In [None]:
print("Accuracy: ", accuracy)
print("precision: ", precision)
print("f1_score: ", f1)
print("recall", recall)
# print("AUROC_score: ", AUROC_score)

In [None]:
print(classification_report(labels, predictions))

In [None]:
cm = confusion_matrix(labels, predictions)
print(cm)

# Joint Representation Experiment


In [6]:
from datasets import load_dataset

ds = load_dataset("imagefolder", data_dir="/content/MMLearning/data/food-101/flip/multimodal_img_et_flip/test", split="train")

Resolving data files:   0%|          | 0/45432 [00:00<?, ?it/s]

In [7]:
ds

Dataset({
    features: ['image', 'label'],
    num_rows: 45432
})

In [8]:
# Use _3.png for Filtering out Encoded Images
# Use _4.png for Filtering out Just Images

import os
def filter_funtion(example):
    img = example["image"]
    filename = os.path.basename(img.filename)

    return filename.endswith("_4.jpg")

In [9]:
ds_3 = ds.filter(filter_funtion)

In [10]:
ds_3

Dataset({
    features: ['image', 'label'],
    num_rows: 22716
})

In [11]:
for i in range(0,5):
    sample = ds_3[i]
    img = sample["image"]
    filename = os.path.basename(img.filename)
    print(f"File Name: {filename}")

File Name: apple_pie_103_4.jpg
File Name: apple_pie_105_4.jpg
File Name: apple_pie_107_4.jpg
File Name: apple_pie_10_4.jpg
File Name: apple_pie_119_4.jpg


Image Dataset

In [12]:
# Use _3.png for Filtering out Encoded Images
# Use _4.png for Filtering out Just Images

import os
def filter_funtion(example):
    img = example["image"]
    filename = os.path.basename(img.filename)

    return  filename.endswith("_3.jpg")

In [13]:
ds_4 = ds.filter(filter_funtion)

In [14]:
ds_4

Dataset({
    features: ['image', 'label'],
    num_rows: 22716
})

In [15]:
for i in range(0,5):
    sample = ds_4[i]
    img = sample["image"]
    filename = os.path.basename(img.filename)
    print(f"File Name: {filename}")

File Name: apple_pie_103_3.jpg
File Name: apple_pie_105_3.jpg
File Name: apple_pie_107_3.jpg
File Name: apple_pie_10_3.jpg
File Name: apple_pie_119_3.jpg


Pre-processing

In [16]:
labels = ds.features["label"]
labels

ClassLabel(names=['apple_pie', 'baby_back_ribs', 'baklava', 'beef_carpaccio', 'beef_tartare', 'beet_salad', 'beignets', 'bibimbap', 'bread_pudding', 'breakfast_burrito', 'bruschetta', 'caesar_salad', 'cannoli', 'caprese_salad', 'carrot_cake', 'ceviche', 'cheese_plate', 'cheesecake', 'chicken_curry', 'chicken_quesadilla', 'chicken_wings', 'chocolate_cake', 'chocolate_mousse', 'churros', 'clam_chowder', 'club_sandwich', 'crab_cakes', 'creme_brulee', 'croque_madame', 'cup_cakes', 'deviled_eggs', 'donuts', 'dumplings', 'edamame', 'eggs_benedict', 'escargots', 'falafel', 'filet_mignon', 'fish_and_chips', 'foie_gras', 'french_fries', 'french_onion_soup', 'french_toast', 'fried_calamari', 'fried_rice', 'frozen_yogurt', 'garlic_bread', 'gnocchi', 'greek_salad', 'grilled_cheese_sandwich', 'grilled_salmon', 'guacamole', 'gyoza', 'hamburger', 'hot_and_sour_soup', 'hot_dog', 'huevos_rancheros', 'hummus', 'ice_cream', 'lasagna', 'lobster_bisque', 'lobster_roll_sandwich', 'macaroni_and_cheese', 'mac

In [17]:
labels.int2str(ds[532]["label"])

'baby_back_ribs'

In [18]:
def transform(examples):
  inputs = image_processor([img.convert("RGB") for img in examples["image"]], return_tensors="pt")
  inputs["labels"] = examples["label"]

  return inputs

In [19]:
dataset_3 = ds_3.with_transform(transform)
dataset_4 = ds_4.with_transform(transform)

In [20]:
dataset_3

Dataset({
    features: ['image', 'label'],
    num_rows: 22716
})

In [21]:
dataset_4

Dataset({
    features: ['image', 'label'],
    num_rows: 22716
})

In [22]:
for item in dataset_4:
  print(item["pixel_values"].shape)
  print(item["labels"])
  break

torch.Size([3, 224, 224])
0


In [23]:
labels = ds.features["label"].names
labels

['apple_pie',
 'baby_back_ribs',
 'baklava',
 'beef_carpaccio',
 'beef_tartare',
 'beet_salad',
 'beignets',
 'bibimbap',
 'bread_pudding',
 'breakfast_burrito',
 'bruschetta',
 'caesar_salad',
 'cannoli',
 'caprese_salad',
 'carrot_cake',
 'ceviche',
 'cheese_plate',
 'cheesecake',
 'chicken_curry',
 'chicken_quesadilla',
 'chicken_wings',
 'chocolate_cake',
 'chocolate_mousse',
 'churros',
 'clam_chowder',
 'club_sandwich',
 'crab_cakes',
 'creme_brulee',
 'croque_madame',
 'cup_cakes',
 'deviled_eggs',
 'donuts',
 'dumplings',
 'edamame',
 'eggs_benedict',
 'escargots',
 'falafel',
 'filet_mignon',
 'fish_and_chips',
 'foie_gras',
 'french_fries',
 'french_onion_soup',
 'french_toast',
 'fried_calamari',
 'fried_rice',
 'frozen_yogurt',
 'garlic_bread',
 'gnocchi',
 'greek_salad',
 'grilled_cheese_sandwich',
 'grilled_salmon',
 'guacamole',
 'gyoza',
 'hamburger',
 'hot_and_sour_soup',
 'hot_dog',
 'huevos_rancheros',
 'hummus',
 'ice_cream',
 'lasagna',
 'lobster_bisque',
 'lobster

In [24]:
num_of_classes = len(labels)
print(num_of_classes)

101


In [25]:
import torch

def collate_fn(batch):
  return {
      "pixel_values": torch.stack([x["pixel_values"] for x in batch]),
      "labels": torch.tensor([x["labels"] for x in batch]),
  }

In [26]:
from torch.utils.tensorboard import SummaryWriter
from torch.optim import AdamW
from torch.utils.data import DataLoader

In [27]:
batch_size = 32

In [28]:
valid_dataset_loader_4 = DataLoader(dataset_4, collate_fn=collate_fn, batch_size=batch_size, shuffle=False)

In [29]:
print(len(valid_dataset_loader_4))

710


In [30]:
valid_dataset_loader_3 = DataLoader(dataset_3, collate_fn=collate_fn, batch_size=batch_size, shuffle=False)

In [31]:
print(len(valid_dataset_loader_3))

710


In [32]:
num_batches = len(valid_dataset_loader_3)
num_batches_with_logits_3 = int(num_batches * 0.1)  # ___% of batches that you want to include (30 for now)
print("Total Batches: ", num_batches)
print("Missing Modaility Batches: ", num_batches_with_logits_3)

Total Batches:  710
Missing Modaility Batches:  71


In [33]:
import random

controller = random.sample(range(0, num_batches), num_batches_with_logits_3)
controller = sorted(controller)
print(len(controller))

71


Validation Loops

In [34]:
model = model.to(device)
model.eval()
predictions_4, labels_4 = [], []
for batch in valid_dataset_loader_4:
    pixel_values = batch["pixel_values"].to(device)
    label_ids = batch["labels"].to(device)

    outputs = model(pixel_values=pixel_values, labels=label_ids)

    logits = outputs.logits.detach().cpu()
    probabilities = torch.softmax(logits, dim=1)

    predictions_4.extend(probabilities.cpu().numpy())
    labels_4.extend(label_ids.cpu().numpy())

In [35]:
predictions_3 = []
batch_idx = 0
for batch in valid_dataset_loader_3:
    pixel_values = batch["pixel_values"].to(device)
    label_ids = batch["labels"].to(device)

    # if batch_idx < num_batches_with_logits_3:
    if batch_idx in controller:
        outputs = model(pixel_values=pixel_values, labels=label_ids)
        logits = outputs.logits.detach().cpu()
    else:
        length = len(label_ids)
        logits = torch.zeros((length, num_of_classes), device="cpu")

    probabilities = torch.softmax(logits, dim=1)
    predictions_3.extend(probabilities.cpu().numpy())
    batch_idx = batch_idx + 1

In [36]:
import numpy as np
predictions_3 = np.array(predictions_3)
predictions_4 = np.array(predictions_4)

In [37]:
predictions_avg = (predictions_3 + predictions_4) / 2
predictions = np.argmax(predictions_avg, axis=1)

In [38]:
predictions = np.array(predictions)
labels = np.array(labels_4)

In [39]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report

accuracy = accuracy_score(labels, predictions)
precision = precision_score(labels, predictions, average='weighted')
recall = recall_score(labels, predictions, average='weighted')
f1 = f1_score(labels, predictions, average='weighted')
# AUROC_score = roc_auc_score(labels, predictions)

In [40]:
print("Accuracy: ", accuracy)
print("precision: ", precision)
print("f1_score: ", f1)
print("recall", recall)
# print("AUROC_score: ", AUROC_score)

Accuracy:  0.7542701179785174
precision:  0.7583643673109025
f1_score:  0.7540841274544681
recall 0.7542701179785174


In [41]:
print(classification_report(labels, predictions))

              precision    recall  f1-score   support

           0       0.62      0.82      0.70       234
           1       0.85      0.77      0.81       221
           2       0.91      0.87      0.89       226
           3       0.79      0.74      0.77       222
           4       0.51      0.57      0.54       225
           5       0.86      0.82      0.84       224
           6       0.64      0.73      0.68       224
           7       0.74      0.66      0.70       225
           8       0.66      0.74      0.70       226
           9       0.72      0.70      0.71       214
          10       0.72      0.79      0.76       231
          11       0.82      0.89      0.85       227
          12       0.72      0.81      0.76       230
          13       0.85      0.85      0.85       220
          14       0.68      0.65      0.67       231
          15       0.69      0.77      0.73       227
          16       0.64      0.69      0.66       224
          17       0.72    

In [42]:
cm = confusion_matrix(labels, predictions)
print(cm)

[[191   1   0 ...   1   0   2]
 [  0 170   0 ...   0   0   1]
 [  7   0 196 ...   0   0   1]
 ...
 [  2   0   1 ... 187   0   0]
 [  1   0   0 ...   0 142   0]
 [  5   0   0 ...   1   0 198]]
