<a href="https://colab.research.google.com/github/Kumar-Sanskar-2724/Vision-Transformer/blob/main/Vision_Transformer_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets,transforms
import timm
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from PIL import Image
# Get going_modular directory
try:
  from going_modular.going_modular import data_setup,engine
  from helper_functions import plot_loss_curves,set_seeds,download_data
except:
  print(f"[INFO] Couldn't find going_modular and helper_functions directory, downloading them from GitHub...")
  !git clone https://github.com/mrdbourke/pytorch-deep-learning
  !mv pytorch-deep-learning/going_modular .
  !mv pytorch-deep-learning/helper_functions.py .
  !rm -rf pytorch-deep-learning
  from going_modular.going_modular import data_setup,engine
  from helper_functions import download_data,set_seeds,plot_loss_curves

[INFO] Couldn't find going_modular and helper_functions directory, downloading them from GitHub...
Cloning into 'pytorch-deep-learning'...
remote: Enumerating objects: 4393, done.[K
remote: Counting objects: 100% (1534/1534), done.[K
remote: Compressing objects: 100% (135/135), done.[K
remote: Total 4393 (delta 1458), reused 1399 (delta 1399), pack-reused 2859 (from 2)[K
Receiving objects: 100% (4393/4393), 650.71 MiB | 32.28 MiB/s, done.
Resolving deltas: 100% (2660/2660), done.
Updating files: 100% (248/248), done.


## Setting Up Device-Agnostic Code

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
device

device(type='cuda')

In [None]:
# Preprocessing shared by training, evaluation and the demo's predict():
# resize to 224x224, convert to a tensor, then normalise with mean 0.5 / std 0.5
# (the single-element tuples are applied to every channel).
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

## Getting Data

In [None]:
# Build the CIFAR-10 train and test splits, stored under ./data (downloaded on
# first use); every image goes through `transform` when it is read.
train_dataset = datasets.CIFAR10(root='./data',train=True,transform=transform,download=True)
test_dataset = datasets.CIFAR10(root='./data',train=False,transform=transform,download=True)

100%|██████████| 170M/170M [00:05<00:00, 30.8MB/s]


In [None]:
# Keep both views of the labels: the list of class names and the
# name -> index mapping (the latter is what this cell displays).
class_names = train_dataset.classes
class_idx = train_dataset.class_to_idx
class_idx

{'airplane': 0,
 'automobile': 1,
 'bird': 2,
 'cat': 3,
 'deer': 4,
 'dog': 5,
 'frog': 6,
 'horse': 7,
 'ship': 8,
 'truck': 9}

In [None]:
# Fetch the zipped example images with the `download_data` helper from helper_functions.
# NOTE(review): the returned value is not read again in this notebook — later cells
# use the literal path "data/examples" and `example` is reused as a loop variable.
example = download_data(source='https://github.com/Kumar-Sanskar-2724/Vision-Transformer/raw/refs/heads/main/examples/examples.zip',destination='examples')

[INFO] Did not find data/examples directory, creating one...
[INFO] Downloading examples.zip from https://github.com/Kumar-Sanskar-2724/Vision-Transformer/raw/refs/heads/main/examples/examples.zip...
[INFO] Unzipping examples.zip data...


In [None]:
# Index -> class-name lookup used by predict(); positions follow the CIFAR-10 label order
label_map = dict(enumerate([
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]))

In [None]:
# Batch both splits in groups of 64; only the training data is shuffled so that
# evaluation always walks the test set in the same order.
train_dataloader = DataLoader(train_dataset,batch_size=64,shuffle=True)
test_dataloader = DataLoader(test_dataset,batch_size=64,shuffle=False)

## Setting up Model

In [None]:
# Start from the pretrained ViT-Tiny checkpoint provided by timm
model = timm.create_model('vit_tiny_patch16_224', pretrained=True)

# Replace the classification head with a fresh 10-way linear layer for CIFAR-10
model.head = nn.Linear(
    in_features=model.head.in_features,
    out_features=10,
)

# Move the model to the selected device (the cell displays the module tree)
model.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/22.9M [00:00<?, ?B/s]

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 192, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=192, out_features=576, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=192, out_features=192, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=192, out_features=768, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity()


## Creating a function to time our experiments

In [None]:
from timeit import default_timer as timer
def print_train_time(start:float,
                     end:float,
                     device:torch.device=None):
  """Print and return the elapsed time between two timestamps.

  Args:
    start: Timestamp taken before the work began (seconds).
    end: Timestamp taken after the work finished (seconds).
    device: Device label included in the printed message; printed as-is.

  Returns:
    The difference ``end - start``.
  """
  train_time = end - start
  message = f"Total train time on:{device} {train_time:.3f} seconds"
  print(message)
  return train_time

## Training and Evaluation

In [None]:
# Cross-entropy loss for the 10-way classification task
loss_fn=nn.CrossEntropyLoss()
# Adam over every model parameter (the whole network is fine-tuned, not only the new head)
optimizer = torch.optim.Adam(params=model.parameters(),lr=3e-5)

In [None]:
# `engine` is also imported in the first cell; importing it again here is harmless
from going_modular.going_modular import engine
# Helper from helper_functions; presumably fixes the random seeds — TODO confirm
# what it seeds (it runs after the model and dataloaders were already created).
set_seeds()
# Train for 3 epochs; the per-epoch log printed by the helper reports train/test
# loss and accuracy. The return value is kept in `model_results`.
model_results = engine.train(model=model,
                             train_dataloader=train_dataloader,
                             test_dataloader=test_dataloader,
                             optimizer=optimizer,
                             loss_fn=loss_fn,
                             epochs=3,
                             device=device)

  0%|          | 0/3 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.2322 | train_acc: 0.9234 | test_loss: 0.1244 | test_acc: 0.9577
Epoch: 2 | train_loss: 0.0592 | train_acc: 0.9802 | test_loss: 0.1314 | test_acc: 0.9592
Epoch: 3 | train_loss: 0.0225 | train_acc: 0.9927 | test_loss: 0.1358 | test_acc: 0.9617


## Saving our model

In [None]:
from going_modular.going_modular import utils
# Persist the fine-tuned model to models/ViT_feature_extractor.pth; this is the
# file that is later moved into the demo folder and loaded by app.py.
utils.save_model(model=model,
                 target_dir='models',
                 model_name='ViT_feature_extractor.pth')

[INFO] Saving model to: models/ViT_feature_extractor.pth


## Model Deployment

In [None]:
# Import/install gradio
try:
  import gradio as gr
except:
  !pip -q install gradio
  import gradio as gr

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.9/46.9 MB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.2/322.2 kB[0m [31m26.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m130.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.4/62.4 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Auto-collect example images
import glob
# Gradio expects one list per example row, so wrap every matching .jpg path in its own list
example_list = [[jpg_path] for jpg_path in glob.iglob("data/examples/*.jpg")]
example_list

[['data/examples/ship.jpg'],
 ['data/examples/dog.jpg'],
 ['data/examples/cat.jpg'],
 ['data/examples/Airplane.jpg'],
 ['data/examples/bird.jpg'],
 ['data/examples/frog.jpg'],
 ['data/examples/automobile.jpg'],
 ['data/examples/deer.jpg'],
 ['data/examples/truck.jpg'],
 ['data/examples/horse.jpg']]

In [None]:
def predict(image):
    """Classify a single PIL image with the fine-tuned model.

    Args:
        image: A PIL image (Gradio passes one because the input uses type="pil").

    Returns:
        A string of the form "Predicted Class: <name>", where the name is
        looked up in `label_map` from the index of the highest logit.
    """
    # Force three channels so grayscale / RGBA uploads match what the model expects
    image = image.convert("RGB")
    # Same preprocessing as training, plus a leading batch dimension
    input_tensor = transform(image).unsqueeze(0).to(device)

    # Do not rely on whatever mode the training loop left the model in:
    # inference must always run in eval mode.
    model.eval()
    with torch.no_grad():
        output = model(input_tensor)
        predicted_class = output.argmax(1).item()

    return f"Predicted Class: {label_map[predicted_class]}"

In [None]:
# In-notebook demo: a PIL image goes in, the text returned by predict() comes out.
# `example_list` supplies the clickable example images.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs='text',
    examples= example_list,
    title="Vision Transformer CIFAR-10 Classifier",
    description="Upload a CIFAR-10 image, and the ViT Tiny model will predict the class."
)
# Start the app
demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://234b816628f1d679c6.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




## Turning our CIFAR Model into a deployable app

### Creating a `demo` folder to store our CIFAR app files

In [None]:
import pathlib
from pathlib import Path
import shutil

# Directory that will hold every file of the deployable CIFAR demo
cifar = Path('demo/cifar')

# Start from a clean slate: drop any previous copy, then (re)create the directory
if cifar.exists():
  shutil.rmtree(cifar)
cifar.mkdir(parents=True,exist_ok=True)

!ls demo/cifar

## Creating a folder of example images to use with our CIFAR demo

In [None]:
import shutil
from pathlib import Path

# Make sure the demo has an examples/ sub-directory
cifar_example_path = cifar/'examples'
cifar_example_path.mkdir(parents=True,exist_ok=True)

# Three hand-picked images from the downloaded examples to ship with the demo
cifar_examples = [Path('data/examples')/file_name
                  for file_name in ('Airplane.jpg', 'automobile.jpg', 'bird.jpg')]

# Copy each selected image into the demo's examples directory, keeping its file name
for example in cifar_examples:
  destination = cifar_example_path/example.name
  print(f'[INFO] Copying {example} to {destination}')
  shutil.copy2(example, destination)

[INFO] Copying data/examples/Airplane.jpg to demo/cifar/examples/Airplane.jpg
[INFO] Copying data/examples/automobile.jpg to demo/cifar/examples/automobile.jpg
[INFO] Copying data/examples/bird.jpg to demo/cifar/examples/bird.jpg


In [None]:
import os
# Paths relative to the demo folder (the form app.py will use), one single-item list per image
example_list = [[f'examples/{file_name}'] for file_name in os.listdir(cifar_example_path)]
example_list

[['examples/Airplane.jpg'], ['examples/bird.jpg'], ['examples/automobile.jpg']]

## Moving our trained Tiny ViT model to our CIFAR demo directory

In [None]:
import shutil

# Create a source path for our target model
tiny_vit_model_path = 'models/ViT_feature_extractor.pth'

# Create a destination path for our target model: same file name, inside the demo folder.
# Path(...).name takes the final component regardless of how deep the source path is
# (the previous split('/')[1] only worked for exactly "dir/file").
tiny_vit_destination = cifar/Path(tiny_vit_model_path).name

# Try to move the model file
try:
  print(f'[INFO] Attmepting to move :{tiny_vit_model_path} to {tiny_vit_destination}')

  # Move the model
  shutil.move(src=tiny_vit_model_path,
              dst=tiny_vit_destination)
  print(f'[INFO] Model move complete')

# Only a missing source file means "already moved"; any other failure
# (permissions, full disk, ...) should surface instead of being reported as this.
except FileNotFoundError:
  print(f"[INFO] No model found at {tiny_vit_model_path}, perhaps its already been moved?")
  print(f"[INFO] Model exists at {tiny_vit_destination}: {tiny_vit_destination.exists()}")

[INFO] Attmepting to move :models/ViT_feature_extractor.pth to demo/cifar/ViT_feature_extractor.pth
[INFO] Model move complete


## Turning our Tiny_ViT model into a Python script (model.py)

In [None]:
%%writefile demo/cifar/model.py
import torch
import torchvision
from torch import nn
import timm
from torchvision import transforms

def create_model(num_classes:int=10,
                 seeds:int=42):
  """Build the ViT-Tiny classifier and its matching preprocessing pipeline.

  Args:
    num_classes: Number of output classes for the replacement head (default 10).
    seeds: Seed passed to torch.manual_seed before the new head is created,
      so its initial weights are reproducible (default 42).

  Returns:
    A tuple ``(model, transform)``: the timm ``vit_tiny_patch16_224`` model with
    a new ``nn.Linear`` head, and the torchvision transform (resize to 224x224,
    to-tensor, normalise with mean 0.5 / std 0.5) to apply to input images.
  """
  transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))])

  # NOTE: pretrained=True fetches the pretrained checkpoint; callers that load
  # their own state dict afterwards overwrite these weights.
  model = timm.create_model('vit_tiny_patch16_224',pretrained=True)

  # Honour both parameters: previously `seeds` was unused and the head was
  # hard-coded to 10 outputs regardless of `num_classes`.
  torch.manual_seed(seeds)
  model.head = nn.Linear(in_features=model.head.in_features,out_features=num_classes)

  return model,transform

Writing demo/cifar/model.py


In [None]:
class_names

['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

## Turning our Tiny_ViT Gradio app into a Python script(app.py)

In [None]:
%%writefile demo/cifar/app.py
### 1. Imports and class names setup ###
import gradio as gr
import os
import torch

from model import create_model
from timeit import default_timer as timer
from typing import Tuple,Dict

# Setup class names
class_names = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']

### 2. Model and transforms preparation ###
# create_model returns the network together with the preprocessing to apply to inputs
tiny_vit,tiny_vit_transforms = create_model(num_classes=10)

# Load saved weights
# map_location forces the tensors onto the CPU, so the checkpoint loads on machines without a GPU
tiny_vit.load_state_dict(torch.load(f='ViT_feature_extractor.pth',
                                    map_location=torch.device('cpu')))

# Predict function
def predict(image):
    """Return the class probabilities the model assigns to one PIL image.

    Args:
        image: A PIL image (the Gradio input is declared with type="pil").

    Returns:
        A dict mapping every entry of `class_names` to its softmax probability
        (a Python float).
    """
    # Make sure the image is in RGB
    image = image.convert("RGB")

    # Apply the transform returned by create_model and add a batch dimension.
    # The weights were loaded onto the CPU, so the input stays on the CPU too
    # (this script defines no `transform`, `device` or `model` names).
    input_tensor = tiny_vit_transforms(image).unsqueeze(0)

    # Set model to evaluation mode
    tiny_vit.eval()
    with torch.no_grad():
        # Get the model's raw output (logits)
        output = tiny_vit(input_tensor)

        # Apply softmax to convert logits to probabilities
        pred_probs = torch.softmax(output, dim=1)

    # Create a dictionary mapping class names to their probabilities
    pred_label_and_probs = {
        class_names[i]: float(pred_probs[0][i]) for i in range(len(class_names))
    }

    return pred_label_and_probs

# Gradio app
# Plain strings: a trailing comma after the title would silently turn it into a tuple.
title = "Vision Transformer CIFAR-10 Classifier"
description = "Upload a CIFAR-10 image, and the ViT Tiny model will predict the class."

# Creating example list
example_list =[['examples/'+example] for example in os.listdir('examples')]

# Create the gradio demo
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    # predict returns a {class name: probability} dict, which is the input a
    # Label component renders; show the three most likely classes.
    outputs=gr.Label(num_top_classes=3, label="Predictions"),
    examples=example_list,
    title=title,
    description=description,
)
demo.launch()

Overwriting demo/cifar/app.py


## Creating a requirement file `requirements.txt`

In [None]:
%%writefile demo/cifar/requirements.txt
torch >= 1.12.0
torchvision >= 0.13.0
gradio >= 3.1.4
# model.py imports timm to build the ViT, so the deployed app needs it installed
timm

Writing demo/cifar/requirements.txt


## Deploying our CIFAR-10 app to Hugging Face Spaces

In [None]:
!ls demo/cifar/examples/

Airplane.jpg  automobile.jpg  bird.jpg


In [None]:
# Change into the demo/cifar directory and zip its contents from the inside, so the
# archive has app.py etc. at its root; compiled/notebook artefacts are excluded.
!cd demo/cifar && zip -r ../cifar.zip * -x "*.pyc" "*.ipynb" "*__pycache__*" "*ipynb_checkpoints*"

  adding: app.py (deflated 54%)
  adding: examples/ (stored 0%)
  adding: examples/Airplane.jpg (deflated 9%)
  adding: examples/bird.jpg (deflated 1%)
  adding: examples/automobile.jpg (deflated 11%)
  adding: model.py (deflated 48%)
  adding: requirements.txt (deflated 11%)
  adding: ViT_feature_extractor.pth (deflated 7%)


In [None]:
# Download
# google.colab only exists inside Colab; elsewhere the import fails and we tell
# the user where the archive is. Other errors (e.g. a missing zip) are no longer
# swallowed and misreported as "not running in Colab".
try:
  from google.colab import files
  files.download('demo/cifar.zip')
except ImportError:
  print("Not running in Google Colab, can't use google.colab.files.download(), please download demo/cifar.zip manually.")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>