<a href="https://colab.research.google.com/github/Kumar-Sanskar-2724/Vision-Transformer/blob/main/Vision_Transformer_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets,transforms
import timm
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from PIL import Image
# Get going_modular directory
try:
  from going_modular.going_modular import data_setup,engine
  from helper_functions import plot_loss_curves,set_seeds,download_data
except:
  print(f"[INFO] Couldn't find going_modular and helper_functions directory, downloading them from GitHub...")
  !git clone https://github.com/mrdbourke/pytorch-deep-learning
  !mv pytorch-deep-learning/going_modular .
  !mv pytorch-deep-learning/helper_functions.py .
  !rm -rf pytorch-deep-learning
  from going_modular.going_modular import data_setup,engine
  from helper_functions import download_data,set_seeds,plot_loss_curves

[INFO] Couldn't find going_modular and helper_functions directory, downloading them from GitHub...
Cloning into 'pytorch-deep-learning'...
remote: Enumerating objects: 4393, done.[K
remote: Counting objects: 100% (1534/1534), done.[K
remote: Compressing objects: 100% (133/133), done.[K
remote: Total 4393 (delta 1457), reused 1401 (delta 1401), pack-reused 2859 (from 2)[K
Receiving objects: 100% (4393/4393), 650.71 MiB | 38.09 MiB/s, done.
Resolving deltas: 100% (2659/2659), done.
Updating files: 100% (248/248), done.


## Setting Devic Agnostic Code

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [4]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

## Getting Data

In [8]:
train_dataset = datasets.CIFAR10(root='./data',train=True,transform=transform,download=True)
test_dataset = datasets.CIFAR10(root='./data',train=False,transform=transform,download=True)

100%|██████████| 170M/170M [00:03<00:00, 43.1MB/s]


In [9]:
class_names=train_dataset.classes
class_names
class_idx = train_dataset.class_to_idx
class_idx

{'airplane': 0,
 'automobile': 1,
 'bird': 2,
 'cat': 3,
 'deer': 4,
 'dog': 5,
 'frog': 6,
 'horse': 7,
 'ship': 8,
 'truck': 9}

In [51]:
example = download_data(source='https://github.com/Kumar-Sanskar-2724/Vision-Transformer/raw/refs/heads/main/examples/examples.zip',destination='examples')

[INFO] data/examples directory exists, skipping download.


In [10]:
label_map = {
    0: 'airplane',
    1: 'automobile',
    2: 'bird',
    3: 'cat',
    4: 'deer',
    5: 'dog',
    6: 'frog',
    7: 'horse',
    8: 'ship',
    9: 'truck'
}

In [11]:
train_dataloader = DataLoader(train_dataset,batch_size=32,shuffle=True)
test_dataloader = DataLoader(test_dataset,batch_size=32,shuffle=False)

## Setting up Model

In [12]:
model = timm.create_model('vit_tiny_patch16_224',pretrained=True)
model.head = nn.Linear(in_features=model.head.in_features,out_features=10)
model.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/22.9M [00:00<?, ?B/s]

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 192, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=192, out_features=576, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=192, out_features=192, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=192, out_features=768, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity()


## Creating a function to time our experiments

In [13]:
from timeit import default_timer as timer
def print_train_time(start:float,
                     end:float,
                     device:torch.device=None):
  """ Prints difference between start time and end time"""
  train_time = end - start
  print(f"Total train time on:{device} {train_time:.3f} seconds")
  return train_time

## Training and Evaluation

In [14]:
loss_fn=nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(),lr=3e-5)

In [15]:
from going_modular.going_modular import engine
set_seeds()
model_results = engine.train(model=model,
                             train_dataloader=train_dataloader,
                             test_dataloader=test_dataloader,
                             optimizer=optimizer,
                             loss_fn=loss_fn,
                             epochs=5,
                             device=device)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.2117 | train_acc: 0.9309 | test_loss: 0.1160 | test_acc: 0.9618
Epoch: 2 | train_loss: 0.0611 | train_acc: 0.9794 | test_loss: 0.1141 | test_acc: 0.9651
Epoch: 3 | train_loss: 0.0368 | train_acc: 0.9873 | test_loss: 0.1425 | test_acc: 0.9618
Epoch: 4 | train_loss: 0.0257 | train_acc: 0.9914 | test_loss: 0.1609 | test_acc: 0.9587
Epoch: 5 | train_loss: 0.0229 | train_acc: 0.9921 | test_loss: 0.1580 | test_acc: 0.9603


## Saving our model

In [16]:
from going_modular.going_modular import utils
utils.save_model(model=model,
                 target_dir='models',
                 model_name='ViT_feature_extractor.pth')

[INFO] Saving model to: models/ViT_feature_extractor.pth


## Model Deplyoment

In [17]:
# Import/install gradio
try:
  import gradio as gr
except:
  !pip -q install gradio
  import gradio as gr

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.9/46.9 MB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.2/322.2 kB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m130.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.4/62.4 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [48]:
# Auto-collect example images
example_list = [[path] for path in glob.glob("data/examples/*.jpg")]
example_list

[['data/examples/ship.jpg'],
 ['data/examples/dog.jpg'],
 ['data/examples/cat.jpg'],
 ['data/examples/Airplane.jpg'],
 ['data/examples/bird.jpg'],
 ['data/examples/frog.jpg'],
 ['data/examples/automobile.jpg'],
 ['data/examples/deer.jpg'],
 ['data/examples/truck.jpg'],
 ['data/examples/horse.jpg']]

In [49]:
def predict(image):
    image = image.convert("RGB")
    input_tensor = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(input_tensor)
        predicted_class = output.argmax(1).item()

    return f"Predicted Class: {label_map[predicted_class]}"

In [50]:
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs='text',
    examples= example_list,
    title="Vision Transformer CIFAR-10 Classifier",
    description="Upload a CIFAR-10 image, and the ViT Tiny model will predict the class."
)
demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f7541b34239cee280a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


