<a href="https://colab.research.google.com/github/Kumar-Sanskar-2724/Vision-Transformer/blob/main/Vision_Transformer_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch torchvision timm scikit-learn fastapi uvicorn --quiet

In [None]:
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets,transforms
import timm
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from fastapi import FastAPI, UploadFile
from PIL import Image
import io
# Get going_modular directory
try:
  from going_modular.going_modular import data_setup,engine
  from helper_functions import plot_loss_curves,set_seeds,download_data
except:
  print(f"[INFO] Couldn't find going_modular and helper_functions directory, downloading them from GitHub...")
  !git clone https://github.com/mrdbourke/pytorch-deep-learning
  !mv pytorch-deep-learning/going_modular .
  !mv pytorch-deep-learning/helper_functions.py .
  !rm -rf pytorch-deep-learning
  from going_modular.going_modular import data_setup,engine
  from helper_functions import download_data,set_seeds,plot_loss_curves

[INFO] Couldn't find going_modular and helper_functions directory, downloading them from GitHub...
Cloning into 'pytorch-deep-learning'...
remote: Enumerating objects: 4393, done.[K
remote: Counting objects: 100% (1534/1534), done.[K
remote: Compressing objects: 100% (135/135), done.[K
remote: Total 4393 (delta 1457), reused 1399 (delta 1399), pack-reused 2859 (from 2)[K
Receiving objects: 100% (4393/4393), 650.71 MiB | 28.59 MiB/s, done.
Resolving deltas: 100% (2659/2659), done.
Updating files: 100% (248/248), done.


## Setting Up Device-Agnostic Code

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [None]:
# Preprocessing pipeline applied to both dataset splits and to images uploaded
# in the demo: resize to 224x224, convert to a tensor, then normalize with
# mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

## Getting Data

In [None]:
# Build the CIFAR-10 train split first, then the test split. Both are
# downloaded into ./data if needed and share the same preprocessing transform.
train_dataset, test_dataset = (
    datasets.CIFAR10(root='./data', train=is_train, transform=transform, download=True)
    for is_train in (True, False)
)

In [None]:
# Class names and the name -> index mapping exposed by the dataset.
class_names, class_idx = train_dataset.classes, train_dataset.class_to_idx
# Only the final expression of a cell is rendered by default, so the mapping
# below is what appears in the cell output.
class_names
class_idx

{'airplane': 0,
 'automobile': 1,
 'bird': 2,
 'cat': 3,
 'deer': 4,
 'dog': 5,
 'frog': 6,
 'horse': 7,
 'ship': 8,
 'truck': 9}

In [None]:
# Index -> class-name lookup used to turn a predicted class index into a label.
# The order matches the dataset's class_to_idx mapping (airplane=0 ... truck=9).
label_map = dict(enumerate([
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]))

In [None]:
# Batches of 32 for both splits; only the training split is reshuffled.
train_dataloader, test_dataloader = (
    DataLoader(dataset=split, batch_size=32, shuffle=reshuffle)
    for split, reshuffle in ((train_dataset, True), (test_dataset, False))
)

## Setting up Model

In [None]:
# Load the pretrained ViT-Tiny (patch 16, 224x224 input) from timm.
model = timm.create_model(model_name='vit_tiny_patch16_224', pretrained=True)
# Swap the classification head for a fresh linear layer with 10 outputs,
# one per CIFAR-10 class, keeping the head's input width.
model.head = nn.Linear(model.head.in_features, 10)
# Move the model to the selected device; as the last expression this also
# displays the module structure.
model.to(device)

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 192, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=192, out_features=576, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=192, out_features=192, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((192,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=192, out_features=768, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity()


## Creating a function to time our experiments

In [None]:
from timeit import default_timer as timer
def print_train_time(start: float, end: float, device: torch.device = None):
  """Print and return the elapsed time between two timestamps.

  Args:
    start: Timestamp taken before the timed work.
    end: Timestamp taken after the timed work.
    device: Device label included in the printed message (defaults to None).

  Returns:
    The difference ``end - start``.
  """
  elapsed = end - start
  message = f"Total train time on:{device} {elapsed:.3f} seconds"
  print(message)
  return elapsed

## Training and Evaluation

In [None]:
# Cross-entropy loss for the multi-class classification task.
loss_fn = nn.CrossEntropyLoss()
# Adam over every model parameter (the whole network is updated, not just the
# new head) with a learning rate of 3e-5.
optimizer = torch.optim.Adam(model.parameters(), lr=3e-5)

In [None]:
from going_modular.going_modular import engine

# set_seeds() comes from helper_functions and is called with its defaults;
# presumably it seeds the torch RNGs for reproducibility (verify in helper_functions.py).
set_seeds()

# Run 5 epochs of training and evaluation through the going_modular engine and
# keep whatever engine.train returns for later inspection.
model_results = engine.train(
    model=model,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    loss_fn=loss_fn,
    optimizer=optimizer,
    epochs=5,
    device=device,
)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.2082 | train_acc: 0.9331 | test_loss: 0.1323 | test_acc: 0.9573
Epoch: 2 | train_loss: 0.0593 | train_acc: 0.9802 | test_loss: 0.1283 | test_acc: 0.9604
Epoch: 3 | train_loss: 0.0353 | train_acc: 0.9880 | test_loss: 0.1340 | test_acc: 0.9627
Epoch: 4 | train_loss: 0.0249 | train_acc: 0.9913 | test_loss: 0.1475 | test_acc: 0.9612
Epoch: 5 | train_loss: 0.0219 | train_acc: 0.9924 | test_loss: 0.1454 | test_acc: 0.9642


## Saving our model

In [None]:
from going_modular.going_modular import utils

# Save the fine-tuned model under models/ViT_feature_extractor.pth using the
# going_modular helper.
utils.save_model(
    model=model,
    target_dir='models',
    model_name='ViT_feature_extractor.pth',
)

[INFO] Saving model to: models/ViT_feature_extractor.pth


## Model Deployment

In [None]:
# Import/install gradio
try:
  import gradio as gr
except:
  !pip -q install gradio
  import gradio as gr

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.9/46.9 MB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.2/322.2 kB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.4/11.4 MB[0m [31m73.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
def predict_tensor(input_tensor):
    """Classify an already-preprocessed image tensor with the global model.

    Args:
        input_tensor: Image tensor that is either 3-D (a single image, which
            gets a batch dimension added) or already carries a batch dimension.
            Because the prediction is read with ``.item()``, the batch must
            contain exactly one image.

    Returns:
        A string of the form ``"Predicted Class: <class name>"``.
    """
    if input_tensor.dim() == 3:
        input_tensor = input_tensor.unsqueeze(0)  # add batch dimension

    # Always move the input to the model's device. Previously this happened
    # only inside the branch above, so an already-batched tensor stayed on its
    # original device and failed against a model on another device.
    input_tensor = input_tensor.to(device)

    # Make sure inference-time behavior is active regardless of what ran before.
    model.eval()
    with torch.no_grad():
        output = model(input_tensor)
        predicted_class = output.argmax(1).item()

    return f"Predicted Class: {label_map[predicted_class]}"

In [None]:
# Sanity check on the first training sample: the dataset yields an
# already-transformed image tensor together with its integer label.
image, label = train_dataset[0]
print("Ground Truth:", label_map[label])
prediction = predict_tensor(image)
print("Prediction:", prediction)

Ground Truth: frog
Prediction: Predicted Class: frog


In [None]:
def predict(image):
    """Classify a PIL image with the global model for the Gradio demo.

    Args:
        image: PIL image from the ``gr.Image(type="pil")`` input, or ``None``
            when the form is submitted without an image.

    Returns:
        ``"Predicted Class: <class name>"`` for a valid image, or a short
        message asking for an image when none was provided.
    """
    # Without this guard, submitting with no upload fails with an
    # AttributeError on ``None.convert``.
    if image is None:
        return "No image provided. Please upload an image."

    # Force 3 channels (uploads may be grayscale or RGBA), apply the same
    # preprocessing used for the datasets, and add the batch dimension.
    image = image.convert("RGB")
    input_tensor = transform(image).unsqueeze(0)
    input_tensor = input_tensor.to(device)

    # Make sure inference-time behavior is active regardless of what ran before.
    model.eval()
    with torch.no_grad():
        output = model(input_tensor)
        predicted_class = output.argmax(1).item()

    return f"Predicted Class: {label_map[predicted_class]}"


In [None]:
# Wire the predict() function into a simple Gradio UI: a PIL image goes in,
# the predicted-class text comes out.
demo = gr.Interface(
    title="Vision Transformer CIFAR-10 Classifier",
    description="Upload a CIFAR-10 image, and the ViT Tiny model will predict the class.",
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs="text",
)

# Start the app with default launch settings.
demo.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ac008c5754183c4f15.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


