## Install dependencies

In [None]:
!pip install transformers
!pip install split_folders

## Import libraries

In [None]:
import numpy as np
import shutil
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
# demonstration of calculating metrics for a neural network model using sklearn
from sklearn.datasets import make_circles
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

from transformers import ViTModel
from transformers.modeling_outputs import SequenceClassifierOutput
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
from torch.autograd import Variable
from transformers import ViTFeatureExtractor
from PIL import Image
import os, sys
import torchvision
import splitfolders
from torchvision.transforms import ToTensor

from torchvision import transforms
import requests



## model preparation

In [None]:
class ViTForImageClassification(nn.Module):
    """Pretrained ViT backbone followed by dropout and a linear classification head.

    Args:
        num_labels: Number of output classes of the linear head.
    """

    def __init__(self, num_labels=3):
        super().__init__()
        self.vit = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(self.vit.config.hidden_size, num_labels)
        self.num_labels = num_labels

    def forward(self, pixel_values, labels=None):
        """Classify a batch of images.

        Args:
            pixel_values: Input tensor passed to the backbone as `pixel_values`.
            labels: Optional tensor of target class indices. When given, the
                cross-entropy loss against the logits is also computed.

        Returns:
            A `(logits, loss)` tuple. `loss` is a plain Python float obtained
            with `.item()` (so it is detached and cannot be back-propagated),
            or `None` when `labels` is `None`.
        """
        outputs = self.vit(pixel_values=pixel_values)
        # Position 0 of the last hidden state is used as the image representation.
        pooled = self.dropout(outputs.last_hidden_state[:, 0])
        logits = self.classifier(pooled)

        if labels is None:
            return logits, None

        loss_fct = nn.CrossEntropyLoss()
        loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        return logits, loss.item()

In [None]:
# Training hyper-parameters.
EPOCHS = 3
BATCH_SIZE = 10
LEARNING_RATE = 2e-5

# `train_ds` is only created further down the notebook. On a fresh top-to-bottom
# run it does not exist yet, so fall back to the three chart classes. Only the
# expected NameError is caught so that any other problem is not hidden.
try:
    total_classes = len(train_ds.classes)
except NameError:
    total_classes = 3

We will use the pretrained Vision Transformer feature extractor, an Adam Optimizer, and a Cross Entropy Loss function.

In [None]:

# Use GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define the model and move it to the target device *before* building the
# optimizer, so the optimizer is constructed over the parameters it will update.
model_classifier = ViTForImageClassification(total_classes).to(device)
# Feature Extractor
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
# Adam Optimizer
optimizer = torch.optim.Adam(model_classifier.parameters(), lr=LEARNING_RATE)
# Cross Entropy Loss
loss_func = nn.CrossEntropyLoss()

Downloading:   0%|          | 0.00/425 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/330M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/160 [00:00<?, ?B/s]

## Downloading and Preparing Images Dataset

In [None]:
!mkdir bar_chart
!mkdir pie_chart
!mkdir line_chart

In [None]:
##Downloading Datasets of Graphs Images for Training 
!gdown --id 1Jt0lOgsZEglG3K6zuJ7kN7xWYzWqdnM6
!unzip /content/graphs-20210921T091128Z-001.zip

In [None]:
#!/usr/bin/python

def resize(path, size=(224, 224)):
    """Resize every regular file directly inside `path` to `size`, in place.

    Each file is opened as an image, converted to RGB, resized and written back
    to the same file name as a JPEG (quality 90). Sub-directories are skipped.

    Args:
        path: Directory containing the images; a trailing separator is optional.
        size: Target `(width, height)` in pixels. Defaults to 224x224.
    """
    for item in os.listdir(path):
        file_path = os.path.join(path, item)
        if not os.path.isfile(file_path):
            continue
        # The context manager closes the source file once the resized copy exists.
        with Image.open(file_path) as im:
            # JPEG cannot store alpha or palette modes, so normalise to RGB first.
            # LANCZOS is the filter that the removed `Image.ANTIALIAS` alias pointed to.
            resized = im.convert('RGB').resize(size, Image.LANCZOS)
        resized.save(file_path, 'JPEG', quality=90)
        
# Resize every downloaded image in the unpacked `graphs` folder, in place.
resize("/content/graphs/")

In [None]:
# Create the dataset root that the per-class folders are moved into below.
!mkdir data

In [None]:
# Move each empty class folder underneath the dataset root.
class_dir_moves = [
    ("/content/line_chart/", "/content/data/line_chart/"),
    ("/content/pie_chart/", "/content/data/pie_chart/"),
    ("/content/bar_chart/", "/content/data/bar_chart/"),
]
for source_dir, target_dir in class_dir_moves:
    os.rename(source_dir, target_dir)

In [None]:
# Route each image into its class folder based on its file-name prefix.
# Files that match none of the prefixes are left where they are.
prefix_to_target = (
    ("pie_chart_", "/content/data/pie_chart/{}"),
    ("bar_chart_", "/content/data/bar_chart/{}"),
    ("graph_", "/content/data/line_chart/{}"),
)
for a in os.listdir("/content/graphs"):
    for prefix, target_pattern in prefix_to_target:
        if a.startswith(prefix):
            os.rename("/content/graphs/{}".format(a), target_pattern.format(a))
            break
  

In [None]:
# Remove the unpacked folder together with any files that were not sorted into a class folder.
shutil.rmtree("/content/graphs")

In [None]:

# Split `data` into train / val / test folders under `output` (80% / 10% / 10%),
# using a fixed seed.
splitfolders.ratio(
    'data',
    output="output",
    seed=1337,
    ratio=(0.8, 0.1, 0.1),
)

Copying files: 5748 files [00:01, 4810.25 files/s]


In [None]:


# Build one ImageFolder dataset per split; every image is converted with ToTensor().
train_ds, valid_ds, test_ds = (
    torchvision.datasets.ImageFolder(split_dir, transform=ToTensor())
    for split_dir in (
        '/content/output/train/',
        '/content/output/val/',
        '/content/output/test/',
    )
)

The cell above converts the folder-structured dataset into PyTorch datasets using torchvision's `ImageFolder`, which derives each image's class label from the name of the sub-folder it is stored in.

## Training the Model

In [None]:
# Train the classifier. Every 50 steps, report the current training loss and the
# accuracy on one randomly drawn test batch.

print("Number of train samples: ", len(train_ds))
print("Number of test samples: ", len(test_ds))
print("Detected Classes are: ", train_ds.class_to_idx)

train_loader = data.DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
test_loader = data.DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)


def _extract_pixel_values(images):
    """Run the feature extractor over a batch and stack the result into one tensor.

    The batch is split into one array per sample, whatever its length, so the
    final (shorter) batch of an epoch is handled like any other.
    """
    per_sample = [image.numpy() for image in images]
    return torch.tensor(np.stack(feature_extractor(per_sample)['pixel_values'], axis=0))


# Errors are deliberately not caught here: a failure must stop the cell visibly
# instead of being reported as a successful run.
for epoch in range(EPOCHS):
    for step, (x, y) in enumerate(train_loader):
        model_classifier.train()  # the periodic check below switches to eval mode
        b_x = _extract_pixel_values(x).to(device)  # batch x (image)
        b_y = y.to(device)                         # batch y (target)

        # The model only returns a detached float loss, so pass no labels and
        # compute a differentiable loss from the logits here.
        output, _ = model_classifier(b_x, None)
        train_loss = loss_func(output, b_y)
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        if step % 50 == 0:
            # Draw one shuffled batch from the test set as a quick progress check.
            test_x, test_y = next(iter(test_loader))
            test_x = _extract_pixel_values(test_x).to(device)
            test_y = test_y.to(device)

            model_classifier.eval()  # disable dropout while measuring accuracy
            with torch.no_grad():
                test_output, _ = model_classifier(test_x, None)
            test_output = test_output.argmax(1)

            # Divide by the real batch length, which can be smaller than BATCH_SIZE.
            accuracy = (test_output == test_y).sum().item() / len(test_y)
            print('Epoch: ', epoch, '| train loss: %.4f' % train_loss.item(), '| test accuracy: %.2f' % accuracy)

print('Training completed.')

Number of train samples:  4597
Number of test samples:  577
Detected Classes are:  {'bar_chart': 0, 'line_chart': 1, 'pie_chart': 2}


  cpuset_checked))


Epoch:  0 | train loss: 0.0251 | test accuracy: 1.00
Epoch:  0 | train loss: 0.0276 | test accuracy: 1.00
Epoch:  0 | train loss: 0.0232 | test accuracy: 1.00
Epoch:  0 | train loss: 0.0313 | test accuracy: 1.00
Epoch:  0 | train loss: 0.1419 | test accuracy: 0.90
Epoch:  0 | train loss: 0.0286 | test accuracy: 1.00
Epoch:  0 | train loss: 0.0488 | test accuracy: 1.00
Epoch:  0 | train loss: 0.0280 | test accuracy: 1.00
Epoch:  0 | train loss: 0.0362 | test accuracy: 1.00
Epoch:  0 | train loss: 0.0239 | test accuracy: 1.00
Training completed.


Save the entire model: the whole model object (not only its weights) can be saved as follows:



In [None]:
# Save the whole trained model object. The variable is `model_classifier`;
# no variable named `model` exists in this notebook.
torch.save(model_classifier, '/content/model_classifer.pt')

NameError: ignored

Export Trained Model

Now that you have trained your custom vision transformer, you can export the trained model for inference on another device.

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

%cp /content/model.pt /content/gdrive/My\ Drive

## Evaluation of the Model (uncomment this section to use)

Finally, let's evaluate the model on every image of the validation split, one image at a time:

In [None]:
# Evaluate one image per batch, in a fixed order, over the validation split.
EVAL_BATCH = 1
eval_loader = data.DataLoader(
    valid_ds,
    batch_size=EVAL_BATCH,
    shuffle=False,
    num_workers=4,
)
# Accumulators for the predicted and the actual class names.
prdiction_list, actual_list = [], []

  cpuset_checked))


In [None]:
# Main testing loop: classify every image yielded by `eval_loader` and record
# the predicted and actual class names.

# Reverse lookup from class index to class name.
idx_to_class = {idx: name for name, idx in valid_ds.class_to_idx.items()}

model_classifier.eval()  # disable dropout so predictions are deterministic
with torch.no_grad():
    # Iterating the loader directly visits each batch exactly once, so no
    # StopIteration handling is needed.
    for inputs, target in eval_loader:
        # Each batch holds a single image (EVAL_BATCH = 1); move channels last.
        image_hwc = inputs[0].permute(1, 2, 0)
        pixel_values = torch.tensor(np.stack(feature_extractor(image_hwc)['pixel_values'], axis=0))

        # Send to appropriate computing device
        pixel_values = pixel_values.to(device)
        target = target.to(device)

        # Generate prediction
        prediction, loss = model_classifier(pixel_values, target)

        # Turn the single-sample prediction and target into plain ints;
        # `.item()` relies on the batch size being 1.
        predicted_class = prediction.argmax(dim=1).item()
        target_class = target.item()

        value_predicted = idx_to_class[predicted_class]
        value_target = idx_to_class[target_class]
        prdiction_list.append(value_predicted)
        actual_list.append(value_target)

        ### To inspect a sample visually while looping:
        # plt.imshow(image_hwc)
        # plt.title(f'Prediction: {value_predicted} - Actual target: {value_target}')
        # plt.show()

  cpuset_checked))


### Evaluation Accuracy,Precision,Recall and F1 Score

In [None]:

# Work on array copies so the original lists stay lists and the evaluation
# loop can still append to them if it is re-run.
y_true = np.array(actual_list)
y_pred = np.array(prdiction_list)

# `pos_label` only applies to average='binary', and 'positive' is not one of the
# class names, so it is not passed. With micro averaging the per-sample counts
# are pooled over all classes.

# accuracy: (tp + tn) / (p + n)
accuracy = accuracy_score(y_true, y_pred)
print('Accuracy: %f' % accuracy)
# precision tp / (tp + fp)
precision = precision_score(y_true, y_pred, average='micro')
print('Precision: %f' % precision)
# recall: tp / (tp + fn)
recall = recall_score(y_true, y_pred, average='micro')
print('Recall: %f' % recall)
# f1: 2 tp / (2 tp + fp + fn)
f1 = f1_score(y_true, y_pred, average='micro')
print('F1 score: %f' % f1)


Accuracy: 0.987805
Precision: 0.987805
Recall: 0.987805
F1 score: 0.987805




## Loading Pre-Trained Model (GPU is Essential)

In [None]:
###downloading Pre-Trained Model
!gdown --id 1AC9TfiOvpHqTFVNCVXTwr7jOKRJDPxdO #Input GDrive ID
#Loading Pre-Trained Model
MODEL_PATH = '/content/model_classifer.pt'
model_classifier = torch.load(MODEL_PATH)
#model.eval()

Downloading...
From: https://drive.google.com/uc?id=1AC9TfiOvpHqTFVNCVXTwr7jOKRJDPxdO
To: /content/model_classifer.pt
346MB [00:04, 77.9MB/s]
