In [1]:
import torch
import torch.nn as nn
from torch.optim import Adam
from google.colab import drive
import os
import cv2
from google.colab.patches import cv2_imshow
import random
drive.mount("/content/drive/")


Mounted at /content/drive/


In [2]:
class PatchEmbedding(nn.Module):
  def __init__(self,imgSize,patchSize,numOfChannels=1,embeddingDimension=769):
    super().__init__()
    self.imgSize=imgSize
    self.patchSize=patchSize
    self.numOfPatches = (imgSize//patchSize)**2
    self.projection = nn.Conv2d(numOfChannels,embeddingDimension,kernel_size=patchSize,stride=patchSize)
  
  def forward(self,input):
    input = self.projection(input) #(number of samples,embeding dimension, sqrt number of patches,sqrt number of patches )
    input = input.flatten(2) #(number of samples,embeding dimension,number of patches)
    input = input.transpose(1,2) #(number of samples,number of patches, embeding dimension)
    return input

In [3]:
class AttentionModule(nn.Module):
  def __init__(self,dimensions,numOfHeads=12,queryKeyValueBias=True,kvpDropoutProbability=0.,projectionDropoutProbability=0.):
    super().__init__()
    self.numOfHeads = numOfHeads
    self.dimensions = dimensions
    self.headDimension = dimensions//numOfHeads
    self.normalizationFactor = self.headDimension** -0.5  # extremly large values to softmax -> small gradients
    
    self.queryKeyValue = nn.Linear(dimensions,dimensions*3,bias =queryKeyValueBias )
    self.kvpDropout = nn.Dropout(kvpDropoutProbability)
    self.projection = nn.Linear(dimensions,dimensions)
    self.projectionDropout = nn.Dropout(projectionDropoutProbability)

  def forward(self,input):
    numOfSamples,numOfTokens,dimensions = input.shape
    if dimensions != self.dimensions:
      raise ValueError("Dimensions shape in Attention Module")
    queryKeysValues = self.queryKeyValue(input) #(number of samples, number of patches+1, dimensions *3)
    queryKeysValues = queryKeysValues.reshape(numOfSamples,numOfTokens,3,self.numOfHeads,self.headDimensions)
    queryKeysValues = queryKeysValues.permute(2,0,3,1,4) #(3,number of samples, number of heads,number of patches +1, head dimenson)
    query,key,value = queryKeysValues[0],queryKeysValues[1],queryKeysValues[2]
    keyTranspose = key.Transpose(-2,-1) #(number of samples, number of heads,head dimension,number of patches)
    dotProduct = (query @ keyTranspose) * self.normalizationFactor # (number of samples,number of heads,number of patches +1, number of patches + 1)
    attention = dotProduct.softmax(dim=-1) # (number of samples,number of heads,number of patches +1, number of patches + 1)
    attention = self.kvpDropout(attention)

    weightedAverage = attention @ value # (number of samples,number of heads,number of patches +1, head dimension)
    weightedAverage = weightedAverage.Transpose(1,2) #(number of samples, number of patches + 1,number of heads,head dimension)
    weightedAverage = weightedAverage.flatten(2) #(number of samples, number of patches +1, dimensions)
    input = self.projection(weightedAverage)
    input = self.projectionDropout(input)
    return input
     

In [4]:
class MLP(nn.Module):
   def __init__(self,inputFeatures,hiddenFeatures,outputFeatures,prob=0.):
      super().__init__()
      self.fullyConnected1= nn.Linear(inputFeatures,hiddenFeatures)
      self.activateion = nn.GELU()
      self.fulltConnected2= nn.Linear(hiddenFeatures,outputFeatures)
      self.dropout = nn.Dropout(prob)
   
   def forward (self,input):
     input = self.fullyConnected1(input)
     input = self.activateion(input)
     input = self.dropout(input)
     input = self.fullyConnected2(input)
     input = self.dropout(input)
     return input

In [5]:
class TransformerBlock(nn.Module):
  def __init__(self,dimensions,numOfHeads,MLPRatio,QKVBias=True,p=0.,attentionProb=0.):
    super(TransformerBlock,self).__init__()
    self.normalization1 = nn.LayerNorm(dimensions,eps=1e-6)
    self.attention = AttentionModule(dimensions,numOfHeads,QKVBias,attentionProb,p)
    self.normalization2= nn.LayerNorm(dimensions,eps=1e-6)
    hiddenFeatures = int(dimensions*MLPRatio)
    self.mlp = MLP(dimensions,hiddenFeatures,dimensions)

  def forward(self,input):
    #residual block
    input = input + self.attention(self.normalization1(input))
    input = input + self.mlp(self.normalization2(input))
    return input
     

In [6]:
class VisionTransformer(nn.Module):
  def __init__(self,imgSize,patchSize,numOfChannels,numOfClasses,embeddingDim,depth, numOfHeads,MLPRatio,QKVBias,p=0.,attentionProb=0.):
    super(VisionTransformer,self).__init__()
    self.patchEmbedding = PatchEmbedding(imgSize,patchSize,numOfChannels,embeddingDim)
    self.classToken = nn.Parameter(torch.zeros(1,1,embeddingDim))
    self.positionalEmbedding = nn.Parameter(torch.zeros(1,1+self.patchEmbedding.numOfPatches,embeddingDim))
    self.positionDropout = nn.Dropout(p)
    self.blocks = nn.ModuleList([TransformerBlock(embeddingDim,numOfHeads,MLPRatio,QKVBias,p,attentionProb) for ctr in range (depth)])
    self.normalization = nn.LayerNorm(embeddingDim,eps= 1e-6)
    self.head = nn.Linear(embeddingDim,numOfClasses)
 
    def foward(self,input):
      numOfSamples = input.shape[0]
      input = self.patchEmbedding(input)
      classTokens = self.classToken.expand(numOfSamples,-1,-1)
      input = torch.cat((classTokens,input),dim=1)
      input = input + self.positionalEmbedding
      input = self.positionDropout(input)
      for block in self.blocks:
        input = block(input)
        input = self.normalization(input)
        finalClassTokens = input[:,0]
        input = self.head(finalClassTokens)
      return input

In [7]:
model = VisionTransformer(200,20,1,5,768,12,8,0.4,True,0.3,0.2)

In [8]:
lossFunction = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(),lr=0.001,weight_decay=0.0001)


In [9]:
mainPath = "/content/drive/MyDrive/Deep_learning/Identification Data"
classes = ['personA','personB','personC','personD','personE']
trainSet = []
testSet = []
imgSize = 200
for person in os.listdir(mainPath):
   personPath = os.path.join(mainPath,person)
   for dataDir in os.listdir(personPath):
      imagesPath = os.path.join(personPath,dataDir)
      for image in os.listdir(imagesPath):
        imagePath = os.path.join(imagesPath,image)
        try:
           image = cv2.imread(imagePath,0)
           image = cv2.resize(image,(imgSize,imgSize))
           label = classes.index(person)
           if dataDir == "Train":
              trainSet.append([image,label])
           else:
             testSet.append([image,label])
        except:
            print(imagePath)

/content/drive/MyDrive/Deep_learning/Identification Data/personD/Train/.ipynb_checkpoints
/content/drive/MyDrive/Deep_learning/Identification Data/personD/Test/.ipynb_checkpoints
/content/drive/MyDrive/Deep_learning/Identification Data/personC/Train/.ipynb_checkpoints
/content/drive/MyDrive/Deep_learning/Identification Data/personC/Test/.ipynb_checkpoints
/content/drive/MyDrive/Deep_learning/Identification Data/personA/Train/.ipynb_checkpoints
/content/drive/MyDrive/Deep_learning/Identification Data/personA/Test/.ipynb_checkpoints
/content/drive/MyDrive/Deep_learning/Identification Data/personE/Test/.ipynb_checkpoints
/content/drive/MyDrive/Deep_learning/Identification Data/personE/Train/.ipynb_checkpoints
/content/drive/MyDrive/Deep_learning/Identification Data/personB/Test/.ipynb_checkpoints
/content/drive/MyDrive/Deep_learning/Identification Data/personB/Train/.ipynb_checkpoints


In [10]:
random.shuffle(trainSet)
random.shuffle(testSet)

In [11]:
from torch.autograd import Variable

In [12]:
def saveModel():
    path = "./myFirstModel.pth"
    torch.save(model.state_dict(), path)

In [13]:
def testAccuracy():
    
    model.eval()
    accuracy = 0.0
    total = 0.0
    
    with torch.no_grad():
        for data in testSet:
            images, labels = data
            # run the model on the test set to predict labels
            outputs = model(images)
            # the label with the highest energy will be our prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            accuracy += (predicted == labels).sum().item()
    
    # compute the accuracy over all test images
    accuracy = (100 * accuracy / total)
    return(accuracy)

In [14]:
def train(num_epochs):
    
    best_accuracy = 0.0

    # Define your execution device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("The model will be running on", device, "device")
    # Convert model parameters and buffers to CPU or Cuda
    model.to(device)

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        running_acc = 0.0
        for i, (images, labels) in enumerate(trainSet, 0):
            # get the inputs
            #images = Variable(images.to(device))
            #labels = Variable(labels.to(device))

            # zero the parameter gradients
            optimizer.zero_grad()
            # predict classes using images from the training set
            outputs = model.forward(images)
            # compute the loss based on model output and real labels
            loss = lossFunction(outputs, labels)
            # backpropagate the loss
            loss.backward()
            # adjust parameters based on the calculated gradients
            optimizer.step()

            # Let's print statistics for every 1,000 images
            running_loss += loss.item()     # extract the loss value
            if i % 1000 == 999:    
                # print every 1000 (twice per epoch) 
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 1000))
                # zero the loss
                running_loss = 0.0
        # Compute and print the average accuracy fo this epoch when tested over all 10000 test images
        accuracy = testAccuracy()
        print('For epoch', epoch+1,'the test accuracy over the whole test set is %d %%' % (accuracy))
        
        # we want to save the model if the accuracy is the best
        if accuracy > best_accuracy:
            saveModel()
            best_accuracy = accuracy

In [15]:
train(10)

The model will be running on cuda:0 device


NotImplementedError: ignored