In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms.functional as Ft
import torchvision.transforms as transforms
import torchvision
import torch.onnx
from torch.utils.data import Dataset,DataLoader
import numpy as np
import math
import os
import pandas as pd
from PIL import Image
from skimage import io
# device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# Convolutional trunk layouts for the VGG family.
# An integer is the output-channel count of a 3x3 convolution; "M" marks a
# 2x2 max-pooling step.  One row per resolution stage keeps the specs readable.
VGG_types = {
    "VGG11": [
        64, "M",
        128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    "VGG13": [
        64, 64, "M",
        128, 128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    "VGG16": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "M",
        512, 512, 512, "M",
        512, 512, 512, "M",
    ],
    "VGG19": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, 256, "M",
        512, 512, 512, 512, "M",
        512, 512, 512, 512, "M",
    ],
}

# Character -> class-index lookup: digits take 0-9, lowercase letters 10-35
# and uppercase letters 36-61 (62 classes in total).
label = {
    char: index
    for index, char in enumerate(
        "0123456789"
        "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    )
}

In [6]:
class Model(nn.Module):
    """VGG-style image classifier: a convolutional trunk plus a dense head.

    The trunk is generated from one of the layer specifications stored in the
    module-level ``VGG_types`` table; the head is three fully connected layers
    with ReLU and dropout in between.

    Args:
        in_channels: Number of channels of the input images.
        num_classes: Number of logits produced by the final linear layer.
        architecture: Key of ``VGG_types`` selecting the trunk layout.
            Defaults to ``"VGG16"``, which is what this class always used.

    Raises:
        ValueError: If ``architecture`` is not a key of ``VGG_types``.

    Note:
        The first linear layer expects ``512 * 7 * 7`` features.  Every spec in
        ``VGG_types`` ends with 512 channels and contains five stride-2 pools,
        so this matches 224x224 inputs (224 / 2**5 == 7).
    """

    def __init__(self, in_channels, num_classes, architecture="VGG16"):
        super(Model, self).__init__()
        if architecture not in VGG_types:
            raise ValueError(
                "Unknown architecture %r; expected one of %s"
                % (architecture, sorted(VGG_types))
            )
        self.in_channels = in_channels

        self.conv_layer = self.create_conv_layers(VGG_types[architecture])
        self.fcs = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.conv_layer(x)
        # Collapse everything except the batch dimension for the dense head.
        x = torch.flatten(x, start_dim=1)
        return self.fcs(x)

    def create_conv_layers(self, architecture):
        """Build the convolutional trunk described by ``architecture``.

        Args:
            architecture: Sequence whose items are either an ``int`` (output
                channels of a 3x3 conv followed by batch-norm and ReLU) or the
                string ``"M"`` (2x2 max-pool with stride 2).

        Returns:
            An ``nn.Sequential`` holding the generated layers in order.

        Raises:
            ValueError: If an item is neither an ``int`` nor ``"M"``.  Such
                items used to be skipped silently, hiding typos in a spec.
        """
        layers = []
        in_channels = self.in_channels

        for x in architecture:
            # bool is a subclass of int; exclude it so True/False is rejected.
            if isinstance(x, int) and not isinstance(x, bool):
                out_channels = x
                layers += [
                    nn.Conv2d(
                        in_channels,
                        out_channels,
                        kernel_size=(3, 3),
                        stride=(1, 1),
                        padding=(1, 1),
                    ),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(),
                ]
                # The next convolution consumes what this one produced.
                in_channels = out_channels
            elif x == "M":
                layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]
            else:
                raise ValueError("Unsupported architecture entry: %r" % (x,))

        return nn.Sequential(*layers)
        

In [None]:
# Instantiate the classifier: 3-channel input, one logit per entry of `label`.
model = Model(
    num_classes=62,
    in_channels=3,
)
# model.to(device)

In [None]:
class English(Dataset):
    """Character-image dataset indexed by a CSV file.

    Column 0 of the CSV holds the image path relative to ``root_dir``; column 1
    holds the character, which is translated to a class index through the
    module-level ``label`` table.

    Args:
        csv_file: Path (or file-like object) passed to ``pandas.read_csv``.
        root_dir: Directory the image paths in the CSV are relative to.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, csv_file, root_dir, transform):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.annotations)

    def _image_path(self, index):
        """Return the on-disk path of the image at row ``index``."""
        # Honour root_dir; it was previously stored but ignored in favour of a
        # hard-coded "./data/" prefix.
        return os.path.join(self.root_dir, self.annotations.iloc[index, 0])

    def __getitem__(self, index):
        """Return ``(image, class_index_tensor)`` for row ``index``.

        Raises:
            KeyError: If the character in column 1 is not a key of ``label``.
        """
        # convert() loads the pixel data, so the file can be closed right away.
        # Forcing RGB guarantees three channels for a 3-channel transform even
        # when a file is stored as grayscale, palette or RGBA.
        with Image.open(self._image_path(index)) as img_file:
            image = img_file.convert("RGB")
        y_label = torch.tensor(label[str(self.annotations.iloc[index, 1])])

        if self.transform:
            image = self.transform(image)

        return (image, y_label)

In [None]:
# Preprocessing applied to every image: resize to the 224x224 input the model
# head is sized for, convert to a tensor, then normalise each channel.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
# Full annotated dataset; it is split into train and test subsets afterwards.
dataset = English(
    csv_file="./data/english.csv",
    root_dir="./data",
    transform=transform,
)

In [None]:
# 80/20 train/test split.  The sizes are derived from the dataset length
# (integer arithmetic, so 3410 samples still give 2728/682) instead of being
# hard-coded, which made random_split fail for any other dataset size.
n_train = len(dataset) * 4 // 5
n_test = len(dataset) - n_train

# A seeded generator makes the split reproducible across kernel restarts, so
# the same samples stay held out when weights are reloaded and evaluated later.
train_set, test_set = torch.utils.data.random_split(
    dataset,
    [n_train, n_test],
    generator=torch.Generator().manual_seed(0),
)
train_loader = DataLoader(dataset=train_set, batch_size=32, shuffle=True)
# Evaluation does not depend on sample order, so the test loader is not shuffled.
test_loader = DataLoader(dataset=test_set, batch_size=32, shuffle=False)

In [None]:
# Adam over all model parameters, paired with cross-entropy on the raw logits.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [None]:
# Train for 100 epochs and report the mean batch loss after each one.
# `device` was referenced here without being defined anywhere (NameError on a
# fresh kernel); take it from the model so batches always land where the
# parameters live, whether or not the model was moved to an accelerator.
device = next(model.parameters()).device
model.train()  # make sure dropout / batch-norm are in training mode

for e in range(100):
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # .item() yields a plain float, so no autograd graph is kept alive.
        running_loss += loss.item()

    print("Epoch:"+str(e)+" loss:"+str(running_loss/len(train_loader)))

In [25]:
# Checkpoint file holding the model's state_dict; it is read back with
# torch.load(PATH) when the model is rebuilt for testing.
PATH = "weights.pth"
# torch.save(model.state_dict(), PATH)

# Testing

In [26]:
# Rebuild the network and restore the trained parameters for evaluation.
model = Model(in_channels=3, num_classes=62)
# map_location="cpu" lets the checkpoint load on a CPU-only machine even if it
# was saved from a GPU run; the model stays on the CPU in this notebook.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
model.load_state_dict(torch.load(PATH, map_location="cpu"))
# model.to(device)
# Switch dropout / batch-norm to inference behaviour (also echoes the model).
model.eval()

Model(
  (conv_layer): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU()
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU()
    (13): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256, kernel_size=(3, 3), st

In [None]:
# Top-1 accuracy on the held-out split, computed one batch at a time.
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
#         inputs=inputs.to(device)
#         labels=labels.to(device)

        outputs = model(inputs)

        # Compare the highest-scoring class of every row against its target.
        predicted = outputs.argmax(dim=1)
        correct += int((predicted == labels).sum())
        total += outputs.shape[0]

    print(f'Total:{total}, Correct:{correct}, Percentage:{(correct*100)/total}')

# Using ONNX format

In [41]:
import cv2
import numpy as np

# Preprocessing for the single test image (same steps as the training transform).
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

net = cv2.dnn.readNetFromONNX("vgg.onnx")

# Force three channels so the 3-channel Normalize (and the 3-channel model)
# also accept grayscale / palette / RGBA PNG files; close the file afterwards.
with Image.open("img006-008.png") as pil_img:
    img = transform(pil_img.convert("RGB"))
img_tensor = torch.unsqueeze(img, 0)  # add the batch dimension

# Reference prediction from the PyTorch model; inference needs no gradients.
with torch.no_grad():
    print(torch.argmax(model(img_tensor)))

# The tensor is channels-first; reorder it to height x width x channels before
# handing it to blobFromImage (called with its default arguments).
img_hwc = img_tensor[0].numpy().transpose(1, 2, 0)

blob = cv2.dnn.blobFromImage(img_hwc)
net.setInput(blob)
preds = net.forward()
biggest_pred_index = np.array(preds)[0].argmax()
print ("Predicted class:",biggest_pred_index)

torch.Size([1, 3, 224, 224])
tensor(5)
Predicted class: 5
(224, 224, 3)


In [15]:
# IPython introspection (development aid): look up cv2.dnn.blobFromImage.
??cv2.dnn.blobFromImage