In [None]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

In [None]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
# from detectron2.structures import BoxMode

# Register a COCO Format Dataset

In [None]:
from detectron2.data.datasets import register_coco_instances
DatasetCatalog.clear()

In [None]:
register_coco_instances("train_dataset", {}, "../dataset/FINAL/train/annotation.json", "../dataset/FINAL/train")
# register_coco_instances("validation_dataset", {}, "result.json", "path/to/image/dir")
# register_coco_instances("test_dataset", {}, "result.json", "path/to/image/dir")

In [None]:
# load the dictionary
dataset_dicts = DatasetCatalog.get('train_dataset')

In [None]:
# Build the Detectron2 config for the COCO keypoint R-CNN taken from the model zoo.
cfg = get_cfg()

# cfg.MODEL.DEVICE = "cpu"
# Start from the model-zoo config, then point the weights at the matching pre-trained checkpoint.
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml")
)
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"
)
# Confidence threshold applied to detections at test time.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7

In [None]:
# Smoke-test the pre-trained keypoint model on one image from the training set.
# `im` has to be loaded in this cell: no earlier cell defines it, so on a fresh
# kernel (Restart & Run All) the bare `predictor(im)` call raised NameError.
sample_path = dataset_dicts[0]["file_name"].replace('\\', '/')
im = cv2.imread(sample_path)  # cv2.imread yields BGR, the order DefaultPredictor documents
if im is None:
    # cv2.imread signals a missing/unreadable file by returning None, not by raising.
    raise FileNotFoundError(f"Could not read image: {sample_path}")

predictor = DefaultPredictor(cfg)
outputs = predictor(im)

In [None]:
n_samples = 1

# Build the predictor once; constructing it inside the loop rebuilds the model for every image.
predictor = DefaultPredictor(cfg)

# visualize N number of samples
for d in random.sample(dataset_dicts, n_samples):
    # This assignment was commented out, which left `filename` undefined below.
    filename = d["file_name"].replace('\\', '/')
    print(filename)

    im = cv2.imread(filename)
    if im is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise FileNotFoundError(f"Could not read image: {filename}")

    # Feed the BGR image straight to the predictor: DefaultPredictor documents BGR input,
    # so converting to RGB first silently swapped the colour channels.
    outputs = predictor(im)

    # Visualizer takes RGB, hence the channel flip; the second flip turns its RGB result
    # back into BGR for cv2_imshow (assumed to expect BGR like cv2.imshow -- confirm).
    v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2_imshow(out.get_image()[:, :, ::-1])

In [None]:
# Move the predictions to the CPU and pull the keypoints out as a NumPy array.
pred_keypoints = (
    outputs["instances"]
    .to("cpu")
    .pred_keypoints
    .numpy()
)
# Display the keypoints of the first detection.
pred_keypoints[0]

# Create a Classifier model

In [None]:
# !pip install pandas
import pandas as pd

In [None]:
def add_to_df(df, keypoints, classlabel, num_keypoints=17):
    """Return a new DataFrame equal to ``df`` plus one row of keypoint coordinates.

    Args:
        df: DataFrame to extend; it is not modified.
        keypoints: 2-D array-like indexed as ``keypoints[i, 0]`` (x) and
            ``keypoints[i, 1]`` (y) for ``i`` in ``range(num_keypoints)``.
            Any further columns are ignored.
        classlabel: value stored in the ``'letter'`` column of the new row.
        num_keypoints: number of keypoints to copy (defaults to 17, the
            value that used to be hard-coded).

    Returns:
        DataFrame with a fresh 0..n-1 index and the new row appended last.
    """
    row = {'letter': classlabel}
    for i in range(num_keypoints):
        row['x' + str(i)] = keypoints[i, 0]
        row['y' + str(i)] = keypoints[i, 1]

    row_df = pd.DataFrame([row])

    if df.empty:
        # Concatenating with an empty frame is deprecated in recent pandas, so return the
        # single row directly, arranged in df's column order (extra row columns go last).
        ordered = list(df.columns) + [c for c in row_df.columns if c not in df.columns]
        return row_df.reindex(columns=ordered)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
    return pd.concat([df, row_df], ignore_index=True)

In [None]:
# Empty feature table: one x/y column pair per keypoint index 0-16, then the class label.
df = pd.DataFrame(
    columns=[f'{axis}{idx}' for idx in range(17) for axis in ('x', 'y')] + ['letter']
)
print(df)

In [None]:
# Run keypoint inference on every training image and store one labelled row per image.
# NOTE: rows are appended to the existing `df`, so re-running this cell without
# re-creating `df` first duplicates every row.
skipped = []
for d in dataset_dicts:
    filename = d["file_name"].replace('\\', '/')

    im = cv2.imread(filename)
    if im is None:
        # cv2.imread returns None for a missing/unreadable file instead of raising.
        skipped.append(filename)
        continue

    # Keep the image in BGR: DefaultPredictor documents BGR input, and the demo
    # inference cell later in this notebook also passes cv2.imread output unchanged,
    # so the features are now extracted the same way in both places.
    outputs = predictor(im)

    # get keypoints
    pred_keypoints = outputs["instances"].to("cpu").pred_keypoints.numpy()
    if len(pred_keypoints) == 0:
        # Nothing was detected above the score threshold; indexing [0] would raise IndexError.
        skipped.append(filename)
        continue

    # get class label based on the filename (file name with ".jpg" removed)
    classlabel = filename.rsplit('/')[-1].replace(".jpg", "")

    # save the first detection's keypoints in the dataframe
    df = add_to_df(df, pred_keypoints[0], classlabel)

if skipped:
    print(f"Skipped {len(skipped)} image(s) (unreadable or no detection):", skipped[:10])

In [None]:
df.head(n=5)

In [None]:
# define the features and target variable
X = df.drop(columns=['letter'])
y = df['letter']

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale every feature column; X is rebound to the scaled result.
scaler = MinMaxScaler()
scaler.fit(X)
X = scaler.transform(X)

In [None]:
# Class label -> integer index used as the classifier target.
class2idx = {
    'A': 0, 'B': 1, "C": 2, "D": 3, 'E': 4, 'F': 5, "G": 6,
    "H": 7, 'I': 8, 'J': 9, 'K': 10, "L": 11, "M": 12, 'N': 13,
    'O': 14, 'P': 15, "Q": 16, "R": 17, 'S': 18, 'T': 19, 'U': 20,
    "V": 21, "W": 22, "X": 23, 'Y': 24, 'Z': 25,
    'SPACE': 26, 'START-STOP': 27,
}
# Reverse lookup (index -> label). dict has no `.item()` method -- that call raised
# AttributeError; `.items()` yields the (label, index) pairs.
idx2class = {v: k for k, v in class2idx.items()}

In [None]:
# Encode the labels: every value of `y` that is a key of class2idx is replaced, in place,
# by its integer index.
# NOTE(review): values that are not keys of class2idx are left as they are -- verify that
# every filename-derived label is covered before converting `y` to a tensor.
y.replace(class2idx, inplace=True)

In [None]:
X_tensor = torch.tensor(X)

In [None]:
y_tensor = torch.tensor(y.values)

In [None]:
X_tensor.shape

In [None]:
y_tensor.shape

In [None]:
# create a dataloader
# The class is spelled `DataLoader` (capital L); `Dataloader` raised ImportError.
from torch.utils.data import Dataset, DataLoader


class Data(Dataset):
    """Map-style dataset pairing each feature row with its class label.

    Args:
        x: feature tensor indexed along its first dimension. Defaults to the
            notebook-level ``X_tensor`` when omitted, so ``Data()`` still works.
        y: label tensor with the same first-dimension length. Defaults to the
            notebook-level ``y_tensor`` when omitted.

    Raises:
        ValueError: if ``x`` and ``y`` hold a different number of samples.
    """

    def __init__(self, x=None, y=None):
        self.x = X_tensor if x is None else x
        self.y = y_tensor if y is None else y
        if len(self.x) != len(self.y):
            raise ValueError(
                f"features and labels differ in length: {len(self.x)} vs {len(self.y)}"
            )
        self.len = self.x.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples."""
        return self.len

In [None]:
dataset = Data()

In [None]:
trainloader = DataLoader(dataset=dataset, batch_size=64)

### Create a classification model using a multilayer perceptron

In [None]:
import torch.nn as nn
import torch.optim as optim

In [None]:
class MulticlassClassification(nn.Module):
    """Feed-forward classifier: three hidden layers (512, 128, 64) and a linear output.

    Each hidden layer is followed by batch normalisation and ReLU; the second and
    third hidden layers are additionally followed by dropout (p=0.2). The output
    layer returns raw scores, one per class.

    Args:
        num_feature: size of each input sample.
        num_class: number of output scores per sample.
    """

    def __init__(self, num_feature, num_class):
        super().__init__()

        # Linear layers come first, in this order, so parameter ordering stays the same.
        self.layer_1 = nn.Linear(num_feature, 512)
        self.layer_2 = nn.Linear(512, 128)
        self.layer_3 = nn.Linear(128, 64)
        self.layer_out = nn.Linear(64, num_class)

        # Shared activation / regularisation modules.
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)

        # One batch-norm layer per hidden width.
        self.batchnorm1 = nn.BatchNorm1d(512)
        self.batchnorm2 = nn.BatchNorm1d(128)
        self.batchnorm3 = nn.BatchNorm1d(64)

    def forward(self, x):
        """Return the class scores for a batch ``x`` of shape (batch, num_feature)."""
        # Hidden block 1: linear -> batch norm -> ReLU (no dropout here).
        hidden = self.relu(self.batchnorm1(self.layer_1(x)))

        # Hidden blocks 2 and 3: linear -> batch norm -> ReLU -> dropout.
        hidden = self.dropout(self.relu(self.batchnorm2(self.layer_2(hidden))))
        hidden = self.dropout(self.relu(self.batchnorm3(self.layer_3(hidden))))

        return self.layer_out(hidden)

In [None]:
# Use the first CUDA device when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [None]:
# Training hyper-parameters.
EPOCHS = 1000
BATCH_SIZE = 64
LEARNING_RATE = 1e-4

# Model dimensions.
NUM_FEATURES = 34  # 17 keypoints, an x and a y value each
NUM_CLASSES = 28   # number of entries in class2idx

In [None]:
model = MulticlassClassification(num_feature = NUM_FEATURES, num_class = NUM_CLASSES)
model.to(device)

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
print(model)

In [None]:
print('W:',list(model.parameters())[0].size())
print('b',list(model.parameters())[1].size())

In [None]:
def multi_acc(y_pred, y_test):
    """Return the batch accuracy as a whole-number percentage (0-dim tensor).

    Args:
        y_pred: 2-D tensor of per-class scores, one row per sample.
        y_test: 1-D tensor holding the true class index of each sample.

    Returns:
        Tensor holding the share of rows whose highest-scoring class equals
        the true class, multiplied by 100 and rounded.
    """
    log_probs = torch.log_softmax(y_pred, dim=1)
    # Index of the best-scoring class in each row.
    predicted = log_probs.argmax(dim=1)

    hits = predicted.eq(y_test).float()
    return torch.round(hits.sum() / len(hits) * 100)

In [None]:
# Per-epoch history: one independent list per split for accuracy and for loss.
accuracy_stats = {split: [] for split in ('train', 'val')}
loss_stats = {split: [] for split in ('train', 'val')}

In [None]:
# Train the classifier, recording the mean per-batch loss and accuracy of every epoch.
# Only the 'train' histories are filled here; this loop has no validation pass.
num_batches = len(trainloader)  # loop-invariant, so computed once

for e in range(1, EPOCHS+1):

    train_epoch_loss = 0
    train_epoch_acc = 0

    # set model to training mode
    model.train()

    for X_train_batch, y_train_batch in trainloader:
        X_train_batch, y_train_batch = X_train_batch.to(device), y_train_batch.to(device)
        optimizer.zero_grad()

        # perform prediction
        y_train_pred = model(X_train_batch)

        # compute for loss and accuracy
        train_loss = criterion(y_train_pred, y_train_batch)
        train_acc = multi_acc(y_train_pred, y_train_batch)

        # optimization
        train_loss.backward()
        optimizer.step()

        train_epoch_loss += train_loss.item()
        train_epoch_acc += train_acc.item()

    epoch_loss = train_epoch_loss / num_batches
    epoch_acc = train_epoch_acc / num_batches
    loss_stats['train'].append(epoch_loss)
    accuracy_stats['train'].append(epoch_acc)

    # The original f-string lacked its closing quote, which made this cell a SyntaxError.
    print(f'Epoch {e:03}: | Train Loss: {epoch_loss:.5f} | Train Acc: {epoch_acc:.5f}')

# Inference using Detectron2 and Classifier Model

In [None]:
# Rebuild the keypoint R-CNN config for inference, this time with a 0.5 score threshold.
cfg = get_cfg()

# cfg.MODEL.DEVICE = "cpu"
# Start from the model-zoo config, then point the weights at the matching pre-trained checkpoint.
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml")
)
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"
)
# Confidence threshold applied to detections at test time.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

In [None]:
import matplotlib.pyplot as plt

im = cv2.imread("demo11.jpeg")
if im is None:
    # cv2.imread returns None for a missing/unreadable file; fail here with a clear
    # message rather than deep inside the predictor.
    raise FileNotFoundError('Could not read image: "demo11.jpeg"')


def cv2_imshow(im):
    """Display a BGR image inline with matplotlib.

    NOTE: this rebinds the name `cv2_imshow`, replacing the google.colab helper
    imported near the top of the notebook for every cell executed after this one.

    Args:
        im: image array in BGR channel order (as returned by ``cv2.imread``).
    """
    rgb = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)  # matplotlib expects RGB
    plt.figure(figsize=(25, 7.5))
    plt.imshow(rgb)
    plt.axis('off')


# test the model using the image
predictor = DefaultPredictor(cfg)
outputs = predictor(im)

In [None]:
# Move the demo predictions to the CPU and pull the keypoints out as a NumPy array.
pred_keypoints = (
    outputs["instances"]
    .to("cpu")
    .pred_keypoints
    .numpy()
)
# Display the keypoints of the first detection.
pred_keypoints[0]