### Testing our models

This notebook will load saved model and predict the image labels

In [1]:
import torch
from torchvision.transforms import transforms
from PIL import Image
from pathlib import Path
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader

In [38]:
import os
import numpy as np
import pandas as pd
from skimage import io
from PIL import Image as im

### Step 1: Let's get the images ready

In [3]:
class BengaliCharacterDataset(Dataset):
    """Bengali language handwritten character dataset."""
    
    def __init__(self, root_dir, transform=None):
        """
        Args:
            dataframe (string): Dataframe of parquet dataset with image data.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):        
        return len(os.listdir(self.root_dir))

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        image_id = 'Test_'+str(idx)
        img_name = image_id+'.png'
        img_path = os.path.join(self.root_dir,img_name)
        image = io.imread(img_path)
        
        if self.transform:
            image = self.transform(im.fromarray(image).convert('RGB'))

        return image

In [4]:
transformed_data = BengaliCharacterDataset('/home/ubuntu/datasets/bengali-ai/testing_images/testing_224by224/',
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.0692,0.0692,0.0692), (0.2051,0.2051,0.2051))
                   ]))

In [5]:
tt = transformed_data.__getitem__(0)

In [6]:
tt

tensor([[[4.2323, 4.3471, 4.3471,  ..., 4.3088, 4.3088, 4.3088],
         [4.2515, 4.3471, 4.3471,  ..., 4.3088, 4.3088, 4.3088],
         [4.2515, 4.3088, 4.3088,  ..., 4.3088, 4.2706, 4.3088],
         ...,
         [4.3088, 4.3471, 4.3471,  ..., 4.3088, 4.3471, 4.3471],
         [4.3088, 4.3088, 4.3088,  ..., 4.3088, 4.3471, 4.3088],
         [4.2897, 4.3088, 4.3088,  ..., 4.3471, 4.3088, 4.2897]],

        [[4.2323, 4.3471, 4.3471,  ..., 4.3088, 4.3088, 4.3088],
         [4.2515, 4.3471, 4.3471,  ..., 4.3088, 4.3088, 4.3088],
         [4.2515, 4.3088, 4.3088,  ..., 4.3088, 4.2706, 4.3088],
         ...,
         [4.3088, 4.3471, 4.3471,  ..., 4.3088, 4.3471, 4.3471],
         [4.3088, 4.3088, 4.3088,  ..., 4.3088, 4.3471, 4.3088],
         [4.2897, 4.3088, 4.3088,  ..., 4.3471, 4.3088, 4.2897]],

        [[4.2323, 4.3471, 4.3471,  ..., 4.3088, 4.3088, 4.3088],
         [4.2515, 4.3471, 4.3471,  ..., 4.3088, 4.3088, 4.3088],
         [4.2515, 4.3088, 4.3088,  ..., 4.3088, 4.2706, 4.

In [7]:
tt.shape

torch.Size([3, 224, 224])

In [8]:
test_dataloader = DataLoader(transformed_data, 
                             batch_size=2,
                             #shuffle=True, 
                             num_workers=2)

In [9]:
len(test_dataloader)

6

In [10]:
test_dataloader.dataset.__getitem__(0)

tensor([[[4.2323, 4.3471, 4.3471,  ..., 4.3088, 4.3088, 4.3088],
         [4.2515, 4.3471, 4.3471,  ..., 4.3088, 4.3088, 4.3088],
         [4.2515, 4.3088, 4.3088,  ..., 4.3088, 4.2706, 4.3088],
         ...,
         [4.3088, 4.3471, 4.3471,  ..., 4.3088, 4.3471, 4.3471],
         [4.3088, 4.3088, 4.3088,  ..., 4.3088, 4.3471, 4.3088],
         [4.2897, 4.3088, 4.3088,  ..., 4.3471, 4.3088, 4.2897]],

        [[4.2323, 4.3471, 4.3471,  ..., 4.3088, 4.3088, 4.3088],
         [4.2515, 4.3471, 4.3471,  ..., 4.3088, 4.3088, 4.3088],
         [4.2515, 4.3088, 4.3088,  ..., 4.3088, 4.2706, 4.3088],
         ...,
         [4.3088, 4.3471, 4.3471,  ..., 4.3088, 4.3471, 4.3471],
         [4.3088, 4.3088, 4.3088,  ..., 4.3088, 4.3471, 4.3088],
         [4.2897, 4.3088, 4.3088,  ..., 4.3471, 4.3088, 4.2897]],

        [[4.2323, 4.3471, 4.3471,  ..., 4.3088, 4.3088, 4.3088],
         [4.2515, 4.3471, 4.3471,  ..., 4.3088, 4.3088, 4.3088],
         [4.2515, 4.3088, 4.3088,  ..., 4.3088, 4.2706, 4.

In [11]:
single_loaded_img = test_dataloader.dataset.__getitem__(0)

In [12]:
single_loaded_img.shape

torch.Size([3, 224, 224])

In [13]:
# As the model expects 4 dimentional input (the batch size as 4th) we add another dimention to our 3-d image
reshaped = single_loaded_img.permute(0, 1, 2).unsqueeze(0)

In [14]:
reshaped.shape

torch.Size([1, 3, 224, 224])

In [15]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [16]:
single_loaded_img = reshaped.to(device)

### Step 2: Now that the image is ready, let's load the model

In [27]:
# model_ft = models.resnet152(pretrained=False)

In [29]:
# model_ft.load_state_dict(torch.load(Path('/home/ubuntu/codebase/my_github/kaggle-competitions/bengali-ai/approach_2/grapheme-root-base.pth')))

In [17]:
model = torch.load(Path('/home/ubuntu/codebase/my_github/kaggle-competitions/bengali-ai/approach_2/grapheme-root-base.pth'))

In [18]:
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

In [19]:
out_predict = model(single_loaded_img)

In [20]:
len(out_predict[0]), out_predict

(168, tensor([[-6.0045e-01, -1.3788e+00,  1.1822e+01,  1.9353e+01, -2.1014e+00,
           2.7605e+00, -1.6748e+00, -2.6956e-01,  2.7165e+00,  4.0505e+00,
           1.4720e+00, -2.7549e-01, -1.1045e+00,  2.9411e+00, -3.1077e+00,
          -2.7126e+00,  2.7479e+00, -4.9148e-01, -1.9170e-01, -1.1014e+00,
           1.5205e+00,  7.3781e+00,  4.1370e+00,  3.4330e+00, -1.1813e+00,
          -2.0427e+00, -2.9523e+00, -3.5055e+00, -2.3214e+00, -1.5819e+00,
          -5.5324e+00, -7.8466e-01, -3.5040e+00,  8.6332e-01,  2.0373e+00,
           4.0215e+00,  8.5344e+00,  1.6279e-02, -1.7384e-01, -2.9892e+00,
          -1.3945e+00, -1.4820e+00, -5.5973e-01,  2.4545e+00, -2.2017e+00,
          -2.7525e+00,  4.0158e+00, -2.5941e+00,  5.5110e+00,  4.1950e+00,
          -3.2202e+00, -5.2532e-01,  3.4832e-01, -1.2860e+00,  6.4003e-01,
          -1.9818e+00, -4.3382e-01, -7.9212e-01,  3.1256e-01, -1.0107e+00,
          -4.3904e+00, -4.3263e+00, -2.7801e+00, -3.7707e+00,  6.7309e+00,
           2.7148e+0

In [21]:
max_value, max_index = torch.max(out_predict,1)

In [22]:
# convert tensor to a simple number (class number)
predicted_class = max_index.cpu().detach().numpy()[0]

### Step 3: Lets' scale this

In [23]:
all_test_files = os.listdir('/home/ubuntu/datasets/bengali-ai/testing_images/testing_224by224/')

In [39]:
sample_submission = pd.read_csv("/home/ubuntu/datasets/bengali-ai/sample_submission.csv")

In [58]:
sample_submission.head(9)

Unnamed: 0,row_id,target
0,Test_0_consonant_diacritic,0
1,Test_0_grapheme_root,50
2,Test_0_vowel_diacritic,0
3,Test_1_consonant_diacritic,0
4,Test_1_grapheme_root,0
5,Test_1_vowel_diacritic,0
6,Test_2_consonant_diacritic,0
7,Test_2_grapheme_root,0
8,Test_2_vowel_diacritic,0


In [42]:
sample_submission.tail(4)

Unnamed: 0,row_id,target
32,Test_10_vowel_diacritic,0
33,Test_11_consonant_diacritic,0
34,Test_11_grapheme_root,0
35,Test_11_vowel_diacritic,0


In [43]:
actual_submission = sample_submission

In [54]:
actual_submission.loc[actual_submission['row_id'].str.contains('Test_0') & 
                  actual_submission['row_id'].str.contains('grapheme'),'target'] = 50

In [59]:
sample_submission.head(3)

Unnamed: 0,row_id,target
0,Test_0_consonant_diacritic,0
1,Test_0_grapheme_root,50
2,Test_0_vowel_diacritic,0


In [60]:
for i in range(0,len(all_test_files)):
    single_loaded_img = test_dataloader.dataset.__getitem__(i)
    reshaped_img = single_loaded_img.permute(0, 1, 2).unsqueeze(0)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    single_loaded_img = reshaped_img.to(device)
    max_value, max_index = torch.max(model(single_loaded_img),1)
    predicted_class = max_index.cpu().detach().numpy()[0]
    print('Test_'+str(i)+'.png has grapheme root class = '+str(predicted_class))
    actual_submission.loc[actual_submission['row_id'].str.contains('Test_'+str(i)) & 
                  actual_submission['row_id'].str.contains('grapheme'),'target'] = predicted_class

Test_0.png has grapheme root class = 3
Test_1.png has grapheme root class = 93
Test_2.png has grapheme root class = 19
Test_3.png has grapheme root class = 115
Test_4.png has grapheme root class = 55
Test_5.png has grapheme root class = 115
Test_6.png has grapheme root class = 147
Test_7.png has grapheme root class = 137
Test_8.png has grapheme root class = 119
Test_9.png has grapheme root class = 133
Test_10.png has grapheme root class = 148
Test_11.png has grapheme root class = 21


In [26]:
model_consonant_diacritic = torch.load(Path('/home/ubuntu/codebase/my_github/kaggle-competitions/bengali-ai/approach_2/consonant-diacritic-base.pth'))

In [27]:
model_consonant_diacritic.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

In [62]:
for i in range(0,len(all_test_files)):
    single_loaded_img = test_dataloader.dataset.__getitem__(i)
    reshaped_img = single_loaded_img.permute(0, 1, 2).unsqueeze(0)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    single_loaded_img = reshaped_img.to(device)
    max_value, max_index = torch.max(model_consonant_diacritic(single_loaded_img),1)
    predicted_class = max_index.cpu().detach().numpy()[0]
    print('Test_'+str(i)+'.png has consonant diacritic class = '+str(predicted_class))
    actual_submission.loc[actual_submission['row_id'].str.contains('Test_'+str(i)) & 
                  actual_submission['row_id'].str.contains('consonant'),'target'] = predicted_class

Test_0.png has consonant diacritic class = 0
Test_1.png has consonant diacritic class = 0
Test_2.png has consonant diacritic class = 0
Test_3.png has consonant diacritic class = 0
Test_4.png has consonant diacritic class = 0
Test_5.png has consonant diacritic class = 0
Test_6.png has consonant diacritic class = 5
Test_7.png has consonant diacritic class = 0
Test_8.png has consonant diacritic class = 0
Test_9.png has consonant diacritic class = 0
Test_10.png has consonant diacritic class = 4
Test_11.png has consonant diacritic class = 0


In [33]:
model_vowel_diacritic = torch.load(Path('/home/ubuntu/codebase/my_github/kaggle-competitions/bengali-ai/approach_2/vowel-diacritic-base.pth'))

In [34]:
model_vowel_diacritic.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

In [63]:
for i in range(0,len(all_test_files)):
    single_loaded_img = test_dataloader.dataset.__getitem__(i)
    reshaped_img = single_loaded_img.permute(0, 1, 2).unsqueeze(0)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    single_loaded_img = reshaped_img.to(device)
    max_value, max_index = torch.max(model_vowel_diacritic(single_loaded_img),1)
    predicted_class = max_index.cpu().detach().numpy()[0]
    print('Test_'+str(i)+'.png has vowel diacritic class = '+str(predicted_class))
    actual_submission.loc[actual_submission['row_id'].str.contains('Test_'+str(i)) & 
                  actual_submission['row_id'].str.contains('vowel'),'target'] = predicted_class

Test_0.png has vowel diacritic class = 0
Test_1.png has vowel diacritic class = 2
Test_2.png has vowel diacritic class = 0
Test_3.png has vowel diacritic class = 0
Test_4.png has vowel diacritic class = 4
Test_5.png has vowel diacritic class = 2
Test_6.png has vowel diacritic class = 9
Test_7.png has vowel diacritic class = 7
Test_8.png has vowel diacritic class = 9
Test_9.png has vowel diacritic class = 10
Test_10.png has vowel diacritic class = 1
Test_11.png has vowel diacritic class = 2


In [64]:
actual_submission

Unnamed: 0,row_id,target
0,Test_0_consonant_diacritic,0
1,Test_0_grapheme_root,3
2,Test_0_vowel_diacritic,0
3,Test_1_consonant_diacritic,0
4,Test_1_grapheme_root,93
5,Test_1_vowel_diacritic,2
6,Test_2_consonant_diacritic,0
7,Test_2_grapheme_root,19
8,Test_2_vowel_diacritic,0
9,Test_3_consonant_diacritic,0


In [66]:
actual_submission.to_csv('/home/ubuntu/codebase/my_github/kaggle-competitions/bengali-ai/actual_submission.csv', index=False)