#1.Data Preprocessing

Read two training csv files into one pandas dataframe, create custom Pytorch Dataset for training, validation, and testing sets, and read all into dataloaders

In [0]:
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms
import torch.nn as nn
import torch
import torch.optim as optim
from torch.autograd import Variable as var
from torchvision.models import densenet121
import torchvision

In [0]:
test_df = pd.read_csv('test.csv')
train_df = pd.read_csv('train.csv')
additional_train_df = pd.read_csv('Dig-MNIST.csv')
train_df = train_df.append(additional_train_df)

In [0]:
imgs = train_df.iloc[:,1:]
labels = train_df['label']
#sklearn's train_test_split is a much simpler function we can use to create a training/validation set than using 
#Pytorch's torch.utils.data.random_split function
train_x,val_x,train_label,val_label = train_test_split(imgs,labels,test_size=0.2)

In [117]:
print(test_df.iloc[:,1:])

      pixel0  pixel1  pixel2  pixel3  ...  pixel780  pixel781  pixel782  pixel783
0          0       0       0       0  ...         0         0         0         0
1          0       0       0       0  ...         0         0         0         0
2          0       0       0       0  ...         0         0         0         0
3          0       0       0       0  ...         0         0         0         0
4          0       0       0       0  ...         0         0         0         0
...      ...     ...     ...     ...  ...       ...       ...       ...       ...
4995       0       0       0       0  ...         0         0         0         0
4996       0       0       0       0  ...         0         0         0         0
4997       0       0       0       0  ...         0         0         0         0
4998       0       0       0       0  ...         0         0         0         0
4999       0       0       0       0  ...         0         0         0         0

[5000 rows x 78

In [0]:
class ds(Dataset):
    def __init__(self,imgs,labels,transform,train_test):
        self.train_test = train_test
        if self.train_test:
            self.train_imgs = np.asarray(imgs)
            self.train_labels = np.asarray(labels)
            self.train_len = len(self.train_imgs)
        else:
            self.test_imgs = imgs
            self.test_len = len(self.test_imgs)
        self.transform = transform
    def __len__(self):
        if self.train_test:
            return self.train_len
        else:
            return self.test_len
    def __getitem__(self,index):
        if self.train_test:
            img = self.train_imgs[index]
            label = self.train_labels[index]
            img = np.asarray(img).reshape(28,28).astype('uint8')
            img = Image.fromarray(img).convert('L')
            if self.transform is not None:
                img = self.transform(img)
            return (img,label)
        else:
            img = self.test_imgs[index]
            img = np.asarray(img).reshape(28,28).astype('uint8')
            img = Image.fromarray(img).convert('L')
            if self.transform is not None:
                img = self.transform(img)
            return img

In [0]:
class ds2(Dataset):
    def __init__(self,imgs,labels,transform,train_test):
        self.train_test = train_test
        if self.train_test:
            self.train_imgs = np.asarray(imgs)
            self.train_labels = np.asarray(labels)
            self.train_len = len(self.train_imgs)
        else:
            self.test_imgs = np.asarray(imgs)
            self.test_len = len(self.test_imgs)
        self.transform = transform
    def __len__(self):
        if self.train_test:
            return self.train_len
        else:
            return self.test_len
    def __getitem__(self,index):
        if self.train_test:
            img = self.train_imgs[index]
            label = self.train_labels[index]
            img = np.asarray(img).reshape(28,28).astype('uint8')
            img = Image.fromarray(img).convert('RGB')
            if self.transform is not None:
                img = self.transform(img)
            return (img,label)
        else:
            img = self.test_imgs[index]
            img = np.asarray(img).reshape(28,28).astype('uint8')
            img = Image.fromarray(img).convert('RGB')
            if self.transform is not None:
                img = self.transform(img)
            return img

In [0]:
transform = transforms.Compose([
    transforms.ToTensor(),     
])

In [0]:
transform_aug = transforms.Compose([
    transforms.RandomCrop(28),
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),
    transforms.ToTensor(),     
])

In [0]:
train_ds = ds(train_x,train_label,transform_aug,True)
val_ds = ds(val_x,val_label,transform,True)
test_ds = ds(test_df.iloc[:,1:],None,transform,False)

In [0]:
train_ds2 = ds2(train_x,train_label,transform_aug,True)
val_ds2 = ds2(val_x,val_label,transform,True)
test_ds2 = ds2(test_df.iloc[:,1:],None,transform,False)

In [0]:
batch_size = 32
train_dl = DataLoader(train_ds2,batch_size=batch_size,shuffle=True)
val_dl = DataLoader(val_ds2,batch_size=batch_size,shuffle=False)
test_dl = DataLoader(test_ds2,batch_size=5000)

#2.Define Model

Training a CNN with a simple architecture similar to what achieves 97-98% validation accuracy on MNIST will not perform as well on the Kannada dataset. To achieve higher accuracy, let us finetune a pretrained CNN.                          https://arxiv.org/ftp/arxiv/papers/1901/1901.06032.pdf#page=17 Shows a list of some different CNN architectures, most of which are available to be used on PyTorch!

In [0]:
#Similar to what I implemented for MINST, but does not perform as well! Also is very inconsistent as far as accuracy
#goes.
class mycnn(nn.Module):
    def __init__(self):
        super(mycnn,self).__init__()
        self.cnn1 = nn.Conv2d(1,3,5)
        self.cnn2 = nn.Conv2d(3,2,5)
        self.linear = nn.Linear(800,400)
        self.linear2 = nn.Linear(400,10)
        self.dropout = nn.Dropout()
        self.relu = nn.ReLU()
    def forward(self,x):
        n = x.size()[0]
        #print('input size:',x.size())
        x = self.relu(self.cnn1(x))
        #print('x output size:',x.size())
        x = self.relu(self.cnn2(x))
        #print('output size:',x.size())
        x = x.view(n,-1)
        #print('size of x:',x.size())
        x = self.relu(self.linear(self.dropout(x)))
        return x


Let use use DenseNet. Note below that the CNN is expecting RGB images, and changing the input channels to 3 would render the rest of the weights useless. Therefore, we need to make our grayscale images appear RGB by repeating the input image 3 times on a new dimension!

In [98]:
densenet121(pretrained=True)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

In [99]:
a = densenet121(pretrained=True)
print(a.classifier)

Linear(in_features=1024, out_features=1000, bias=True)


In [100]:
a.classifier.in_features

1024

In [101]:
print(a.features.conv0)

Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)


In [0]:
class densenet(nn.Module):
    def __init__(self):
        super(densenet,self).__init__()
        self.densenet = densenet121(pretrained=True)
        #self.densenet.features.conv0 = Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        for param in self.densenet.parameters():
            param.requires_grad = False
        #https://github.com/pytorch/vision/issues/1231
        #https://github.com/pytorch/pytorch/pull/22304
        #Issue with H,W size here, so add these lines of code to resolve!
        for x in self.densenet.modules():
            if isinstance(x, nn.AvgPool2d):
                x.ceil_mode = True
        self.densenet.classifier = nn.Sequential(nn.Linear(1024,10))
        for m in self.densenet.classifier:
            torch.nn.init.kaiming_normal_(m.weight)
        #print(self.densenet)
    def forward(self,x):
        return self.densenet.forward(x)

#3.Training

In [0]:
n_epoch = 25
lr = 1e-3
net = densenet().cuda()
#net = mycnn().cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(),lr=lr)
n_print = 100

In [0]:
def validate(model,data):
  # To get validation accuracy = (correct/total)*100.
  total = 0
  correct = 0
  for i,(images,labels) in enumerate(data):
    images = var(images.cuda())
    x = model(images)
    value,pred = torch.max(x,1)
    pred = pred.data.cpu()
    total += x.size(0)
    correct += torch.sum(pred == labels)
  return correct*100./total

In [105]:
for e in range(n_epoch):
  for i,(images,labels) in enumerate(train_dl):
    images = var(images.cuda())
    labels = var(labels.cuda())
    optimizer.zero_grad()
    pred = net(images)
    loss = criterion(pred,labels)
    loss.backward()
    optimizer.step()
    if (i+1) % n_print == 0:
      accuracy = float(validate(net,val_dl))
      print('Epoch :',e+1,'Batch :',i+1,'Loss :',float(loss.data),'Accuracy :',accuracy,'%')


Epoch : 1 Batch : 100 Loss : 1.7385153770446777 Accuracy : 44.17710876464844 %
Epoch : 1 Batch : 200 Loss : 1.3519468307495117 Accuracy : 58.11503601074219 %
Epoch : 1 Batch : 300 Loss : 1.2034697532653809 Accuracy : 63.68878173828125 %
Epoch : 1 Batch : 400 Loss : 1.2892407178878784 Accuracy : 67.59680938720703 %
Epoch : 1 Batch : 500 Loss : 1.2625585794448853 Accuracy : 69.45472717285156 %
Epoch : 1 Batch : 600 Loss : 0.8023688197135925 Accuracy : 70.50113677978516 %
Epoch : 1 Batch : 700 Loss : 0.9457569122314453 Accuracy : 72.23804473876953 %
Epoch : 1 Batch : 800 Loss : 1.0544648170471191 Accuracy : 73.12784576416016 %
Epoch : 1 Batch : 900 Loss : 0.7599385380744934 Accuracy : 73.73291778564453 %
Epoch : 1 Batch : 1000 Loss : 1.3845926523208618 Accuracy : 73.88951873779297 %
Epoch : 1 Batch : 1100 Loss : 0.834579348564148 Accuracy : 73.29157257080078 %
Epoch : 1 Batch : 1200 Loss : 1.1266205310821533 Accuracy : 74.2525634765625 %
Epoch : 1 Batch : 1300 Loss : 0.8298630118370056 Ac

#4.Kaggle Submission

In [0]:
index = range(0,test_ds2.test_len)
columns = ['ImageId','Label']
df = pd.DataFrame(index=index,columns=columns)

In [150]:
for i, data in enumerate(test_dl, 0):
    images = data
    images = images.cuda()
    print(images.size())
    out = net(images)
    _, predicted = torch.max(out, 1)
    print(predicted.size())
    #print(predicted[0].item())
    for j in range(predicted.size()[0]):
        df.iloc[j,:] = [j,predicted[j].item()] 

torch.Size([5000, 3, 28, 28])
torch.Size([5000])


In [151]:
print(df)

      ImageId  Label
0           0      3
1           1      0
2           2      6
3           3      6
4           4      7
...       ...    ...
4995     4995      1
4996     4996      0
4997     4997      1
4998     4998      6
4999     4999      3

[5000 rows x 2 columns]


In [0]:
df.to_csv('submission.csv',index=False)