In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 
from __future__ import print_function
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import os
import os.path as osp
import torchvision.models as model
import torch.nn as nn
import torchvision.datasets as dset
from torch.utils.data import DataLoader
import torch
import torchvision.transforms as transforms
from PIL import Image
import argparse
import torch.nn.functional as F
import torch.optim as optim
import time

# Any results you write to the current directory are saved as output.
train_label = '../input/modified-mnist/train_max_y.csv'
train_data = '../input/modified-mnist/train_max_x'
test_data = '../input/modified-mnist/test_max_x'
epochs = 200
batch_size = 64
train_portion = 0.8
lr = 0.005

#print(os.listdir("../input/modified-mnist")) 
#print(osp.exists(train_label))
#print(osp.exists(train_data))
#print(osp.exists(test_data))

In [2]:
class MyFolder(dset.DatasetFolder):
    
    def __init__(self, data='train', transform=None):
        if data == 'train':
            self.images = pd.read_pickle(train_data).reshape(50000, 1, 128, 128)
            self.labels = [x for [y, x] in pd.read_csv(train_label).to_numpy()]
        elif data == 'test':
            self.images = pd.read_pickle(test_data).reshape(10000, 1, 128, 128)
            self.labels = None
        
        self.transform = transform
        self.mean = self.images.mean()
        self.std = self.images.std()
        
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, index):
        label = -1
        
        image = self.images[index]
        #image[image>150] = 0
        image = Image.fromarray(image.reshape(128, 128))
        if self.labels is not None:
            label = self.labels[index]
            
        if self.transform is not None:
            image = self.transform(image)
            image -= self.mean
            image /= self.std
        
        image = image.reshape(1, 128, 128)
        
        return image, label
    

ts = transforms.Compose([
    transforms.RandomCrop(128, padding=4),
    transforms.ToTensor(),
    #transforms.Normalize(104.43527, 56.309097),
])

train_set = MyFolder(transform=ts)

In [3]:
num_train = len(train_set)
indices = list(range(num_train))
split = int(np.floor(train_portion * num_train))

# Instantiate dataloader
train_dataloader = DataLoader(train_set, 
                              batch_size=batch_size, 
                              sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
                              num_workers=4)
valid_dataloader = DataLoader(train_set, 
                              batch_size=batch_size, 
                              sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]), 
                              num_workers=4)

In [4]:
class CustNet(nn.Module):
    
    def __init__(self, c_in, c_out): 
        #c_in = how many channel 
        super(CustNet, self).__init__()
        self.model = nn.Sequential(
                #input shape = (batchsize, channel, H, W)
                #channel increase.
                #H, W decrease
                nn.Conv2d(c_in, 32, kernel_size=3, padding=1, stride=1),
                nn.Conv2d(32, 32, kernel_size=3, stride=2),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.Conv2d(32, 64, kernel_size=3, padding=1, stride=1),
                nn.Conv2d(64, 64, kernel_size=3, stride=2),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.Conv2d(64, 128, kernel_size=3, padding=1, stride=1),
                nn.Conv2d(128, 128, kernel_size=3, stride=2),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=1),
                nn.Conv2d(256, 512, kernel_size=3, stride=2),
                nn.BatchNorm2d(512),
                nn.ReLU() #effective 
        )
        self.flat = nn.AdaptiveMaxPool2d(1)
        self.fc = nn.Sequential(
                nn.Linear(512, 256),
                nn.ReLU(),
                nn.Linear(256, c_out)
        )
    
    def forward(self, x):
        x = self.model(x)
        x = self.flat(x)
       #print (x.shape)
        x = self.fc(x.reshape(64,512))
        return x

In [5]:
m = CustNet(1, 10).cuda()

In [6]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 5, 1) #convolution layer (minibatch,in_channels,iH,iW)
        self.conv2 = nn.Conv2d(32, 64, 5, 1)
        self.fc1 = nn.Linear(256, 512) #filters - linear transformations (N, weight)
        self.fc2 = nn.Linear(512, 10) #10 bc 10 digits

    def forward(self, x): #x=tensor, passed through above method - 2 conv layers, 2 filters
        x = F.relu(self.conv1(x)) #ReLU activation function
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2) #(kernel, stride size, padding)
        print(x.shape)
        x = x.view(-1, 256)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x #probability distribution consisting of K probabilities.

In [7]:
m = Net().cuda()

In [8]:
res = model.resnet18().cuda()
res.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False).cuda()
res.conv2 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False).cuda()
res.fc = nn.Linear(512, 10).cuda()
res.fc = nn.Linear(512, 10).cuda()


In [9]:
optimizer = torch.optim.Adam(m.parameters(),lr) #has variable of model name
citerion = nn.CrossEntropyLoss().cuda()
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)

In [10]:
class AvgrageMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.avg = 0
        self.sum = 0
        self.cnt = 0

    def update(self, val, n=1):
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt

In [11]:
def train(resnet):
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    resnet.train()
    
    for i, (pic, label) in enumerate(train_dataloader):
        label = label.cuda()
        pic = pic.cuda()
        
        optimizer.zero_grad()
        logits = resnet(pic)
        loss = citerion(logits, label)
        
        loss.backward()
        optimizer.step()
        prec1, prec5 = accuracy(logits, label, topk=(1, 5))
        n = pic.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
        
        return top1.avg, top5.avg, objs.avg
    
def valid(resnet):
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    resnet.eval()
    
    for i, (pic, label) in enumerate(valid_dataloader):
        label = label.cuda()
        pic = pic.cuda()
        
        logits = resnet(pic)
        loss = citerion(logits, label)
        
        prec1, prec5 = accuracy(logits, label, topk=(1, 5))
        n = pic.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
        
        return top1.avg, top5.avg, objs.avg
    
def accuracy(output, target, topk=(1,)):
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0/batch_size))
    return res



In [12]:
for epoch in range(epochs):
    #scheduler.step()
    print('-'*20)
    t1, _, lo = train(m)
    print('Train Acc: ', t1, ' Loss: ', lo)
    t1, _, lo = valid(m)
    print('Valid Acc: ', t1, ' Loss: ', lo)

--------------------
torch.Size([64, 64, 29, 29])


ValueError: Expected input batch_size (13456) to match target batch_size (64).

In [13]:
arr = y.data.cpu().numpy()
# write CSV
np.savetxt('output.csv', arr)

NameError: name 'y' is not defined

In [14]:
t1, t5, lo = valid(m)
print('Final valid top1 acc: ', t1, "valid top 5 acc: ", t5, ' Loss: ', lo)

torch.Size([64, 64, 29, 29])


ValueError: Expected input batch_size (13456) to match target batch_size (64).

In [15]:

#Setting up class of neural net - 2 layers and 2 filters
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1) #convolution layer (minibatch,in_channels,iH,iW)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(800, 500) #filters - linear transformations (N, weight)
        self.fc2 = nn.Linear(500, 10) #10 bc 10 digits

    def forward(self, x): #x=tensor, passed through above method - 2 conv layers, 2 filters
        x = F.relu(self.conv1(x)) #ReLU activation function
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2) #(kernel, stride size, padding)
        x = x.view(-1, 800)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1) #probability distribution consisting of K probabilities.

#Code for training the model
def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (dset, target) in enumerate(train_loader):
        dset, target = data.to(device), target.to(device)
        optimizer.zero_grad() #zero gradient so we don't hold the gradients and artificially lower gradients.
        output = model(dset)
        loss = F.nll_loss(output, target)
        loss.backward() #backpropagation
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(dset), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for dset, target in test_loader:
            dset, target = dset.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset) #normalization

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')

    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader( #loading data from TorchVision
        datasets.MNIST('data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    model = Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(args, model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")


if __name__ == '__main__':
    print("Hello code run")
    time.sleep(10)
    main()


Hello code run


usage: ipykernel_launcher.py [-h] [--batch-size N] [--test-batch-size N]
                             [--epochs N] [--lr LR] [--momentum M] [--no-cuda]
                             [--seed S] [--log-interval N] [--save-model]
ipykernel_launcher.py: error: unrecognized arguments: -f /tmp/tmpihvvhjk3.json --HistoryManager.hist_file=:memory:


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
