In [1]:
import numpy  as np
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from scipy.stats import beta
from torch.utils.data import TensorDataset,DataLoader,Dataset

In [2]:
# Hyper Parameters

num_epochs = 5
batch_size = 100
learning_rate = 0.001

In [3]:
# Build CustomedDataset
# Separate the given dataset into two parts, four for train and one for test

class CustomedDataSet(Dataset):
    def __init__(self, train=True):
        self.train = train
        if self.train :
            trainX = pd.read_csv('./train.csv',nrows=33600)
            trainY = trainX.label.as_matrix().tolist()
            trainX = trainX.drop('label',axis=1).as_matrix().reshape(trainX.shape[0], 1, 28, 28)
            self.datalist = trainX
            self.labellist = trainY
        else:
            testX = pd.read_csv('./train.csv')
            testX = testX.drop(testX.head(33600).index)
            testY = testX.label.as_matrix().tolist()
            testX = testX.drop('label',axis=1).as_matrix().reshape(testX.shape[0], 1, 28, 28)
            self.datalist = testX
            self.labellist = testY
            
    def __getitem__(self, index):
        if self.train:
            return torch.Tensor(self.datalist[index].astype(float)),self.labellist[index]
        else:
            return torch.Tensor(self.datalist[index].astype(float)),self.labellist[index]
    
    def __len__(self):
        return self.datalist.shape[0]

In [4]:
train_dataset = CustomedDataSet()
test_dataset  = CustomedDataSet(train=False)

  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.
  app.launch_new_instance()


In [5]:
train_loader = DataLoader(dataset= train_dataset,batch_size=batch_size,shuffle=False)
test_loader  = DataLoader(dataset = test_dataset,batch_size=batch_size,shuffle=False)

In [6]:
# CNN Model

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1 ,16, kernel_size=5,padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5,padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.fc = nn.Linear(7*7*32, 10)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

In [7]:
cnn = CNN()

In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters(),lr=learning_rate)

In [9]:
# Mixup input data
def mixup_data(x, y,alpha):
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size()[0]
    index = torch.randperm(batch_size)
    mixed_x = lam * x + (1 - lam) * x[index,:]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

In [10]:
# The loss function will change of course
def mixup_criterion(y_a, y_b, lam):
    return lambda criterion, pred: lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

In [18]:
for alpha in [0.1,0.2,0.4,0.8]:
    for epoch in range(num_epochs): 
        for i,(x,y) in enumerate(train_loader):  
            images, y_a, y_b, lam = mixup_data(x, y, alpha)
            images = Variable(images)
            y_a = Variable(y_a)
            y_b = Variable(y_b)

            optimizer.zero_grad()
            outputs = cnn(images)
            loss_func = mixup_criterion(y_a, y_b, lam)
            loss = loss_func(criterion, outputs)
            loss.backward()
            optimizer.step()
            if (i+1) % 336 == 0:
                print ('Alpha %.1f, Epoch [%d/%d], Loss: %.4f' %(alpha, epoch+1, num_epochs, loss.item()))

    ans = torch.LongTensor()
    for (images,labels) in test_loader:
        images = Variable(images)
        outputs = cnn(images)
        _,predicted = torch.max(outputs.data, 1)
        ans = torch.cat((ans,predicted),0)
    
    ans = ans.numpy()
    
    corr=0
    for i,(images,labels) in enumerate(test_dataset):
        if ans[i]==labels:
            corr=corr+1
    print("Accu rate at Alpha=%.1f: %.3f"%(alpha,corr/84))

Alpha 0.1, Epoch [1/5], Loss: 0.0230
Alpha 0.1, Epoch [2/5], Loss: 0.0184
Alpha 0.1, Epoch [3/5], Loss: 0.0182
Alpha 0.1, Epoch [4/5], Loss: 0.9425
Alpha 0.1, Epoch [5/5], Loss: 0.0131
Accu rate at Alpha=0.1: 99.024
Alpha 0.2, Epoch [1/5], Loss: 0.5627
Alpha 0.2, Epoch [2/5], Loss: 0.0300
Alpha 0.2, Epoch [3/5], Loss: 0.9052
Alpha 0.2, Epoch [4/5], Loss: 0.8607
Alpha 0.2, Epoch [5/5], Loss: 0.6564
Accu rate at Alpha=0.2: 98.964
Alpha 0.4, Epoch [1/5], Loss: 0.7347
Alpha 0.4, Epoch [2/5], Loss: 0.0361
Alpha 0.4, Epoch [3/5], Loss: 0.0727
Alpha 0.4, Epoch [4/5], Loss: 0.7284
Alpha 0.4, Epoch [5/5], Loss: 0.0343
Accu rate at Alpha=0.4: 99.060
Alpha 0.8, Epoch [1/5], Loss: 0.6801
Alpha 0.8, Epoch [2/5], Loss: 0.6122
Alpha 0.8, Epoch [3/5], Loss: 0.6418
Alpha 0.8, Epoch [4/5], Loss: 0.7891
Alpha 0.8, Epoch [5/5], Loss: 0.5389
Accu rate at Alpha=0.8: 98.857
