In [1]:
import numpy  as np
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader,Dataset
import torch.nn.functional as F
from scipy.stats import beta

In [2]:
# Build CustomedDataset
# Separate the given dataset into two parts, four for train and one for test

class CustomedDataSet(Dataset):
    def __init__(self, train=True):
        self.train = train
        if self.train :
            trainX = pd.read_csv('./train.csv',nrows=33600)
            trainY = trainX.label.as_matrix().tolist()
            trainX = trainX.drop('label',axis=1).as_matrix().reshape(trainX.shape[0], 1, 28, 28)
            self.datalist = trainX
            self.labellist = trainY
        else:
            testX = pd.read_csv('./train.csv')
            testX = testX.drop(testX.head(33600).index)
            testY = testX.label.as_matrix().tolist()
            testX = testX.drop('label',axis=1).as_matrix().reshape(testX.shape[0], 1, 28, 28)
            self.datalist = testX
            self.labellist = testY
            
    def __getitem__(self, index):
        if self.train:
            return torch.Tensor(self.datalist[index].astype(float)),self.labellist[index]
        else:
            return torch.Tensor(self.datalist[index].astype(float)),self.labellist[index]
    
    def __len__(self):
        return self.datalist.shape[0]

In [3]:
batch_size = 100

In [4]:
train_dataset = CustomedDataSet()
test_dataset  = CustomedDataSet(train=False)
train_loader  = DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True,num_workers=2)
test_loader   = DataLoader(dataset= test_dataset,batch_size=batch_size,shuffle=False)

  if __name__ == '__main__':
  # Remove the CWD from sys.path while we load stuff.
  app.launch_new_instance()


In [5]:
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet,self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels = 1, out_channels = 64, kernel_size = 8, stride = 2, padding = 1),
            nn.ReLU(inplace=True),
            
            nn.Conv2d(in_channels = 64, out_channels = 192, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(inplace=True),
            
            nn.Conv2d(in_channels = 192, out_channels = 384, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size = 2, stride =2),
            
            nn.Conv2d(in_channels = 384, out_channels = 256, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(inplace=True),
            
            nn.Conv2d(in_channels = 256, out_channels = 256, kernel_size = 3, stride = 1, padding = 1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size = 2, stride = 2),
        )   
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256*3*3,1024),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(1024,64),
            nn.ReLU(inplace = True),
            nn.Linear(64,10),
        )
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256*3*3)
        x = self.classifier(x)
        return x

In [6]:
alexnet = AlexNet()

In [7]:
# Hyper Parameters

num_epochs = 5
learning_rate = 0.001

In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(alexnet.parameters(),lr=learning_rate)

In [9]:
# Mixup input data
def mixup_data(x, y,alpha):
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size()[0]
    index = torch.randperm(batch_size)
    mixed_x = lam * x + (1 - lam) * x[index,:]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

In [10]:
# The loss function will change of course
def mixup_criterion(y_a, y_b, lam):
    return lambda criterion, pred: lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

In [11]:
for alpha in [0.1,0.2,0.4,0.8]:
    for epoch in range(num_epochs):
        for i, (x, y) in enumerate(train_loader):
            images, y_a, y_b, lam = mixup_data(x, y, alpha)
            images = Variable(images)
            y_a = Variable(y_a)
            y_b = Variable(y_b)
        
            optimizer.zero_grad()
            outputs = alexnet(images)
            loss_func = mixup_criterion(y_a, y_b, lam)
            loss = loss_func(criterion, outputs)
            loss.backward()
            optimizer.step()
            if (i+1) % 336 == 0:
                print ('Alpha %.1f, Epoch [%d/%d], Loss: %.4f' %(alpha, epoch+1, num_epochs, loss.item()))
    
    ans = torch.LongTensor()
    for (images,labels) in test_loader:
        images = Variable(images)
        outputs = alexnet(images)
        _,predicted = torch.max(outputs.data, 1)
        ans = torch.cat((ans,predicted),0)
    
    ans = ans.numpy()
    
    corr=0
    for i,(images,labels) in enumerate(test_dataset):
        if ans[i]==labels:
            corr=corr+1
    print("Accu rate at Alpha=%.1f: %.3f"%(alpha,corr/84))

Alpha 0.1, Epoch [1/5], Loss: 0.1089
Alpha 0.1, Epoch [2/5], Loss: 0.0785
Alpha 0.1, Epoch [3/5], Loss: 0.1379
Alpha 0.1, Epoch [4/5], Loss: 1.5037
Alpha 0.1, Epoch [5/5], Loss: 0.1833
Accu rate at Alpha=0.1: 98.440
Alpha 0.2, Epoch [1/5], Loss: 0.4260
Alpha 0.2, Epoch [2/5], Loss: 0.1383
Alpha 0.2, Epoch [3/5], Loss: 0.7242
Alpha 0.2, Epoch [4/5], Loss: 0.2237
Alpha 0.2, Epoch [5/5], Loss: 0.2585
Accu rate at Alpha=0.2: 98.774
Alpha 0.4, Epoch [1/5], Loss: 0.2867
Alpha 0.4, Epoch [2/5], Loss: 0.6179
Alpha 0.4, Epoch [3/5], Loss: 0.0672
Alpha 0.4, Epoch [4/5], Loss: 0.9253
Alpha 0.4, Epoch [5/5], Loss: 0.6231
Accu rate at Alpha=0.4: 99.000
Alpha 0.8, Epoch [1/5], Loss: 0.9927
Alpha 0.8, Epoch [2/5], Loss: 0.4742
Alpha 0.8, Epoch [3/5], Loss: 0.6707
Alpha 0.8, Epoch [4/5], Loss: 0.8598
Alpha 0.8, Epoch [5/5], Loss: 0.8166
Accu rate at Alpha=0.8: 98.952
