In [1]:
import numpy as np
import pandas as pd

import torch
import torch.utils.data as Data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.utils import resample

In [2]:
class Model(nn.Module):#Kudos to LeNet-5 and AlexNet!
    def __init__(self):
        super(Model,self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv1_bn = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.conv2_bn = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3_bn = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=5)
        self.conv4_bn = nn.BatchNorm2d(64)
        self.gap = nn.AdaptiveAvgPool2d(3)
        self.fc = nn.Linear(64*3*3, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(self.conv1_bn(x))
        x = self.conv2(x)
        x = F.relu(F.max_pool2d(self.conv2_bn(x), 2))
        x = self.conv3(x)
        x = F.relu(self.conv3_bn(x))
        x = self.conv4(x)
        x = F.relu(F.max_pool2d(self.conv4_bn(x), 2))
        
        x = self.gap(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        # cross-entropy loss will use log-softmax as an activator to generate output layer
        return x

In [3]:
BATCH_SIZE = 64

### load
train_image = np.load("data/train.npy")
train_label = np.loadtxt("data/train.csv", dtype=np.int_, delimiter=",", skiprows=1)
train_label = (train_label[:,1]).reshape([-1,1])

#train_dataset_comb
train_comb = np.concatenate((train_image,train_label),axis=1)
#train_test_split
train_dataset, valid_dataset = train_test_split(train_comb, train_size=25000, test_size=5000, shuffle = True)

#bootstrap*5
train_dataset_b1 = torch.from_numpy(resample(train_dataset, replace=True, n_samples=25000, random_state=1))
train_dataset_b2 = torch.from_numpy(resample(train_dataset, replace=True, n_samples=25000, random_state=2))
train_dataset_b3 = torch.from_numpy(resample(train_dataset, replace=True, n_samples=25000, random_state=3))
train_dataset_b4 = torch.from_numpy(resample(train_dataset, replace=True, n_samples=25000, random_state=4))
train_dataset_b5 = torch.from_numpy(resample(train_dataset, replace=True, n_samples=25000, random_state=5))

train_loader_1 = Data.DataLoader(dataset=train_dataset_b1,batch_size=BATCH_SIZE,shuffle=True)
train_loader_2 = Data.DataLoader(dataset=train_dataset_b2,batch_size=BATCH_SIZE,shuffle=True)
train_loader_3 = Data.DataLoader(dataset=train_dataset_b3,batch_size=BATCH_SIZE,shuffle=True)
train_loader_4 = Data.DataLoader(dataset=train_dataset_b4,batch_size=BATCH_SIZE,shuffle=True)
train_loader_5 = Data.DataLoader(dataset=train_dataset_b5,batch_size=BATCH_SIZE,shuffle=True)

#valid_dataset, should not be shuffled when using ensemble learning (this is after Kaggle)
valid_loader = Data.DataLoader(dataset=valid_dataset,batch_size=BATCH_SIZE,shuffle=False)

#test_dataset
test_dataset = np.load("data/test.npy")
test_dataset = torch.from_numpy(test_dataset)
test_loader = Data.DataLoader(dataset=test_dataset,batch_size=BATCH_SIZE,shuffle=False)

In [4]:
def init_model():
    model = Model()
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.005)
    scheduler = optim.lr_scheduler.StepLR(optimizer, 15, 0.5)
    
    return model, loss_func, optimizer, scheduler

In [5]:
def train_model(model_comb, train_loader):
    train_accuracy = 0
    accumulate_loss = 0
    
    model, loss_func, optimizer, scheduler = model_comb
    model.train()
    
    for comb in train_loader:
        X = comb[:,:-1]
        label = comb[:,-1]
        X = X.view(torch.Size([-1, 1, 28, 28]))
        X = X.float()
        label = label.long()
        
        X = Variable(X)          
        label = Variable(label)

        train_out = model(X) #正向传播
        train_loss = loss_func(train_out,label)  #求损失值
        optimizer.zero_grad()       #优化器梯度归零
        train_loss.backward()    #反向转播，刷新梯度值
        optimizer.step()        #优化器运行一步，注意optimizer搜集的是model的参数
        
        _, pred = train_out.max(1)
        train_accuracy += int((pred == label).sum()) / X.shape[0]
        accumulate_loss += float(train_loss)
    
    scheduler.step()
    print("train_accuracy:" + ' '+str(train_accuracy / len(train_loader)))
    print("train_loss:" + ' ' + str(accumulate_loss / len(train_loader)))  
    
    return model, loss_func, optimizer, scheduler

In [6]:
def validate_model(model_comb, valid_loader):
    eval_loss = 0
    label_all = None
    pred_all = None
    
    model, loss_func, optimizer, scheduler = model_comb
    model.eval()
    
    for comb in valid_loader:  
        X = comb[:,:-1]
        label = comb[:,-1]
        X = X.view(torch.Size([-1, 1, 28, 28]))
        X = X.float()
        label = label.long()
        
        X = Variable(X) 
        label = Variable(label)
        
        valid_out = model(X)
        valid_loss = loss_func(valid_out,label)
        
        eval_loss += float(valid_loss)
        _, pred = valid_out.max(1)
    
        if label_all is None:
            label_all = label
        else:
            label_all = torch.cat([label_all,label])
        if pred_all is None:
            pred_all = torch.cat([pred])
        else:
            pred_all = torch.cat([pred_all,pred])

    y_test = label_all.cpu().detach().numpy()
    y_pred = pred_all.cpu().detach().numpy()
    
    return y_test, y_pred, eval_loss, len(valid_loader)

In [7]:
def train_validate(model_comb,train_loader,index):
    print("model_"+str(index))
    model_comb = train_model(model_comb,train_loader)
    y_test, y_pred, eval_loss, data_len = validate_model(model_comb,valid_loader)
    print('validation_accuracy:%.7f' %accuracy_score(y_true=y_test, y_pred=y_pred))
    print('validation_loss:%.7f' %(eval_loss/data_len))

In [8]:
def bagging():   
    y_test_1, y_pred_1, eval_loss_1, data_len_1 = validate_model(model1_comb,valid_loader)
    y_test_2, y_pred_2, eval_loss_2, data_len_2 = validate_model(model2_comb,valid_loader)
    y_test_3, y_pred_3, eval_loss_3, data_len_3 = validate_model(model3_comb,valid_loader)
    y_test_4, y_pred_4, eval_loss_4, data_len_4 = validate_model(model4_comb,valid_loader)
    y_test_5, y_pred_5, eval_loss_5, data_len_5 = validate_model(model5_comb,valid_loader)
    
    eval_loss = eval_loss_1 + eval_loss_2 + eval_loss_3 + eval_loss_4 + eval_loss_5
    data_len = data_len_1 + data_len_2 + data_len_3 + data_len_4 + data_len_5
    y_pred = np.zeros(len(y_test_1))

    for i in range(len(y_pred)):#vote
        y_pred[i] = (pd.Series(data=[y_pred_1[i], y_pred_2[i], y_pred_3[i], y_pred_4[i], y_pred_5[i]])).mode()[0]

    print('final_validation_accuracy:%.7f' %accuracy_score(y_true=y_test_1, y_pred=y_pred))
    print('final_validation_loss:%.7f' %(eval_loss/data_len))

In [9]:
model1_comb = init_model()
model2_comb = init_model()
model3_comb = init_model()
model4_comb = init_model()
model5_comb = init_model()

In [None]:
num_epochs = 40
    
for epoch in range(num_epochs):
    print("epoch:"+' ' +str(epoch))
    
    train_validate(model1_comb,train_loader_1,1)
    train_validate(model2_comb,train_loader_2,2)
    train_validate(model3_comb,train_loader_3,3)
    train_validate(model4_comb,train_loader_4,4)
    train_validate(model5_comb,train_loader_5,5)

bagging()

epoch: 0
model_1
train_accuracy: 0.704507672634271
train_loss: 0.9640275041008239
validation_accuracy:0.7764000
validation_loss:0.6674562
model_2
train_accuracy: 0.7045556265984655
train_loss: 0.9703263047406131
validation_accuracy:0.7636000
validation_loss:0.6775237
model_3
train_accuracy: 0.7095748081841433
train_loss: 0.9492436722111519
validation_accuracy:0.7742000
validation_loss:0.6718438
model_4
train_accuracy: 0.7018542199488491
train_loss: 0.9715272642462455
validation_accuracy:0.7576000
validation_loss:0.6842696
model_5
train_accuracy: 0.7020140664961637
train_loss: 0.975733178884477
validation_accuracy:0.7606000
validation_loss:0.6862850
epoch: 1
model_1
train_accuracy: 0.8013666879795397
train_loss: 0.5764463217666996
validation_accuracy:0.8048000
validation_loss:0.5513464
model_2
train_accuracy: 0.7975223785166241
train_loss: 0.5856866065955832
validation_accuracy:0.7998000
validation_loss:0.5684175
model_3
train_accuracy: 0.8015824808184143
train_loss: 0.5815236408387303


train_accuracy: 0.9300911125319693
train_loss: 0.23469137863429915
validation_accuracy:0.8504000
validation_loss:0.4030859
model_4
train_accuracy: 0.9296595268542199
train_loss: 0.2367991341654297
validation_accuracy:0.8424000
validation_loss:0.4237815
model_5
train_accuracy: 0.9272058823529412
train_loss: 0.2398773105553044
validation_accuracy:0.8426000
validation_loss:0.4388457
epoch: 13
model_1
train_accuracy: 0.9412563938618926
train_loss: 0.2144876836282213
validation_accuracy:0.8452000
validation_loss:0.4037638
model_2
train_accuracy: 0.9295796035805627
train_loss: 0.23309908706285154
validation_accuracy:0.8478000
validation_loss:0.4113427
model_3
train_accuracy: 0.9339354219948849
train_loss: 0.2232931100613321
validation_accuracy:0.8546000
validation_loss:0.3991218
model_4
train_accuracy: 0.9374760230179029
train_loss: 0.21922931833492826
validation_accuracy:0.8472000
validation_loss:0.4163629
model_5
train_accuracy: 0.9350383631713556
train_loss: 0.2230225670939821
validation_

In [None]:
#load best model and generate test.csv
model_load = Model()
model_load.load_state_dict(torch.load("fakepath/"))

pred_all = None
model_load.eval()

for X in test_loader:
    X = X.view(torch.Size([-1, 1, 28, 28]))
    X = X.float()
    X = Variable(X)
    
    test_out = model_load(X)
    
    _, pred = test_out.max(1)

    if pred_all is None:
        pred_all = torch.cat([pred])
    else:
        pred_all = torch.cat([pred_all,pred])

result = pred_all.numpy()
pd_csv = pd.DataFrame(result,columns=['label'])
pd_csv.to_csv('submit.csv',index_label = 'image_id')