In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
import numpy as np
import pandas as pd
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

In [2]:
# 网络结构的定义
class ResidualBlock(nn.Module):
    def __init__(self,inchannel,outchannel,stride=1):
        super(ResidualBlock,self).__init__()
        self.left = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1,padding=1,bias=False),
            nn.BatchNorm2d(outchannel)
        )
        self.shortcut = nn.Sequential()
        if stride!=1 or inchannel !=outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel ,kernel_size=1,stride=stride,bias=False),
                nn.BatchNorm2d(outchannel)
            )
    def forward(self,x):
        out = self.left(x)
        out += self.shortcut(x)
        out = F.relu(out)
        return out
    
class ResNet(nn.Module):
    def __init__(self, ResidualBlock, num_classes=10):
        super(ResNet,self).__init__()
        self.inchannel = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(1,64,kernel_size=3,stride=1,padding=1,bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.layer1 = self.make_layer(ResidualBlock, 64, 2, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, 128,2, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, 256,2, stride=2)
        self.layer4 = self.make_layer(ResidualBlock, 512,2, stride=2)
        self.fc = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.ReLU(),
            nn.Linear(512, num_classes)       
        )
        
    def make_layer(self, block, channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.inchannel, channels, stride))
            self.inchannel = channels
        return nn.Sequential(*layers)
    def forward(self, x):
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out,4)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out
    
def ResNet18():
        
    return ResNet(ResidualBlock)

In [4]:
# 训练集的读取
np_train_images = np.load('train.npy')
np_train_images = np_train_images.reshape(30000,1,28,28)
np_train_images = np_train_images.astype(np.float32)


train_labels_csv = pd.read_csv('train.csv')
np_train_labels = train_labels_csv.ix[0:,1:].as_matrix()
np_train_labels = np_train_labels.reshape(30000)


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  import sys
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
  import sys


In [3]:
# 网络基本参数的定义
Batch_size = 128
EPOCH = 10
LR = 0.001
loss_func = nn.CrossEntropyLoss()

In [16]:
# 分割训练集
new_np_train_images, new_np_test_images, new_np_train_labels, new_np_test_labels = train_test_split(np_train_images,np_train_labels,test_size=0.1,random_state=1)
# KFold函数编写
def kfold(X, y, n_splits):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=1)
    for train_index, test_index in kf.split(X, y):
        yield X[train_index], y[train_index],X[test_index], y[test_index]

In [6]:
#当在整个训练集上进行5个模型的训练时
j = 0
for np_X_train, np_Y_train, np_X_test, np_Y_test in kfold(np_train_images,np_train_labels,5):
    model = ResNet18()
    model.cuda()
    #optimizer = torch.optim.SGD(model.parameters(),lr=LR,momentum=0.9,weight_decay=5e-4)
    optimizer = torch.optim.Adam(model.parameters(),lr=LR)
    X_train = torch.from_numpy(np_X_train)
    Y_train = torch.from_numpy(np_Y_train)
    X_test = torch.from_numpy(np_X_test)
    Y_test = torch.from_numpy(np_Y_test)
    dataset = TensorDataset(X_train,Y_train)
    test_dataset = TensorDataset(X_test,Y_test)
    
    train_loader = DataLoader(
        dataset,batch_size=Batch_size,shuffle=True)
    test_loader = DataLoader(
        test_dataset,batch_size = 1,shuffle = False)
    loss_val = []
    acc_val = []
    predictions = []
    print("#####")
    print(j)
    for epoch in range(EPOCH):
        train_loss = 0
        train_acc = 0
        model.train()
        for i,(x,y) in enumerate(train_loader):
            batch_x = Variable(x.cuda())
            batch_y = Variable(y.cuda())
            
            output = model(batch_x)
            
            loss = loss_func(output, batch_y)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            
            train_loss += float(loss)
            _,pred = output.max(1)
            predictions.append(pred)
            corrects = (pred == batch_y).sum()
            acc = int(corrects)/batch_x.shape[0]
            train_acc += acc
        loss_val.append(train_loss/len(train_loader))
        acc_val.append(train_loss/len(train_loader))
        print("epoch"+' '+str(epoch))
        print("Acc"+' '+str(train_acc/len(train_loader)))
        print("Loss"+' '+str(train_loss/len(train_loader)))
    torch.save(model.state_dict(),'multipara_Resnet_19_2_'+str(j)+'.pkl')
    j += 1

#####
0
epoch 0
Acc 0.703873005319149
Loss 0.7851690613525979
epoch 1
Acc 0.8047706117021277
Loss 0.5196055925272881
epoch 2
Acc 0.8413397606382979
Loss 0.4302480506136062
epoch 3
Acc 0.8619930186170213
Loss 0.3739982973387901
epoch 4
Acc 0.875748005319149
Loss 0.33808824554719824
epoch 5
Acc 0.8901678856382979
Loss 0.29611675599788095
epoch 6
Acc 0.9023021941489362
Loss 0.26816761604648953
epoch 7
Acc 0.9124418218085106
Loss 0.23945616906944742
epoch 8
Acc 0.921126994680851
Loss 0.2103721881879771
#####
1
epoch 0
Acc 0.7106466090425532
Loss 0.7744748776263379
epoch 1
Acc 0.8075132978723404
Loss 0.5123730909000052
epoch 2
Acc 0.842544880319149
Loss 0.42685107601449845
epoch 3
Acc 0.8635721409574468
Loss 0.3709755428135395
epoch 4
Acc 0.8753740026595744
Loss 0.3298413907276823
epoch 5
Acc 0.889170545212766
Loss 0.2971865023545762
epoch 6
Acc 0.9027593085106383
Loss 0.2623565268326313
epoch 7
Acc 0.913563829787234
Loss 0.2310213480303262
epoch 8
Acc 0.926279920212766
Loss 0.1990766942580

In [None]:
#当用train_test_split将训练数据集切分为9：1后，用其中的9份再分成4：1进行KFold
#以便自行进行模拟预测正确率
j = 0
for np_X_train, np_Y_train, np_X_test, np_Y_test in kfold(np_train_images,np_train_labels,5):
    model = ResNet18()
    model.cuda()
    #optimizer = torch.optim.SGD(model.parameters(),lr=LR,momentum=0.9,weight_decay=5e-4)
    optimizer = torch.optim.Adam(model.parameters(),lr=LR)
    X_train = torch.from_numpy(np_X_train)
    Y_train = torch.from_numpy(np_Y_train)
    X_test = torch.from_numpy(np_X_test)
    Y_test = torch.from_numpy(np_Y_test)
    dataset = TensorDataset(X_train,Y_train)
    test_dataset = TensorDataset(X_test,Y_test)
    
    train_loader = DataLoader(
        dataset,batch_size=Batch_size,shuffle=True)
    test_loader = DataLoader(
        test_dataset,batch_size = 1,shuffle = False)
    loss_val = []
    acc_val = []
    predictions = []
    print("#####")
    print(j)
    for epoch in range(EPOCH):
        train_loss = 0
        train_acc = 0
        model.train()
        for i,(x,y) in enumerate(train_loader):
            batch_x = Variable(x.cuda())
            batch_y = Variable(y.cuda())
            
            output = model(batch_x)
            
            loss = loss_func(output, batch_y)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            
            train_loss += float(loss)
            _,pred = output.max(1)
            predictions.append(pred)
            corrects = (pred == batch_y).sum()
            acc = int(corrects)/batch_x.shape[0]
            train_acc += acc
        loss_val.append(train_loss/len(train_loader))
        acc_val.append(train_loss/len(train_loader))
        print("epoch"+' '+str(epoch))
        print("Acc"+' '+str(train_acc/len(train_loader)))
        print("Loss"+' '+str(train_loss/len(train_loader)))
    torch.save(model.state_dict(),'multipara_Resnet_19_'+str(j)+'.pkl')
    j += 1

In [17]:
# 建立module，读取参数，对预分割好的数据集的测试部分进行测试
model_1 = ResNet18()
model_1.eval()
model_1.cuda()
test_images = torch.from_numpy(new_np_test_images)
test_labels = torch.from_numpy(new_np_test_labels)
test_images = Variable(test_images).cuda()
test_labels = Variable(test_labels).cuda()
with torch.no_grad():
    model_1.load_state_dict(torch.load('multipara_Resnet_18_0.pkl'))
    predictions_1 = model_1(test_images)
    model_1.load_state_dict(torch.load('multipara_Resnet_18_1.pkl'))
    predictions_2 = model_1(test_images)
    model_1.load_state_dict(torch.load('multipara_Resnet_18_2.pkl'))
    predictions_3 = model_1(test_images)
    model_1.load_state_dict(torch.load('multipara_Resnet_18_3.pkl'))
    predictions_4 = model_1(test_images)
    model_1.load_state_dict(torch.load('multipara_Resnet_18_4.pkl'))
    predictions_5 = model_1(test_images)

In [18]:
# 如果训练在整个数据集上，这里的预测Acc会很高，
# 如果训练在分割为9：1的数据集上，这里的预测Acc很接近提交到Kaggle上的结果
_,predictions = (predictions_1+predictions_2+predictions_3+predictions_4+predictions_5).max(1)
print(predictions.size)
accuracy = float((predictions == test_labels).sum())
print("Acc:"+str(accuracy/len(new_np_test_labels)))

<built-in method size of Tensor object at 0x000002057ABF4B40>
Acc:0.9776666666666667


In [5]:
#生成提交的csv
model_1 = ResNet18()
model_1.eval()
model_1.cuda()
np_test_images = np.load('test.npy')
np_test_images = np_test_images.reshape(5000,1,28,28)
np_test_images = np_test_images.astype(np.float32)

test_labels_csv = pd.read_csv('samplesummission.csv')
np_test_labels = test_labels_csv.ix[0:,1:].as_matrix()
np_test_labels = np_test_labels.reshape(5000)


test_images = torch.from_numpy(np_test_images)
test_labels = torch.from_numpy(np_test_labels)
test_images = Variable(test_images).cuda()
test_labels = Variable(test_labels).cuda()
test_dataset = TensorDataset(test_images,test_labels)
test_loader = DataLoader(test_dataset,batch_size = 100,shuffle = False)
pred_all = None
with torch.no_grad():
    for i,(X,Y) in enumerate(test_loader):
        batch_x = Variable(X).cuda()
        model_1.load_state_dict(torch.load('multipara_Resnet_18_0.pkl'))
        output_1 = model_1(batch_x)
        model_1.load_state_dict(torch.load('multipara_Resnet_18_1.pkl'))
        output_2 = model_1(batch_x)
        model_1.load_state_dict(torch.load('multipara_Resnet_18_2.pkl'))
        output_3 = model_1(batch_x)
        model_1.load_state_dict(torch.load('multipara_Resnet_18_3.pkl'))
        output_4 = model_1(batch_x)
        model_1.load_state_dict(torch.load('multipara_Resnet_18_4.pkl'))
        output_5 = model_1(batch_x)
        model_1.load_state_dict(torch.load('multipara_Resnet_19_0.pkl'))
        output_6 = model_1(batch_x)
        model_1.load_state_dict(torch.load('multipara_Resnet_19_1.pkl'))
        output_7 = model_1(batch_x)
        model_1.load_state_dict(torch.load('multipara_Resnet_19_2.pkl'))
        output_8 = model_1(batch_x)
        model_1.load_state_dict(torch.load('multipara_Resnet_19_3.pkl'))
        output_9 = model_1(batch_x)
        model_1.load_state_dict(torch.load('multipara_Resnet_19_4.pkl'))
        output_10 = model_1(batch_x)
        
        pred = torch.max(output_1.cpu()+output_2.cpu()+output_3.cpu()+output_4.cpu()+output_5.cpu()+output_6.cpu()+output_7.cpu()+output_8.cpu()+output_9.cpu()+output_10.cpu(),1)[1].data.numpy().squeeze()
        if pred_all is None:
            pred_all = pred
        else:
            pred_all = np.append(pred_all,pred)
            
a = np.linspace(0,4999,5000,dtype=int)
result = pd.DataFrame({'image_id':a,'label':pred_all})
result.to_csv("submit1219_final.csv",index=False,sep=',')

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  # Remove the CWD from sys.path while we load stuff.
  # Remove the CWD from sys.path while we load stuff.
