# MNISTで比較

In [1]:
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_mldata
import numpy as np

import torch
# Make GPU index 2 the current CUDA device for every later `.cuda()` call.
# The index is hard-coded, so device 2 must exist on the machine running this.
torch.cuda.set_device(2)

## 1. MNISTデータセット準備

In [2]:
def set_data():
    """Load MNIST and return train/test/validation splits of pixel sequences.

    Every image is divided by 255, reshaped to 28x28, flattened in
    serpentine order (odd-indexed rows are reversed) and given a trailing
    feature axis, so each sample becomes a sequence of 784 one-dimensional
    steps.

    Returns:
        Tuple ``(train_X, test_X, train_y, test_y, valid_X, valid_y)``.
        The X arrays are float32 with shape (n_samples, 784, 1); the y
        arrays are int64 labels. 20% of the data is held out for testing
        and 10% of the remainder for validation.
    """
    # NOTE(review): fetch_mldata was removed in scikit-learn 0.22;
    # fetch_openml('mnist_784') is the documented replacement when upgrading.
    mnist = fetch_mldata('MNIST original')
    mnist_X, mnist_y = shuffle(mnist.data, mnist.target, random_state=42)
    mnist_X = mnist_X / 255.0

    # Cast to the dtypes used on the PyTorch side
    mnist_X, mnist_y = mnist_X.astype('float32'), mnist_y.astype('int64')

    def flatten_img(images):
        '''
        Flatten 2-D images to 1-D, reversing every odd-indexed row.

        images: shape => (n, rows, columns)
        output: shape => (n, rows*columns)
        '''
        n_rows, n_columns = images.shape[1], images.shape[2]
        # Work on a copy so the caller's array is left untouched and no row
        # is ever assigned from a reversed view of its own memory.
        snake = images.copy()
        snake[:, 1::2, :] = images[:, 1::2, ::-1]
        return snake.reshape(-1, n_rows * n_columns)

    mnist_X = mnist_X.reshape(-1, 28, 28)
    mnist_X = flatten_img(mnist_X)
    # Convert to X.shape => (n_samples, seq_len, n_features)
    mnist_X = mnist_X[:, :, np.newaxis]

    # Split into training, test and validation data
    train_X, test_X, train_y, test_y = train_test_split(mnist_X, mnist_y,
                                                        test_size=0.2,
                                                        random_state=42)
    train_X, valid_X, train_y, valid_y = train_test_split(train_X, train_y,
                                                          test_size=0.1,
                                                          random_state=42)

    return train_X, test_X, train_y, test_y, valid_X, valid_y

# Build the splits once at module level; the later cells read these globals.
train_X, test_X, train_y, test_y, valid_X, valid_y = set_data()

## 2. モデル構築

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from gru import GRU
from lstm import LSTM
from sru import SRU

## 3. 訓練の準備

In [4]:
import time
import math
import torch.optim as optim

# Format the elapsed wall-clock time for progress messages
def timeSince(since):
    """Return the time elapsed since `since` as a '<minutes>m <seconds>s' string.

    Args:
        since: start timestamp in seconds, as returned by ``time.time()``.
    """
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)


# Training step for a single mini-batch
def train(model, inputs, labels, optimizer, criterion, clip):
    """Run one optimisation step on a mini-batch and return its loss.

    Args:
        model: network exposing ``initHidden(batch_size)`` and callable on
            ``inputs``.
        inputs: batch tensor whose dimension 1 is the batch size.
        labels: targets passed to ``criterion`` together with the outputs.
        optimizer: optimizer that updates ``model``'s parameters.
        criterion: loss function called as ``criterion(outputs, labels)``.
        clip: maximum gradient norm applied before the optimizer step.

    Returns:
        The batch loss as a Python float.
    """
    # Reset the hidden state for this batch size
    model.initHidden(inputs.size(1))
    # Clear gradients left over from the previous step
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    # clip_grad_norm_ is the current name; older PyTorch only ships the
    # un-suffixed clip_grad_norm, so fall back to it when needed.
    clip_grad = getattr(torch.nn.utils, 'clip_grad_norm_', None)
    if clip_grad is None:
        clip_grad = torch.nn.utils.clip_grad_norm
    clip_grad(model.parameters(), clip)
    optimizer.step()
    # A 0-dim loss tensor cannot be indexed with [0] on recent PyTorch;
    # .item() is the supported way to get the scalar.
    return loss.item() if hasattr(loss, 'item') else loss.data[0]


# Validation on a single mini-batch
def validate(model, inputs, labels, optimizer, criterion):
    """Compute the loss of a mini-batch without updating the model.

    Args:
        model: network exposing ``initHidden(batch_size)`` and callable on
            ``inputs``.
        inputs: batch tensor whose dimension 1 is the batch size.
        labels: targets passed to ``criterion`` together with the outputs.
        optimizer: unused; kept so the signature mirrors ``train``.
        criterion: loss function called as ``criterion(outputs, labels)``.

    Returns:
        The batch loss as a Python float.
    """
    # Reset the hidden state for this batch size
    model.initHidden(inputs.size(1))

    def batch_loss():
        return criterion(model(inputs), labels)

    # No backward pass happens here, so skip recording the autograd graph
    # when the running PyTorch provides torch.no_grad.
    if hasattr(torch, 'no_grad'):
        with torch.no_grad():
            loss = batch_loss()
    else:
        loss = batch_loss()
    # A 0-dim loss tensor cannot be indexed with [0] on recent PyTorch;
    # .item() is the supported way to get the scalar.
    return loss.item() if hasattr(loss, 'item') else loss.data[0]

## 4. パラメータの設定

In [5]:
# Number of features per time step, read from the last axis of the training inputs
input_size = train_X.shape[2]
# Number of output classes = number of distinct labels in the training split
output_size = np.unique(train_y).size

In [6]:
''' GRU '''
# Hyperparameters for the GRU run. `lr` and `clip` are read later by the
# optimizer cell and the training loop.
hidden_size = 100
dropout = 0.2
lr = 0.01
init_forget_bias = 1
clip = 1

# Create the model instance. This rebinds the global `model`, so whichever
# model cell was executed last is the one that gets trained.
model = GRU(input_size, hidden_size, output_size, dropout=dropout)
model.initWeight(init_forget_bias)

In [7]:
''' LSTM '''
# Hyperparameters for the LSTM run. `lr` and `clip` are read later by the
# optimizer cell and the training loop.
hidden_size = 100
dropout = 0.2
lr = 0.01
init_forget_bias = 1
clip = 1

# Create the model instance. This rebinds the global `model`, so whichever
# model cell was executed last is the one that gets trained.
model = LSTM(input_size, hidden_size, output_size, dropout=dropout)
model.initWeight(init_forget_bias)

In [6]:
''' SRU '''
# Hyperparameters for the SRU run. `lr` and `clip` are read later by the
# optimizer cell and the training loop.
phi_size = 200
r_size = 60
lr = 0.01
clip = 1

# NOTE(review): this seeds the CUDA generator only, while the model is still
# on the CPU at this point (model.cuda() runs in a later cell). If initWeight
# draws from the CPU generator the seed does not affect initialisation;
# torch.manual_seed(0) may be what is intended. Confirm against sru.py.
torch.cuda.manual_seed(0)
# Rebinds the global `model`; the last executed model cell wins.
model = SRU(input_size, phi_size, r_size, output_size)
model.initWeight()

In [12]:
''' loss, optimizerの定義 '''
# Move the selected model to the GPU before handing its parameters to the optimizer
model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr) # SRU seems to work poorly with RMSprop and similar optimizers?

## 5. 訓練

In [14]:
''' 訓練 '''
n_epochs = 15
batch_size = 128
# Only full batches are used; a trailing partial batch is dropped
n_batches = train_X.shape[0] // batch_size
n_batches_v = valid_X.shape[0] // batch_size


def _to_cuda_batch(X_t, y, index):
    """Cut mini-batch `index` from a time-major array and its labels, as CUDA Variables."""
    lo = index * batch_size
    hi = lo + batch_size
    return (Variable(torch.from_numpy(X_t[:, lo:hi, :]).cuda()),
            Variable(torch.from_numpy(y[lo:hi]).cuda()))


start_time = time.time()

for epoch in range(n_epochs):
    train_cost = valid_cost = 0
    train_X, train_y = shuffle(train_X, train_y, random_state=42)

    # Training pass
    model.train()
    # Convert to X.shape => (seq_len, n_samples, n_features)
    train_X_t = np.transpose(train_X, (1, 0, 2))
    for i in range(n_batches):
        inputs, labels = _to_cuda_batch(train_X_t, train_y, i)
        train_cost += train(model, inputs, labels, optimizer, criterion, clip) / n_batches

    # Validation pass
    model.eval()
    # Convert to X.shape => (seq_len, n_samples, n_features)
    valid_X_t = np.transpose(valid_X, (1, 0, 2))
    for i in range(n_batches_v):
        inputs, labels = _to_cuda_batch(valid_X_t, valid_y, i)
        valid_cost += validate(model, inputs, labels, optimizer, criterion) / n_batches_v

    # Report the mean per-batch costs for this epoch
    print('EPOCH:: %i, (%s) Training cost: %.5f, Validation cost: %.5f' % (epoch + 1,
                       timeSince(start_time), train_cost, valid_cost))

print('Finished Training')

RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1502008109146/work/torch/lib/THC/generic/THCTensorMath.cu:35

In [15]:
''' テスト '''
# Evaluate on the first 500 test samples only, fed as one time-major batch
test_X_t = np.transpose(test_X[:500], (1, 0, 2))
inputs, labels = test_X_t, test_y[:500]
inputs, labels = Variable(torch.from_numpy(inputs).cuda()
                 ), Variable(torch.from_numpy(labels).cuda())
model.initHidden(inputs.size(1))
# NOTE(review): this forward pass keeps its autograd graph alive through
# `outputs`; the gradient-check cell further down calls backward() on it, so
# gradient tracking cannot simply be switched off here.
outputs = model(inputs)

# Number of correct predictions
(torch.max(outputs, 1)[1] == labels).sum()

RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1502008109146/work/torch/lib/THC/generic/THCStorage.cu:66

In [19]:
''' 勾配の確認 '''
# Backpropagate the loss of the test batch (`outputs` and `labels` come from
# the test cell) so the resulting parameter gradients can be inspected.
loss = criterion(outputs, labels)
loss.backward()