In [1]:

# 単語分割

# 前処理

# 文章とラベルに分ける

# データを読み込む

# ボキャブラリーを作成

# DataLoaderの作成

#


# 単語分割

In [2]:
import MeCab
m_t = MeCab.Tagger('-Ochasen')

In [3]:
text = '機械学習が好きです'

In [4]:
print(m_t.parse(text).strip())

機械	キカイ	機械	名詞-一般		
学習	ガクシュウ	学習	名詞-サ変接続		
が	ガ	が	助詞-格助詞-一般		
好き	スキ	好き	名詞-形容動詞語幹		
です	デス	です	助動詞	特殊・デス	基本形
EOS


In [5]:
def tokenizer_mecab(text):
    text = m_t.parse(text) # mecab
    ret = text.strip().split()  # text処理
    return ret

# 前処理

In [33]:
import re

def preprocessing_text(text):
    # 前処理
    text = re.sub('\r', '', text) # 
    text = re.sub('\n', '', text) # 改行
    text = re.sub(' ', '', text) # 半角
    text = re.sub('　　', '', text) # 全角
    text = re.sub(r'[0-9 ０-９]', '0', text)

    return text

# tokenizerの作成

In [34]:
def tokenizer_with_preprocessing(text):
    text = preprocessing_text(text) # textの前処理
    ret = tokenizer_mecab(text) # mecabの単語分割
    return ret

# torchtext

In [35]:
import torchtext

max_length = 25
TEXT = torchtext.data.Field(sequential=True, tokenize=tokenizer_with_preprocessing,
                                                 use_vocab=True, lower=True, include_lengths=True, 
                                                 batch_first=True, fix_length=max_length)
LABEL = torchtext.data.Field(sequential=False, use_vocab=False)

# Datasetの作成

In [36]:
train_ds, val_ds, test_ds = torchtext.data.TabularDataset.splits(
    path='./data/', train='text_train.tsv',
    validation='text_val.tsv', test='text_test.tsv', format='tsv',
    fields=[('Text', TEXT), ('Label', LABEL)])

In [39]:
vars(train_ds[0])

{'Text': ['王',
  'オウ',
  '王',
  '名詞-一般',
  'と',
  'ト',
  'と',
  '助詞-並立助詞',
  '王子',
  'オウジ',
  '王子',
  '名詞-一般',
  'と',
  'ト',
  'と',
  '助詞-並立助詞',
  '女王',
  'ジョオウ',
  '女王',
  '名詞-一般',
  'と',
  'ト',
  'と',
  '助詞-並立助詞',
  '姫',
  'ヒメ',
  '姫',
  '名詞-一般',
  'と',
  'ト',
  'と',
  '助詞-並立助詞',
  '男性',
  'ダンセイ',
  '男性',
  '名詞-一般',
  'と',
  'ト',
  'と',
  '助詞-並立助詞',
  '女性',
  'ジョセイ',
  '女性',
  '名詞-一般',
  'が',
  'ガ',
  'が',
  '助詞-格助詞-一般',
  'い',
  'イ',
  'いる',
  '動詞-自立',
  '一段',
  '連用形',
  'まし',
  'マシ',
  'ます',
  '助動詞',
  '特殊・マス',
  '連用形',
  'た',
  'タ',
  'た',
  '助動詞',
  '特殊・タ',
  '基本形',
  '。',
  '。',
  '。',
  '記号-句点',
  'eos'],
 'Label': '0'}

# ボキャブラリーの作成

In [42]:
TEXT.build_vocab(train_ds, min_freq=1)
TEXT.vocab.freqs

Counter({'王': 2,
         'オウ': 1,
         '名詞-一般': 17,
         'と': 10,
         'ト': 5,
         '助詞-並立助詞': 5,
         '王子': 2,
         'オウジ': 1,
         '女王': 2,
         'ジョオウ': 1,
         '姫': 2,
         'ヒメ': 1,
         '男性': 2,
         'ダンセイ': 1,
         '女性': 2,
         'ジョセイ': 1,
         'が': 6,
         'ガ': 3,
         '助詞-格助詞-一般': 10,
         'い': 1,
         'イ': 1,
         'いる': 5,
         '動詞-自立': 6,
         '一段': 3,
         '連用形': 6,
         'まし': 1,
         'マシ': 1,
         'ます': 5,
         '助動詞': 9,
         '特殊・マス': 3,
         'た': 2,
         'タ': 1,
         '特殊・タ': 1,
         '基本形': 8,
         '。': 12,
         '記号-句点': 4,
         'eos': 4,
         '機械': 2,
         'キカイ': 1,
         '学習': 2,
         'ガクシュウ': 1,
         '名詞-サ変接続': 8,
         '好き': 2,
         'スキ': 1,
         '名詞-形容動詞語幹': 4,
         'です': 2,
         'デス': 1,
         '特殊・デス': 1,
         '本章': 4,
         'ホンショウ': 2,
         'から': 2,
         'カラ': 1,
         '自然

# DataLoaderの作成

In [44]:
train_dl= torchtext.data.Iterator(train_ds, batch_size=2, train=True)
val_dl = torchtext.data.Iterator(val_ds, batch_size=2, train=False, sort=False)
test_dl = torchtext.data.Iterator(test_ds, batch_size=2, train=False, sort=False)

In [48]:
# 動作確認
batch = next(iter(train_dl))
print(batch.Text)
print(batch.Label)

(tensor([[ 33,  63,  33,   2,  50,  90,  50,   5,  83,  95,  83,  32,  84,  93,
          84,   2,  66,  97,  66,   9,  55, 109,  55,   5, 119],
        [ 33,  63,  33,   2,  53, 106,  53,   5,  57, 111,  57, 116,  71,  96,
          71,   2,  45,  45,  45,   9,   8,  28,   8,   5,  82]]), tensor([25, 25]))
tensor([1, 0])


# IMDbデータセットの実装

In [11]:
from glob import glob
import os
import io

In [12]:
# 訓練データの作成
with open('./data/IMDb_train.tsv', 'w') as f:

    path = './data/aclImdb/train/pos/'
    for fname in glob(os.path.join(path, '*.txt')):
        with io.open(fname, 'r', encoding='utf-8') as ff:
            text = ff.readline()
            text = text.replace('\t', ' ')
            text = text+'\t'+'1'+'\t'+'\n'
            f.write(text)


    path = './data/aclImdb/train/neg/'
    for fname in glob(os.path.join(path, '*.txt')):
        with io.open(fname, 'r', encoding='utf-8') as ff:
            text = ff.readline()
            text = text.replace('\t', ' ')
            text = text+'\t'+'0'+'\t'+'\n'
            f.write(text)

In [13]:
# テストデータの作成
with open('./data/IMDb_test.tsv', 'w') as f:

    path = './data/aclImdb/test/pos/'
    for fname in glob(os.path.join(path, '*.txt')):
        with io.open(fname, 'r', encoding='utf-8') as ff:
            text = ff.readline()
            text = text.replace('\t', ' ')
            text = text+'\t'+'1'+'\t'+'\n'
            f.write(text)

    path = './data/aclImdb/test/neg/'
    for fname in glob(os.path.join(path, '*.txt')):
        with io.open(fname, 'r', encoding='utf-8') as ff:
            text = ff.readline()
            text = text.replace('\t', ' ')
            text = text+'\t'+'0'+'\t'+'\n'
            f.write(text)

#  前処理

In [14]:
import string
import re

# 前処理
def preprocessing_text(text):
    text = re.sub('<br />', '', text)
    
    # カンマ・ピリオド以外の記号をスペースに変換
    for p in string.punctuation:
        if (p =='.') or (p == ','):
            continue
        else:
            text = text.replace(p, ' ')
        
    # ピリオドの前後にはスペースを入れる
    text = text.replace('.', ' . ')
    text = text.replace(',', ' , ')
    
    return text

# 分かち書き
def tokenizer_punctuation(text):
    return text.strip().split()

# 前処理と分かち書きをまとめる
def tokenizer_with_preprocessing(text):
    text = preprocessing_text(text)
    ret = tokenizer_punctuation(text)
    return ret

In [15]:
print(tokenizer_with_preprocessing('I lick cats.'))

['I', 'lick', 'cats', '.']


# DataLoaderの作成

In [16]:
import torchtext

max_length = 256
TEXT = torchtext.data.Field(sequential=True, tokenize=tokenizer_with_preprocessing, use_vocab=True,
                            lower=True, include_lengths=True, batch_first=True, fix_length=max_length, init_token="<cls>", eos_token="<eos>")
LABEL = torchtext.data.Field(sequential=False, use_vocab=False)

In [17]:
train_val_ds, test_ds = torchtext.data.TabularDataset.splits(
    path='./data/', train='IMDb_train.tsv',
    test='IMDb_test.tsv', format='tsv',
    fields=[('Text', TEXT), ('Label', LABEL)])

In [18]:
print(vars(train_val_ds[0]))

{'Text': ['very', 'different', 'topic', 'treated', 'in', 'this', 'film', '.', 'a', 'straightforward', 'and', 'simple', 'description', 'of', 'local', 'chinese', 'customs', ',', 'by', 'looking', 'at', 'the', 'daily', 'operation', 'of', 'a', 'public', 'bath', ',', 'run', 'by', 'the', 'old', 'owner', 'and', 'his', 'retarded', 'son', ',', 'when', 'older', 'son', 'returns', 'home', ',', 'wrongly', 'believing', 'his', 'father', 'has', 'died', '.', 'how', 'every', 'man', 'in', 'town', 'makes', 'his', 'daily', 'visit', 'to', 'chat', ',', 'play', 'games', ',', 'discuss', 'personal', 'matters', 'and', 'get', 'honest', 'advice', ',', 'besides', 'the', 'usual', 'spa', 'like', 'therapies', '.', 'when', 'old', 'man', 'dies', ',', 'strong', 'and', 'loyal', 'family', 'ties', 'make', 'older', 'son', 'take', 'charge', ',', 'so', 'public', 'bath', 'operation', 'is', 'not', 'disrupted', '.', 'and', 'finally', ',', 'the', 'arrival', 'of', 'modernization', 'to', 'end', 'this', 'way', 'of', 'spending', 'relax

# 訓練と検証を分ける

In [19]:
import random

train_ds, val_ds = train_val_ds.split(split_ratio=0.8, random_state=random.seed(1234))

# ボキャブラリーの作成

In [20]:
from torchtext.vocab import Vectors

In [21]:
english_fasttext_vocabs = Vectors(name='./data/wiki-news-300d-1M.vec')

RuntimeError: no vectors found at .vector_cache/./data/wiki-news-300d-1M.vec

In [34]:
print('単語の次元数', english_fasttext_vocabs.dim)
print('単語数', len(english_fasttext_vocabs))

単語の次元数 300
単語数 999994


In [35]:
TEXT.build_vocab(train_ds, vectors=english_fasttext_vocabs, min_freq=10)

In [36]:
print(TEXT.vocab.vectors)
print(TEXT.vocab.stoi)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
defaultdict(<bound method Vocab._default_unk_index of <torchtext.vocab.Vocab object at 0x7f2c20444c90>>, {'<unk>': 0, '<pad>': 1, '<cls>': 2, '<eos>': 3})


# DataLoaderの作成

In [25]:
train_dl = torchtext.data.Iterator(train_ds, batch_size=24, train=True)
val_dl = torchtext.data.Iterator(val_ds, batch_size=24, train=False, sort=False)
test_dl = torchtext.data.Iterator(test_ds, batch_size=24, train=False, sort=False)

In [26]:
batch = next(iter(val_dl))
print(batch.Text)
print(batch.Label)

(tensor([[   2,   47,  223,  ...,  133, 1742,    3],
        [   2,   52,   55,  ...,    1,    1,    1],
        [   2,   14,  146,  ...,    1,    1,    1],
        ...,
        [   2,    5,    5,  ...,    1,    1,    1],
        [   2, 3089, 2184,  ...,  447,  285,    3],
        [   2,    4,  687,  ...,  345,    7,    3]]), tensor([256, 103, 174, 174, 202, 256, 204, 256, 153, 119, 256, 256, 132, 256,
         57, 256, 256, 143, 256, 256, 159, 211, 256, 256]))
tensor([1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0])


# Transformerの作成

In [7]:
# Embedder (単語数　→ 単語数*分散表現数)
# Positional Encoder 単語数*分散表現数の位置情報を加える(単語数*分散表現数 → 単語数*分散表現数)
# Transformer Blockモジュール

# Embedder

In [52]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Embedder(nn.Module):
    def __init__(self, text_embedding_vectors):
        super(Embedder, self).__init__()
        
        self.embeddings = nn.Embedding.from_pretrained(
        embeddings=text_embedding_vectors, freeze=True)
        
    def forward(self, x):
        y = self.embeddings(x)
        return y

In [17]:
import glob
import os
import io
import string
import re
import random
import math
import spacy
import torchtext
from torchtext.vocab import Vectors


def get_IMDb_DataLoaders_and_TEXT(max_length=256, batch_size=24):
    """IMDbのDataLoaderとTEXTオブジェクトを取得する。 """

    # 訓練データのtsvファイルを作成します
    f = open('./data/IMDb_train.tsv', 'w')

    path = './data/aclImdb/train/pos/'
    for fname in glob.glob(os.path.join(path, '*.txt')):
        with io.open(fname, 'r', encoding="utf-8") as ff:
            text = ff.readline()

            # タブがあれば消しておきます
            text = text.replace('\t', " ")

            text = text+'\t'+'1'+'\t'+'\n'
            f.write(text)

    path = './data/aclImdb/train/neg/'
    for fname in glob.glob(os.path.join(path, '*.txt')):
        with io.open(fname, 'r', encoding="utf-8") as ff:
            text = ff.readline()

            # タブがあれば消しておきます
            text = text.replace('\t', " ")

            text = text+'\t'+'0'+'\t'+'\n'
            f.write(text)

    f.close()

   # テストデータの作成
    f = open('./data/IMDb_test.tsv', 'w')

    path = './data/aclImdb/test/pos/'
    for fname in glob.glob(os.path.join(path, '*.txt')):
        with io.open(fname, 'r', encoding="utf-8") as ff:
            text = ff.readline()

            # タブがあれば消しておきます
            text = text.replace('\t', " ")

            text = text+'\t'+'1'+'\t'+'\n'
            f.write(text)

    path = './data/aclImdb/test/neg/'
    for fname in glob.glob(os.path.join(path, '*.txt')):
        with io.open(fname, 'r', encoding="utf-8") as ff:
            text = ff.readline()

            # タブがあれば消しておきます
            text = text.replace('\t', " ")

            text = text+'\t'+'0'+'\t'+'\n'
            f.write(text)
    f.close()

    def preprocessing_text(text):
        # 改行コードを消去
        text = re.sub('<br />', '', text)

        # カンマ、ピリオド以外の記号をスペースに置換
        for p in string.punctuation:
            if (p == ".") or (p == ","):
                continue
            else:
                text = text.replace(p, " ")

        # ピリオドなどの前後にはスペースを入れておく
        text = text.replace(".", " . ")
        text = text.replace(",", " , ")
        return text

    # 分かち書き（今回はデータが英語で、簡易的にスペースで区切る）
    def tokenizer_punctuation(text):
        return text.strip().split()


    # 前処理と分かち書きをまとめた関数を定義
    def tokenizer_with_preprocessing(text):
        text = preprocessing_text(text)
        ret = tokenizer_punctuation(text)
        return ret


    # データを読み込んだときに、読み込んだ内容に対して行う処理を定義します
    # max_length
    TEXT = torchtext.data.Field(sequential=True, tokenize=tokenizer_with_preprocessing, use_vocab=True,
                                lower=True, include_lengths=True, batch_first=True, fix_length=max_length, init_token="<cls>", eos_token="<eos>")
    LABEL = torchtext.data.Field(sequential=False, use_vocab=False)

    # フォルダ「data」から各tsvファイルを読み込みます
    train_val_ds, test_ds = torchtext.data.TabularDataset.splits(
        path='./data/', train='IMDb_train.tsv',
        test='IMDb_test.tsv', format='tsv',
        fields=[('Text', TEXT), ('Label', LABEL)])

    # torchtext.data.Datasetのsplit関数で訓練データとvalidationデータを分ける
    train_ds, val_ds = train_val_ds.split(
        split_ratio=0.8, random_state=random.seed(1234))

    # torchtextで単語ベクトルとして英語学習済みモデルを読み込みます
    english_fasttext_vectors = Vectors(name='data/wiki-news-300d-1M.vec')

    # ベクトル化したバージョンのボキャブラリーを作成します
    TEXT.build_vocab(train_ds, vectors=english_fasttext_vectors, min_freq=10)

    # DataLoaderを作成します（torchtextの文脈では単純にiteraterと呼ばれています）
    train_dl = torchtext.data.Iterator(
        train_ds, batch_size=batch_size, train=True)

    val_dl = torchtext.data.Iterator(
        val_ds, batch_size=batch_size, train=False, sort=False)

    test_dl = torchtext.data.Iterator(
        test_ds, batch_size=batch_size, train=False, sort=False)

    return train_dl, val_dl, test_dl, TEXT

In [18]:
train_dl, val_dl, test_dl, TEXT = get_IMDb_DataLoaders_and_TEXT(
    max_length=256, batch_size=24)

RuntimeError: no vectors found at .vector_cache/data/wiki-news-300d-1M.vec

In [32]:
# 動作確認

## ミニバッチ
batch = next(iter(train_dl))

## モデル構築
net1 = Embedder(TEXT.vocab.vectors)

## 入出力
x = batch.Text[0]
x1 = net1(x)

In [33]:
print(x.shape)
print(y.shape)

torch.Size([24, 256])


NameError: name 'y' is not defined

# Positional Encoder

In [34]:
class PositionalEncoder(nn.Module):
    def __init__(self, d_model=300, max_seq_len=256):
        super(PositionalEncoder, self).__init__()
        
        # 単語ベクトルの次元数
        self.d_model = d_model
        
        # 単語の順番posとベクトルの次元位置iの(p, i)によって一意に定まる表を作成する
        pe = torch.zeros(max_seq_len, d_model)
        
        for pos in range(max_seq_len):
            for i in range(0, d_model, 2):
                pe[pos, i] = math.sin(
                                        pos / (10000 ** ((2*i)/d_model)))
                pe[pos, i+1] = math.cos(
                                        pos / (10000 ** ((2*(i+1))/d_model)))
        
        self.pe = pe.unsqueeze(0)
        
        # 勾配を計算しないようにする
        self.pe.requires_grad = False
        
    def forward(self, x):
        # 入力xとPositional Encoderを足し算する
        ret = math.sqrt(self.d_model)*x + self.pe
        return ret

In [35]:
# 動作確認

## モデル構築
net1 = Embedder(TEXT.vocab.vectors)
net2 = PositionalEncoder(d_model=300, max_seq_len=256)

## 入出力
x = batch.Text[0]
x1 = net1(x)
x2 = net2(x1)

In [36]:
print('入力テンソルサイズ', x1.shape)
print('出力テンソルサイズ', x2.shape)

入力テンソルサイズ torch.Size([24, 256, 300])
出力テンソルサイズ torch.Size([24, 256, 300])


# TransformerBlockモジュール

In [37]:
# LayerNormalization: 特徴量の正規化を行う
# Dropout: 過学習防止
# Attention
# FeedForward: 特徴量変換
# からなる
# * 実際のTransformerのAttentionではMulti-Headed Attentionを採用している
# <pad>の部分にはmask=0をつけるが，Attentionでは-1e9とすることでsoftmaxの出力を0にする

In [38]:
# Attentionの作成
class Attention(nn.Module):
    def __init__(self, d_model=300):
        super().__init__()
        
        # 特徴量の作成
        self.q_linear = nn.Linear(d_model, d_model)
        self.v_linear = nn.Linear(d_model, d_model)
        self.k_linear = nn.Linear(d_model, d_model)
        
        # 出力の全結合層
        self.out = nn.Linear(d_model, d_model)
        
        # Attentionの大きさ調整の変数
        self.d_k = d_model
        
    def forward(self, q, k, v, mask):
        q = self.q_linear(q)
        k = self.k_linear(k)
        v = self.v_linear(v)
        
        # Attentionの値を計算する
        weights = torch.matmul(q, k.transpose(1, 2)) / math.sqrt(self.d_k)
        
        # maskを計算
        mask = mask.unsqueeze(1)
        weights = weights.masked_fill(mask==0, -1e9)
        
        # softmaxで規格化する
        normalized_weights = F.softmax(weights, dim=-1)
        
        # AttentionをValueと掛け算
        output = torch.matmul(normalized_weights, v)
        
        # 特徴量を変換
        output = self.out(output)
        
        return output, normalized_weights

In [39]:
# FeedForwardの作成
class FeedForward(nn.Module):
    def __init__(self, d_model, d_ff=1024, dropout=0.1):
        super().__init__()
        
        self.linear_1 = nn.Linear(d_model, d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(d_ff, d_model)
        
    def forward(self, x):
        x = self.linear_1(x)
        x = self.dropout(x)
        x = self.linear_2(x)
        return x

In [40]:
# Transformer Blockの作成
class TransformerBlock(nn.Module):
    def __init__(self, d_model, dropout=0.1):
        super().__init__()
        
        # LayerNorm層
        self.norm_1 = nn.LayerNorm(d_model)
        self.norm_2 = nn.LayerNorm(d_model)
        
        # Attention層
        self.attn = Attention(d_model)
        
        # 全結合層
        self.ff = FeedForward(d_model)
        
        # Dropout
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)
        
    def forward(self, x, mask):
        # 正規化とAttention
        x_normalized = self.norm_1(x)
        output, normalized_weights = self.attn(
            x_normalized, x_normalized, x_normalized, mask)
        
        x2 = x + self.dropout_1(output)
        
        # 正規化と全結合層構築
        x_normalized2 = self.norm_2(x2)
        output = x2 + self.dropout_2(self.ff(x_normalized2))
        
        return output, normalized_weights

In [41]:
# 動作確認

## モデル構築
net1 = Embedder(TEXT.vocab.vectors)
net2 = PositionalEncoder(d_model=300, max_seq_len=256)
net3 = TransformerBlock(d_model=300)

## maskの作成
x = batch.Text[0]
input_pad = 1   # padding ID
input_mask = (x != input_pad)
# print(input_mask[0])

## 入出力
x1 = net1(x)
x2 = net2(x1)
x3, normalized_weights = net3(x2, input_mask)

# classificationHeadモジュール

In [42]:
class ClassificationHead(nn.Module):
    def __init__(self, d_model=300, output_dim=2):
        super().__init__()
        
        # 全結合層
        self.linear = nn.Linear(d_model, output_dim)
        
        # 重み初期化
        nn.init.normal_(self.linear.weight, std=0.02)
        nn.init.normal_(self.linear.bias, 0)
        
    def forward(self, x):
        x0 = x[:, 0, :]   # 各文の先頭の単語の特徴量を取り出す
        out = self.linear(x0)
        
        return out

# Transformerの実装

In [43]:
class TransformerClassification(nn.Module):
    def __init__(self, text_embedding_vectors, d_model=300, max_seq_len=256,
                           output_dim=2):
        super().__init__()
        
        # モデルの構築
        self.net1 = Embedder(text_embedding_vectors)
        self.net2 = PositionalEncoder(d_model, max_seq_len)
        self.net3_1 = TransformerBlock(d_model)
        self.net3_2 = TransformerBlock(d_model)
        self.net4 = ClassificationHead(d_model, output_dim)
        
    def forward(self, x, mask):
        x1 = self.net1(x)
        x2 = self.net2(x1)
        x3_1, normalized_weights_1 = self.net3_1(x2, mask)
        x3_2, normalized_weights_2 = self.net3_2(x3_1, mask)
        x4 = self.net4(x3_2)
        return x4, normalized_weights_1, normalized_weights_2

In [44]:
# 動作確認

## モデル構築
net = TransformerClassification(TEXT.vocab.vectors, d_model=300, max_seq_len=256,
                               output_dim=2)

## 入出力
x = batch.Text[0]
input_pad = 1
input_mask = (x != input_pad)
out, normalized_weights_1, normalized_weights_2 = net(x, input_mask)

# Transformerの学習・推論

In [26]:
# 読み込み
train_dl, val_dl, test_dl, TEXT = get_IMDb_DataLoaders_and_TEXT(
    max_length=256, batch_size=24)

In [27]:
# 辞書オブジェクトにまとめる
dataloaders_dict = {'train': train_dl, 'val': val_dl}

In [46]:
# モデルの構築
net = TransformerClassification(
    text_embedding_vectors=TEXT.vocab.vectors, d_model=300, max_seq_len=256, output_dim=2)

In [49]:
# パラメータの初期化を定義
def weights_init(m):
    classname =  m.__class__.__name__
    if classname.find('Linear') != -1:
        nn.init.kaiming_normal_(m.weight)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.0)


# 訓練モード
net.train()

# パラメータ初期化
net.net3_1.apply(weights_init)
net.net3_2.apply(weights_init)

TransformerBlock(
  (norm_1): LayerNorm((300,), eps=1e-05, elementwise_affine=True)
  (norm_2): LayerNorm((300,), eps=1e-05, elementwise_affine=True)
  (attn): Attention(
    (q_linear): Linear(in_features=300, out_features=300, bias=True)
    (v_linear): Linear(in_features=300, out_features=300, bias=True)
    (k_linear): Linear(in_features=300, out_features=300, bias=True)
    (out): Linear(in_features=300, out_features=300, bias=True)
  )
  (ff): FeedForward(
    (linear_1): Linear(in_features=300, out_features=1024, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear_2): Linear(in_features=1024, out_features=300, bias=True)
  )
  (dropout_1): Dropout(p=0.1, inplace=False)
  (dropout_2): Dropout(p=0.1, inplace=False)
)

In [53]:
# 損失関数
criterion = nn.CrossEntropyLoss()

# 最適化手法
learning_rate = 2e-5
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

In [61]:
# 訓練と検証

def train_model(net, datalloaders_dict, criterion, optimizer, num_epochs):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('----start----')
    net.to(device)
    
    torch.backends.cudnn.benchmark = True
    
    for epoch in range(num_epochs):
        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()
            else:
                net.eval()
            
            epoch_loss = 0.0
            epoch_corrects = 0
            
            for batch in (dataloaders_dict[phase]):
                inputs = batch.Text[0].to(device)
                labels = batch.Label.to(device)
                
                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase == 'train'):
                    
                    # maskの作成
                    input_pad = 1
                    input_mask = (inputs != input_pad)
                    
                    # Transformerに入力
                    outputs, _, _ = net(inputs, input_mask)
                    loss = criterion(outputs, labels)
                    
                    _, preds = torch.max(outputs, 1)
                    
                    # 更新
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                    
                    # 結果の計算
                    epoch_loss += loss.item() * inputs.size(0)
                    epoch_corrects += torch.sum(preds == labels.data)
            
            # epochごとのlossと正解率
            epoch_loss = epoch_loss / len(dataloaders_dict[pahse].dataset)
            epoch_acc = epoch_corrects.double() / len(dataloaders_dict[pahse].dataset)
            
            print('Epoch {}/{} | {:^5} | Loss: {:.4f} Acc: {:.4f}'.format(
                                                                         epoch+1,
                                                                         num_epochs,
                                                                         pahse,
                                                                         epoch_loss,
                                                                         epoch_acc))
    return net

In [62]:
num_epochs = 10
net_trained = train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

----start----


RuntimeError: expected device cuda:0 but got device cpu