# **80. ID番号への変換**

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Download the News Aggregator dataset archive from the UCI repository and
# extract it into the current working directory.
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00359/NewsAggregatorDataset.zip
!unzip NewsAggregatorDataset.zip

--2023-06-22 08:14:57--  https://archive.ics.uci.edu/ml/machine-learning-databases/00359/NewsAggregatorDataset.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘NewsAggregatorDataset.zip’

NewsAggregatorDatas     [     <=>            ]  27.87M  31.5MB/s    in 0.9s    

2023-06-22 08:14:59 (31.5 MB/s) - ‘NewsAggregatorDataset.zip’ saved [29224203]

Archive:  NewsAggregatorDataset.zip
  inflating: 2pageSessions.csv       
   creating: __MACOSX/
  inflating: __MACOSX/._2pageSessions.csv  
  inflating: newsCorpora.csv         
  inflating: __MACOSX/._newsCorpora.csv  
  inflating: readme.txt              
  inflating: __MACOSX/._readme.txt   


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the raw corpus; column names are supplied explicitly via `names`.
# NOTE(review): titles may contain unbalanced double quotes; with pandas' default
# quoting such rows can be merged — consider quoting=csv.QUOTE_NONE and confirm the row count.
df = pd.read_csv(
    "newsCorpora.csv",
    sep="\t",
    names=("ID", "TITLE", "URL", "PUBLISHER", "CATEGORY", "STORY", "HOSTNAME", "TIMESTAMP"),
)

# Keep only articles from the five target publishers, and only the TITLE / CATEGORY columns.
data = df.loc[
    df["PUBLISHER"].isin(["Reuters", "Huffington Post", "Businessweek", "Contactmusic.com", "Daily Mail"]),
    ["TITLE", "CATEGORY"],
]

# Split into train : dev : test = 8 : 1 : 1 (fixed seed so the split is reproducible).
train, others = train_test_split(data, test_size=0.2, random_state=0, shuffle=True)
dev, test = train_test_split(others, test_size=0.5, random_state=0, shuffle=True)

# Save each split as a tab-separated file. `index` is documented as a bool, so pass
# False explicitly instead of relying on None being falsy.
train.to_csv("train.txt", sep="\t", index=False)
dev.to_csv("dev.txt", sep="\t", index=False)
test.to_csv("test.txt", sep="\t", index=False)

# Report the number of examples per category in each split.
print(f'学習データの事例数\n{train["CATEGORY"].value_counts()}\n')
print(f'検証データの事例数\n{dev["CATEGORY"].value_counts()}\n')
print(f'テストデータの事例数\n{test["CATEGORY"].value_counts()}\n')

学習データの事例数
b    4481
e    4240
t    1214
m     737
Name: CATEGORY, dtype: int64

検証データの事例数
b    575
e    528
t    137
m     94
Name: CATEGORY, dtype: int64

テストデータの事例数
b    571
e    511
t    173
m     79
Name: CATEGORY, dtype: int64



In [None]:
import string
import re

def preprocessing(text):
    """Replace ASCII punctuation with spaces and collapse whitespace runs.

    Every character of ``string.punctuation`` is turned into a space, then each run
    of whitespace is squeezed into a single space. Leading and trailing spaces are
    not stripped.

    Args:
        text: Input string.

    Returns:
        The normalized string.
    """
    # Translation table keyed by code point: punctuation -> one space.
    punct_to_space = dict.fromkeys(map(ord, string.punctuation), " ")
    without_punct = text.translate(punct_to_space)
    return re.sub(r"\s+", " ", without_punct)

In [None]:
import collections

# Preprocess the training titles (this overwrites train["TITLE"] in place).
train["TITLE"] = train["TITLE"].map(preprocessing)

# Join all titles and split on single spaces to obtain one flat token list.
all_sentences = " ".join(train["TITLE"].tolist()).split(" ")

# Count token frequencies (Counter is a dict subclass) and discard the empty
# token produced by consecutive spaces.
all_word_cnt = collections.Counter(all_sentences)
all_word_cnt.pop("", None)

# Assign IDs by descending frequency, starting at 1; words seen only once get no ID.
# most_common() yields (word, count) pairs sorted by count.
word2id = {
    word: rank
    for rank, (word, cnt) in enumerate(all_word_cnt.most_common(), start=1)
    if cnt > 1
}

print(f'ID数: {len(set(word2id.values()))}\n')
print('頻度上位12語')
for key in list(word2id)[:12]:
    print(f'{key}: {word2id[key]}')

ID数: 9481

頻度上位12語
to: 1
s: 2
in: 3
UPDATE: 4
on: 5
as: 6
US: 7
for: 8
of: 9
The: 10
1: 11
To: 12


In [None]:
# 単語に対応するIDを返す関数：未知語は"0"を返す
def word_to_id(text, word2id=word2id, unk=0):
    """Convert a whitespace-separated text into a list of word IDs.

    Args:
        text: Input string; it is split on whitespace.
        word2id: Mapping from word to integer ID.
        unk: ID returned for words missing from ``word2id`` (0 by default).

    Returns:
        A list with one ID per token of ``text``.
    """
    ids = []
    for token in text.split():
        ids.append(word2id.get(token, unk))
    return ids

In [None]:
# Sanity check: convert a sample sentence into its ID sequence (unknown words map to 0).
text = "I have a pen ."
print(f'テキスト: {text}')
print(f'ID列: {word_to_id(text)}')

テキスト: I have a pen .
ID列: [84, 209, 19, 0, 0]


# **81. RNNによる予測**

In [None]:
from torch.utils.data import Dataset
import torch

class CreateDataset(Dataset):
    """Map-style dataset that yields word-ID tensors and labels.

    Args:
        X: Indexable collection of title strings (e.g. a pandas Series or a list).
        y: Indexable collection of integer labels, aligned with ``X`` by position.
        word_to_id: Callable turning one title string into a list of integer IDs.
    """

    def __init__(self, X, y, word_to_id):
        self.X = X
        self.y = y
        self.word_to_id = word_to_id

    def __len__(self):
        """Return the number of examples (the length of ``y``)."""
        return len(self.y)

    @staticmethod
    def _at(container, position):
        """Return the element at integer ``position`` of ``container``.

        ``series[i]`` on a pandas object is a *label* lookup, which raises or silently
        misaligns when the index is not 0..n-1 (for example a frame that came straight
        out of ``train_test_split``). Use ``.iloc`` when it is available.
        """
        if hasattr(container, "iloc"):
            return container.iloc[position]
        return container[position]

    def __getitem__(self, index):
        """Return ``{'inputs': int64 ID tensor, 'labels': int64 scalar tensor}`` for one example."""
        text = self._at(self.X, index)
        inputs = self.word_to_id(text)

        return {
            'inputs': torch.tensor(inputs, dtype=torch.int64),
            'labels': torch.tensor(self._at(self.y, index), dtype=torch.int64)
        }

In [None]:
# Mapping from category letter to integer class label.
category_dict = {'b': 0, 't': 1, 'e':2, 'm':3}

# train
df_train = pd.read_csv("train.txt", sep="\t") # load the saved split
X_train = df_train["TITLE"].map(preprocessing) # apply the text preprocessing
y_train = df_train["CATEGORY"].map(lambda x: category_dict[x]).values # convert labels to integers

# dev
df_dev = pd.read_csv("dev.txt", sep="\t")
X_dev = df_dev["TITLE"].map(preprocessing)
y_dev = df_dev["CATEGORY"].map(lambda x: category_dict[x]).values

# test
df_test = pd.read_csv("test.txt", sep="\t")
X_test = df_test["TITLE"].map(preprocessing)
y_test = df_test["CATEGORY"].map(lambda x: category_dict[x]).values

# Build the dataset objects; titles are converted to IDs lazily by word_to_id.
dataset_train = CreateDataset(X_train, y_train, word_to_id)
dataset_dev = CreateDataset(X_dev, y_dev, word_to_id)
dataset_test = CreateDataset(X_test, y_test, word_to_id)

In [None]:
from torch import nn

class RNN(nn.Module):
    """Single-layer LSTM classifier over a sequence of word IDs.

    Args:
        n_input: Number of rows in the embedding table.
        n_embed: Word-embedding dimension.
        n_hidden: LSTM hidden-state dimension.
        n_output: Number of output classes.
        padding_idx: ID whose embedding is kept at zero and not updated.
    """

    def __init__(self, n_input, n_embed, n_hidden, n_output, padding_idx):
        super(RNN, self).__init__()
        self.embed = nn.Embedding(num_embeddings=n_input, embedding_dim=n_embed, padding_idx=padding_idx)
        # batch_first=True is required: inputs arrive as (batch, seq) and forward() reads
        # o[:, -1, :]. Without it the LSTM treats every token as an independent
        # length-1 sequence, so only the last token would influence the prediction.
        self.lstm = nn.LSTM(input_size=n_embed, hidden_size=n_hidden, batch_first=True)
        self.fc = nn.Linear(in_features=n_hidden, out_features=n_output)

    def forward(self, x):
        """Return class logits of shape (batch, n_output) for ID tensor ``x`` of shape (batch, seq)."""
        o, (h, c) = self.lstm(self.embed(x))
        # Output at the last time step of every sequence.
        return self.fc(o[:, -1, :])

In [None]:
n_input = len(set(word2id.values())) + 1  # embedding rows: ID 0 (unknown word) plus one row per assigned ID
n_embed = 128  # word-embedding dimension
n_hidden = 256  # sentence-vector (LSTM hidden state) dimension
n_output = 4  # output dimension (= number of label classes)
# NOTE(review): word IDs are assigned contiguously from 1, so the largest word ID equals
# len(word2id) and therefore coincides with this padding index — confirm whether a
# separate ID should be reserved for padding.
padding_idx = len(set(word2id.values()))

# Model definition
model = RNN(n_input, n_embed, n_hidden, n_output, padding_idx)

# Class probabilities from the untrained model for the first 10 training examples
for i in range(10):
    X = dataset_train[i]["inputs"]
    print(torch.softmax(model(X.unsqueeze(0)), dim=-1))

tensor([[0.2509, 0.2519, 0.2480, 0.2492]], grad_fn=<SoftmaxBackward0>)
tensor([[0.2580, 0.2427, 0.2397, 0.2596]], grad_fn=<SoftmaxBackward0>)
tensor([[0.2541, 0.2288, 0.2535, 0.2636]], grad_fn=<SoftmaxBackward0>)
tensor([[0.2650, 0.2392, 0.2469, 0.2490]], grad_fn=<SoftmaxBackward0>)
tensor([[0.2607, 0.2480, 0.2464, 0.2449]], grad_fn=<SoftmaxBackward0>)
tensor([[0.2328, 0.2487, 0.2541, 0.2644]], grad_fn=<SoftmaxBackward0>)
tensor([[0.2488, 0.2410, 0.2566, 0.2536]], grad_fn=<SoftmaxBackward0>)
tensor([[0.2515, 0.2536, 0.2391, 0.2558]], grad_fn=<SoftmaxBackward0>)
tensor([[0.2696, 0.2338, 0.2542, 0.2423]], grad_fn=<SoftmaxBackward0>)
tensor([[0.2568, 0.2406, 0.2590, 0.2436]], grad_fn=<SoftmaxBackward0>)


# **82. 確率的勾配降下法による学習**

In [None]:
from torch.utils.data import DataLoader

# With batch_size = 1 every batch holds a single sequence.
batch_size = 1

# Create the data loaders from the dataset objects (only the training data is shuffled).
train_loader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
dev_loader = DataLoader(dataset_dev, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(dataset_test, batch_size=batch_size, shuffle=False)

In [None]:
def calc_loss_accuracy(model, loader, criterion, device):
    """Compute the mean loss and the accuracy of ``model`` over all examples in ``loader``.

    Both metrics are weighted by batch size, so a smaller final batch does not get the
    same weight as a full one (a plain mean of per-batch means would over-weight it).
    The model is switched to eval mode for the computation and its previous
    training/eval mode is restored afterwards.

    Args:
        model: ``nn.Module`` mapping ``batch["inputs"]`` to class logits.
        loader: Iterable of dicts with ``"inputs"`` and ``"labels"`` tensors.
        criterion: Loss function; assumed to return the per-batch mean
            (the default reduction of ``nn.CrossEntropyLoss``).
        device: Device the batch tensors are moved to.

    Returns:
        ``(loss, accuracy)`` as scalar tensors; both are NaN when the loader is empty.
    """
    was_training = model.training
    model.eval()
    total_loss, n_correct, n_examples = 0.0, 0, 0
    try:
        with torch.no_grad():
            for batch in loader:
                x = batch["inputs"].to(device)
                t = batch["labels"].to(device)
                y = model(x)
                n = len(t)

                # Undo the per-batch mean so the final average is per example.
                total_loss += criterion(y, t).item() * n
                n_correct += (torch.argmax(y, dim=1) == t).sum().item()
                n_examples += n
    finally:
        model.train(was_training)

    if n_examples == 0:
        return torch.tensor(float("nan")), torch.tensor(float("nan"))
    return torch.tensor(total_loss / n_examples), torch.tensor(n_correct / n_examples)


def train_model(batch_size, model, criterion, optimizer, num_epochs, nlp, device=None):
    """Train ``model`` on the global ``train_loader`` and checkpoint after every epoch.

    After each epoch the loss/accuracy on the global ``train_loader`` and ``dev_loader``
    are computed with ``calc_loss_accuracy`` and printed, and a checkpoint is written to
    ``{nlp}/checkpoint_{epoch}.pt``.

    Args:
        batch_size: Unused; the batch size is determined by the global data loaders.
        model: ``nn.Module`` to train.
        criterion: Loss function.
        optimizer: Optimizer bound to ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.
        nlp: Directory that receives the checkpoints (created if missing).
        device: Device to train on. When ``None`` the model stays on the device it is
            already on and the batches are moved there.
    """
    import os  # local import keeps this cell self-contained

    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")
    model.to(device)
    os.makedirs(nlp, exist_ok=True)

    for i in range(num_epochs):
        # Re-enter training mode every epoch: the evaluation below switches the model
        # to eval mode, which would otherwise persist for all following epochs.
        model.train()
        for batch in train_loader:
            x = batch["inputs"].to(device)
            t = batch["labels"].to(device)
            y = model(x)
            loss = criterion(y, t)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Inference mode for the end-of-epoch evaluation
        model.eval()

        loss_train, acc_train = calc_loss_accuracy(model, train_loader, criterion, device)
        loss_dev, acc_dev = calc_loss_accuracy(model, dev_loader, criterion, device)

        # Save a checkpoint
        torch.save({"epoch":i, "model_state_dict":model.state_dict(), "optimizer_state_dict":optimizer.state_dict()}, f"{nlp}/checkpoint_{i+1}.pt")

        # Log the metrics
        print(f"epoch:{i+1}  loss_train:{loss_train:.4f}  acc_train:{acc_train:.4f}  loss_dev:{loss_dev:.4f}  acc_dev:{acc_dev:.4f}")

In [None]:
! mkdir nlp82

n_input = len(set(word2id.values())) + 1
n_embed = 128  # 単語ベクトルの次元
n_hidden = 256  # 文ベクトルの次元
n_output = 4  # 出力ベクトルの次元（=ラベルの種類数）
padding_idx = len(set(word2id.values()))
nlp="nlp82"

# モデル定義
model = RNN(n_input, n_embed, n_hidden, n_output, padding_idx)

criterion = nn.CrossEntropyLoss() #損失関数

optimizer = torch.optim.SGD(model.parameters(), lr=0.1) # 最適化手法

device = torch.device("cpu")

train_model(1, model, criterion, optimizer, 5, nlp)

mkdir: cannot create directory ‘nlp82’: File exists
epoch:1  loss_train:0.9891  acc_train:0.5970  loss_dev:1.1112  acc_dev:0.5210
epoch:2  loss_train:0.7810  acc_train:0.7115  loss_dev:1.0037  acc_dev:0.6064
epoch:3  loss_train:0.6669  acc_train:0.7543  loss_dev:0.9617  acc_dev:0.6522
epoch:4  loss_train:0.5875  acc_train:0.7759  loss_dev:0.9925  acc_dev:0.6514
epoch:5  loss_train:0.5661  acc_train:0.7852  loss_dev:1.0197  acc_dev:0.6327


# **83. ミニバッチ化・GPU上での学習**

In [None]:
from torch import nn

class RNN(nn.Module):
    """LSTM sentence classifier for padded mini-batches of word-ID sequences.

    The prediction is made from the hidden state after the last *real* token of each
    sequence; trailing padding is excluded by packing the batch, so the result does not
    depend on how much padding the batch happens to contain.

    Args:
        n_input: Number of rows in the embedding table.
        n_embed: Word-embedding dimension.
        n_hidden: LSTM hidden-state dimension.
        n_output: Number of output classes.
        padding_idx: ID used for padding (its embedding is kept at zero).
    """

    def __init__(self, n_input, n_embed, n_hidden, n_output, padding_idx):
        super(RNN, self).__init__()
        self.padding_idx = padding_idx
        self.embed = nn.Embedding(num_embeddings=n_input, embedding_dim=n_embed, padding_idx=padding_idx)
        self.lstm = nn.LSTM(input_size=n_embed, hidden_size=n_hidden, batch_first=True)
        self.fc = nn.Linear(in_features=n_hidden, out_features=n_output)

    def _sequence_lengths(self, x):
        """Return, per row of ``x``, the position of the last non-padding ID (at least 1)."""
        if self.padding_idx is None:
            return torch.full((x.size(0),), x.size(1), dtype=torch.int64)
        positions = torch.arange(1, x.size(1) + 1, device=x.device)
        # Position of the last non-padding ID rather than a count, so an ID equal to
        # padding_idx in the middle of a sequence does not cut off the tokens after it.
        lengths = ((x != self.padding_idx) * positions).max(dim=1).values
        return lengths.clamp(min=1)

    def forward(self, x):
        """Return class logits of shape (batch, n_output) for ID tensor ``x`` of shape (batch, seq)."""
        lengths = self._sequence_lengths(x)
        # pack_padded_sequence expects the lengths on the CPU.
        packed = nn.utils.rnn.pack_padded_sequence(
            self.embed(x), lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        _, (h, _) = self.lstm(packed)
        # h[-1]: final hidden state, i.e. the state after the last real token.
        return self.fc(h[-1])

In [None]:
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

# 最大系列長に揃える関数を定義：ミニバッチでは系列長を揃えないとダメ
def collate_fn_(batch):
    """Collate dataset items into one mini-batch padded to the longest sequence.

    Mini-batches need equal-length sequences, so shorter ones are right-padded with
    the ID ``len(set(word2id.values()))``.

    Args:
        batch: List of dicts with an ``"inputs"`` ID tensor and a ``"labels"`` scalar tensor.

    Returns:
        Dict with ``'inputs'`` (batch, max_len) and ``'labels'`` (batch,) tensors.
    """
    pad_id = len(set(word2id.values()))
    token_ids = []
    label_values = []
    for item in batch:
        token_ids.append(item["inputs"])
        label_values.append(item["labels"])

    padded = torch.nn.utils.rnn.pad_sequence(token_ids, batch_first=True, padding_value=pad_id)
    return {'inputs': padded, 'labels': torch.LongTensor(label_values)}


batch_size = 32

# Create the data loaders from the dataset objects; collate_fn_ pads each mini-batch
# to a common length (only the training data is shuffled).
train_loader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, collate_fn=collate_fn_)
dev_loader = DataLoader(dataset_dev, batch_size=batch_size, shuffle=False, collate_fn=collate_fn_)
test_loader = DataLoader(dataset_test, batch_size=batch_size, shuffle=False, collate_fn=collate_fn_)

In [None]:
! mkdir nlp83

n_input = len(set(word2id.values())) + 1
n_embed = 128  # 単語ベクトルの次元
n_hidden = 256  # 文ベクトルの次元
n_output = 4  # 出力ベクトルの次元（=ラベルの種類数）
padding_idx = len(set(word2id.values()))
nlp="nlp83"

# モデル定義
model = RNN(n_input, n_embed, n_hidden, n_output, padding_idx)
print(model)

criterion = nn.CrossEntropyLoss() #損失関数

optimizer = torch.optim.SGD(model.parameters(), lr=0.05) # 最適化手法

device = torch.device("cuda")

train_model(32, model, criterion, optimizer, 10, nlp)

mkdir: cannot create directory ‘nlp83’: File exists
RNN(
  (embed): Embedding(9482, 128, padding_idx=9481)
  (lstm): LSTM(128, 256, batch_first=True)
  (fc): Linear(in_features=256, out_features=4, bias=True)
)
epoch:1  loss_train:1.1583  acc_train:0.4372  loss_dev:1.1438  acc_dev:0.4571
epoch:2  loss_train:1.1484  acc_train:0.4908  loss_dev:1.1360  acc_dev:0.4910
epoch:3  loss_train:1.1224  acc_train:0.5262  loss_dev:1.1095  acc_dev:0.5275
epoch:4  loss_train:0.9568  acc_train:0.6412  loss_dev:0.9228  acc_dev:0.6569
epoch:5  loss_train:0.9790  acc_train:0.6445  loss_dev:0.9642  acc_dev:0.6403
epoch:6  loss_train:0.7832  acc_train:0.7275  loss_dev:0.7598  acc_dev:0.7405
epoch:7  loss_train:0.7164  acc_train:0.7505  loss_dev:0.7033  acc_dev:0.7635
epoch:8  loss_train:0.6332  acc_train:0.7747  loss_dev:0.6601  acc_dev:0.7754
epoch:9  loss_train:0.6785  acc_train:0.7599  loss_dev:0.7479  acc_dev:0.7391
epoch:10  loss_train:0.5530  acc_train:0.7905  loss_dev:0.6682  acc_dev:0.7597


# **84. 単語ベクトルの導入**

In [None]:
# ライブラリのインストール
# Install gensim (pinned version).
# NOTE(review): the captured output of this cell shows the wheel build for this pinned
# version failing; the import below presumably resolves to a gensim that is already
# present in the runtime — confirm and adjust the pin.
! pip install gensim==4.0.1

from gensim import models

# Load the pre-trained word vectors (binary word2vec format) from the mounted Drive.
en_w2v = models.KeyedVectors.load_word2vec_format('drive/MyDrive/nlp100/GoogleNews-vectors-negative300.bin', binary=True)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gensim==4.0.1
  Downloading gensim-4.0.1.tar.gz (23.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.1/23.1 MB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: gensim
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py bdist_wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Building wheel for gensim (setup.py) ... [?25lerror
[31m  ERROR: Failed building wheel for gensim[0m[31m
[0m[?25h  Running setup.py clean for gensim
Failed to build gensim
[31mERROR: Could not build wheels for gensim, which is required to install pyproject.toml-based projects[0m[31m


In [None]:
import numpy as np

# Build the embedding matrix from the pre-trained word vectors.
n_inputs = len(set(word2id.values())) + 1
n_embed = 300
weights = np.zeros((n_inputs, n_embed))  # row 0 (unknown word) stays all-zero
words_in_pretrained = 0
for word, word_id in word2id.items():
    # The row index must be the word's ID: IDs start at 1, so indexing by the
    # enumeration position (0-based) would store every vector one row too early
    # and each word would look up the vector of a different word.
    try:
        weights[word_id] = en_w2v[word]
        words_in_pretrained += 1
    except KeyError:
        # Word missing from the pre-trained vocabulary: random initialization.
        weights[word_id] = np.random.normal(scale=0.4, size=(n_embed,))
# NOTE(review): the largest word ID equals the padding index used by the models in this
# notebook, so that word's row doubles as the padding row — confirm whether a separate
# padding ID should be reserved.
weights = torch.from_numpy(weights.astype((np.float32)))

print(f'学習済みベクトル利用単語数: {words_in_pretrained} / {n_inputs}')
print(weights.size())

学習済みベクトル利用単語数: 9239 / 9482
torch.Size([9482, 300])


In [None]:
from torch import nn

class RNN(nn.Module):
    """LSTM sentence classifier with optional pre-trained word embeddings.

    The prediction is made from the hidden state after the last *real* token of each
    sequence; trailing padding is excluded by packing the batch.

    Args:
        n_input: Number of rows in the embedding table (used when ``emb_weights`` is None).
        n_embed: Word-embedding dimension (used when ``emb_weights`` is None).
        n_hidden: LSTM hidden-state dimension.
        n_output: Number of output classes.
        padding_idx: ID used for padding.
        emb_weights: Optional 2-D float tensor; when given, the embedding layer is
            created from it with ``nn.Embedding.from_pretrained`` (default arguments,
            apart from ``padding_idx``).
    """

    def __init__(self, n_input, n_embed, n_hidden, n_output, padding_idx, emb_weights=None):
        super(RNN, self).__init__()
        self.padding_idx = padding_idx
        # `is not None`: comparing a tensor with `!=` is not an identity check.
        if emb_weights is not None:
            self.embed = nn.Embedding.from_pretrained(emb_weights, padding_idx=padding_idx)
        else:
            self.embed = nn.Embedding(num_embeddings=n_input, embedding_dim=n_embed, padding_idx=padding_idx)
        # Take the LSTM input size from the embedding layer so it always matches the
        # width of the pre-trained vectors.
        self.lstm = nn.LSTM(input_size=self.embed.embedding_dim, hidden_size=n_hidden, batch_first=True)
        self.fc = nn.Linear(in_features=n_hidden, out_features=n_output)

    def _sequence_lengths(self, x):
        """Return, per row of ``x``, the position of the last non-padding ID (at least 1)."""
        if self.padding_idx is None:
            return torch.full((x.size(0),), x.size(1), dtype=torch.int64)
        positions = torch.arange(1, x.size(1) + 1, device=x.device)
        lengths = ((x != self.padding_idx) * positions).max(dim=1).values
        return lengths.clamp(min=1)

    def forward(self, x):
        """Return class logits of shape (batch, n_output) for ID tensor ``x`` of shape (batch, seq)."""
        lengths = self._sequence_lengths(x)
        # pack_padded_sequence expects the lengths on the CPU.
        packed = nn.utils.rnn.pack_padded_sequence(
            self.embed(x), lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        _, (h, _) = self.lstm(packed)
        # h[-1]: final hidden state, i.e. the state after the last real token.
        return self.fc(h[-1])

In [None]:
! mkdir nlp84

n_input = len(set(word2id.values())) + 1
n_embed = 300  # 単語ベクトルの次元
n_hidden = 256  # 文ベクトルの次元
n_output = 4  # 出力ベクトルの次元（=ラベルの種類数）
padding_idx = len(set(word2id.values()))
nlp="nlp84"

# モデル定義
model = RNN(n_input, n_embed, n_hidden, n_output, padding_idx, emb_weights=weights)
print(model)

criterion = nn.CrossEntropyLoss() #損失関数

optimizer = torch.optim.SGD(model.parameters(), lr=0.05) # 最適化手法

device = torch.device("cuda")

train_model(32, model, criterion, optimizer, 10, nlp)

mkdir: cannot create directory ‘nlp84’: File exists
RNN(
  (embed): Embedding(9482, 300, padding_idx=9481)
  (lstm): LSTM(300, 256, batch_first=True)
  (fc): Linear(in_features=256, out_features=4, bias=True)
)
epoch:1  loss_train:1.1625  acc_train:0.4228  loss_dev:1.1483  acc_dev:0.4355
epoch:2  loss_train:1.1595  acc_train:0.4389  loss_dev:1.1459  acc_dev:0.4531
epoch:3  loss_train:1.1561  acc_train:0.4683  loss_dev:1.1435  acc_dev:0.4788
epoch:4  loss_train:1.1514  acc_train:0.4705  loss_dev:1.1395  acc_dev:0.4832
epoch:5  loss_train:1.1446  acc_train:0.4896  loss_dev:1.1336  acc_dev:0.5003
epoch:6  loss_train:1.1372  acc_train:0.4869  loss_dev:1.1225  acc_dev:0.5007
epoch:7  loss_train:1.1275  acc_train:0.4993  loss_dev:1.0971  acc_dev:0.5078
epoch:8  loss_train:1.0518  acc_train:0.5753  loss_dev:1.0595  acc_dev:0.5752
epoch:9  loss_train:0.9680  acc_train:0.6488  loss_dev:0.9452  acc_dev:0.6506
epoch:10  loss_train:0.9367  acc_train:0.6589  loss_dev:0.9030  acc_dev:0.6763


# **85. 双方向・多層化**

In [None]:
from torch import nn

class RNN(nn.Module):
    """Multi-layer, optionally bidirectional LSTM sentence classifier.

    The sentence representation is the final hidden state of the last layer. For a
    bidirectional LSTM the final forward and backward states are concatenated; reading
    the output at the last time step instead would give the backward direction a state
    that has seen only a single (often padding) position. Padding is excluded by
    packing the batch.

    Args:
        n_input: Number of rows in the embedding table (used when ``emb_weights`` is None).
        n_embed: Word-embedding dimension (used when ``emb_weights`` is None).
        n_hidden: LSTM hidden-state dimension per direction.
        n_layers: Number of stacked LSTM layers.
        n_output: Number of output classes.
        padding_idx: ID used for padding.
        bidirectional: Whether the LSTM runs in both directions.
        emb_weights: Optional 2-D float tensor; when given, the embedding layer is
            created from it with ``nn.Embedding.from_pretrained``.
    """

    def __init__(self, n_input, n_embed, n_hidden, n_layers, n_output, padding_idx, bidirectional, emb_weights=None):
        super(RNN, self).__init__()
        self.padding_idx = padding_idx
        # `is not None`: comparing a tensor with `!=` is not an identity check.
        if emb_weights is not None:
            self.embed = nn.Embedding.from_pretrained(emb_weights, padding_idx=padding_idx)
        else:
            self.embed = nn.Embedding(num_embeddings=n_input, embedding_dim=n_embed, padding_idx=padding_idx)
        self.lstm = nn.LSTM(input_size=self.embed.embedding_dim, hidden_size=n_hidden, num_layers=n_layers, bidirectional=bidirectional, batch_first=True)
        self.fc = nn.Linear(in_features=n_hidden * (2 if bidirectional else 1), out_features=n_output)

    def _sequence_lengths(self, x):
        """Return, per row of ``x``, the position of the last non-padding ID (at least 1)."""
        if self.padding_idx is None:
            return torch.full((x.size(0),), x.size(1), dtype=torch.int64)
        positions = torch.arange(1, x.size(1) + 1, device=x.device)
        lengths = ((x != self.padding_idx) * positions).max(dim=1).values
        return lengths.clamp(min=1)

    def forward(self, x):
        """Return class logits of shape (batch, n_output) for ID tensor ``x`` of shape (batch, seq)."""
        lengths = self._sequence_lengths(x)
        # pack_padded_sequence expects the lengths on the CPU.
        packed = nn.utils.rnn.pack_padded_sequence(
            self.embed(x), lengths.cpu(), batch_first=True, enforce_sorted=False
        )
        _, (h, _) = self.lstm(packed)
        if self.lstm.bidirectional:
            # Last layer: h[-2] is the forward direction, h[-1] the backward direction.
            sentence = torch.cat([h[-2], h[-1]], dim=1)
        else:
            sentence = h[-1]
        return self.fc(sentence)

In [None]:
! mkdir nlp85

n_input = len(set(word2id.values())) + 1
n_embed = 300  # 単語ベクトルの次元
n_hidden = 256  # 文ベクトルの次元
n_layers = 2
n_output = 4  # 出力ベクトルの次元（=ラベルの種類数
bidirectional = True
padding_idx = len(set(word2id.values()))
nlp="nlp85"

# モデル定義
model = RNN(n_input, n_embed, n_hidden, n_layers, n_output, padding_idx, bidirectional, emb_weights=weights)
print(model)

criterion = nn.CrossEntropyLoss() #損失関数

optimizer = torch.optim.SGD(model.parameters(), lr=0.05) # 最適化手法

device = torch.device("cuda")

train_model(32, model, criterion, optimizer, 10, nlp)

RNN(
  (embed): Embedding(9482, 300, padding_idx=9481)
  (lstm): LSTM(300, 256, num_layers=2, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=512, out_features=4, bias=True)
)
epoch:1  loss_train:1.1624  acc_train:0.4200  loss_dev:1.1483  acc_dev:0.4326
epoch:2  loss_train:1.1621  acc_train:0.3973  loss_dev:1.1492  acc_dev:0.3949
epoch:3  loss_train:1.1604  acc_train:0.4673  loss_dev:1.1466  acc_dev:0.4958
epoch:4  loss_train:1.1578  acc_train:0.4583  loss_dev:1.1430  acc_dev:0.4784
epoch:5  loss_train:1.1542  acc_train:0.4682  loss_dev:1.1376  acc_dev:0.4899
epoch:6  loss_train:1.1456  acc_train:0.4898  loss_dev:1.1304  acc_dev:0.5122
epoch:7  loss_train:1.1347  acc_train:0.4894  loss_dev:1.1178  acc_dev:0.5085
epoch:8  loss_train:1.1128  acc_train:0.5256  loss_dev:1.0983  acc_dev:0.5272
epoch:9  loss_train:1.0145  acc_train:0.6145  loss_dev:0.9881  acc_dev:0.6221
epoch:10  loss_train:0.9698  acc_train:0.6424  loss_dev:0.9519  acc_dev:0.6418


# **86. 畳込みニューラルネットワーク（CNN）**

In [None]:
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

# 最大系列長に揃える関数を定義：ミニバッチでは系列長を揃えないとダメ
def collate_fn_(batch):
    """Collate dataset items into one mini-batch padded to the longest sequence.

    Mini-batches need equal-length sequences, so shorter ones are right-padded with
    the ID ``len(set(word2id.values()))``.

    Args:
        batch: List of dicts with an ``"inputs"`` ID tensor and a ``"labels"`` scalar tensor.

    Returns:
        Dict with ``'inputs'`` (batch, max_len) and ``'labels'`` (batch,) tensors.
    """
    pad_id = len(set(word2id.values()))
    token_ids = []
    label_values = []
    for item in batch:
        token_ids.append(item["inputs"])
        label_values.append(item["labels"])

    padded = torch.nn.utils.rnn.pad_sequence(token_ids, batch_first=True, padding_value=pad_id)
    return {'inputs': padded, 'labels': torch.LongTensor(label_values)}


def calc_loss_accuracy(model, loader, criterion, device):
    """Compute the mean loss and the accuracy of ``model`` over all examples in ``loader``.

    Both metrics are weighted by batch size, so a smaller final batch does not get the
    same weight as a full one (a plain mean of per-batch means would over-weight it).
    The model is switched to eval mode for the computation and its previous
    training/eval mode is restored afterwards.

    Args:
        model: ``nn.Module`` mapping ``batch["inputs"]`` to class logits.
        loader: Iterable of dicts with ``"inputs"`` and ``"labels"`` tensors.
        criterion: Loss function; assumed to return the per-batch mean
            (the default reduction of ``nn.CrossEntropyLoss``).
        device: Device the batch tensors are moved to.

    Returns:
        ``(loss, accuracy)`` as scalar tensors; both are NaN when the loader is empty.
    """
    was_training = model.training
    model.eval()
    total_loss, n_correct, n_examples = 0.0, 0, 0
    try:
        with torch.no_grad():
            for batch in loader:
                x = batch["inputs"].to(device)
                t = batch["labels"].to(device)
                y = model(x)
                n = len(t)

                # Undo the per-batch mean so the final average is per example.
                total_loss += criterion(y, t).item() * n
                n_correct += (torch.argmax(y, dim=1) == t).sum().item()
                n_examples += n
    finally:
        model.train(was_training)

    if n_examples == 0:
        return torch.tensor(float("nan")), torch.tensor(float("nan"))
    return torch.tensor(total_loss / n_examples), torch.tensor(n_correct / n_examples)


def train_model(batch_size, model, criterion, optimizer, num_epochs, nlp, device=None):
    """Train ``model`` on the global ``train_loader`` and checkpoint after every epoch.

    After each epoch the loss/accuracy on the global ``train_loader`` and ``dev_loader``
    are computed with ``calc_loss_accuracy`` and printed, and a checkpoint is written to
    ``{nlp}/checkpoint_{epoch}.pt``.

    Args:
        batch_size: Unused; the batch size is determined by the global data loaders.
        model: ``nn.Module`` to train.
        criterion: Loss function.
        optimizer: Optimizer bound to ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.
        nlp: Directory that receives the checkpoints (created if missing).
        device: Device to train on. When ``None`` the model stays on the device it is
            already on and the batches are moved there.
    """
    import os  # local import keeps this cell self-contained

    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")
    model.to(device)
    os.makedirs(nlp, exist_ok=True)

    for i in range(num_epochs):
        # Re-enter training mode every epoch: the evaluation below switches the model
        # to eval mode, which would otherwise persist for all following epochs.
        model.train()
        for batch in train_loader:
            x = batch["inputs"].to(device)
            t = batch["labels"].to(device)
            y = model(x)
            loss = criterion(y, t)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Inference mode for the end-of-epoch evaluation
        model.eval()

        loss_train, acc_train = calc_loss_accuracy(model, train_loader, criterion, device)
        loss_dev, acc_dev = calc_loss_accuracy(model, dev_loader, criterion, device)

        # Save a checkpoint
        torch.save({"epoch":i, "model_state_dict":model.state_dict(), "optimizer_state_dict":optimizer.state_dict()}, f"{nlp}/checkpoint_{i+1}.pt")

        # Log the metrics
        print(f"epoch:{i+1}  loss_train:{loss_train:.4f}  acc_train:{acc_train:.4f}  loss_dev:{loss_dev:.4f}  acc_dev:{acc_dev:.4f}")

In [None]:
from torch import nn
from torch.nn import functional as F

class CNN(nn.Module):
    """Convolutional sentence classifier: embedding -> conv -> ReLU -> max-over-time -> linear.

    Args:
        n_input: Number of rows in the embedding table.
        n_embed: Word-embedding dimension (also the width of the convolution kernel).
        n_output: Number of output classes.
        padding_idx: ID whose embedding is kept at zero.
        out_channels: Number of convolution filters.
        kernel_size: Number of consecutive tokens covered by one filter.
        stride: Convolution stride.
        padding: Zero padding added on both ends of the token axis.
    """

    def __init__(self, n_input, n_embed, n_output, padding_idx, out_channels, kernel_size, stride, padding):
        super().__init__()
        self.embed = nn.Embedding(n_input, n_embed, padding_idx=padding_idx)
        # The kernel spans the full embedding width, so the output width is 1.
        self.conv = nn.Conv2d(
            in_channels=1,
            out_channels=out_channels,
            kernel_size=(kernel_size, n_embed),
            stride=stride,
            padding=(padding, 0),
        )
        self.fc = nn.Linear(out_channels, n_output)

    def forward(self, x):
        """Return class logits of shape (batch, n_output) for ID tensor ``x`` of shape (batch, seq)."""
        embedded = self.embed(x).unsqueeze(1)                          # (batch, 1, seq, n_embed)
        feature_maps = F.relu(self.conv(embedded).squeeze(3))          # (batch, out_channels, positions)
        pooled = F.max_pool1d(feature_maps, feature_maps.size(2))      # (batch, out_channels, 1)
        return self.fc(pooled.squeeze(2))

In [None]:
n_input = len(set(word2id.values())) + 1  # embedding rows: ID 0 (unknown word) plus one row per assigned ID
n_embed = 300  # word-embedding dimension
n_output = 4  # output dimension (= number of label classes)
padding_idx = len(set(word2id.values()))
out_channels = 100  # number of convolution filters
kernel_size = 3  # tokens covered by one filter
stride = 1
padding = 1

model = CNN(n_input, n_embed, n_output, padding_idx, out_channels, kernel_size, stride, padding)

# Class probabilities from the untrained model for the first 10 training examples
for i in range(10):
    X = dataset_train[i]["inputs"]
    print(torch.softmax(model(X.unsqueeze(0)), dim=-1))

tensor([[0.2070, 0.1492, 0.2939, 0.3499]], grad_fn=<SoftmaxBackward0>)
tensor([[0.2277, 0.1267, 0.3018, 0.3438]], grad_fn=<SoftmaxBackward0>)
tensor([[0.2528, 0.1360, 0.3007, 0.3105]], grad_fn=<SoftmaxBackward0>)
tensor([[0.2406, 0.1736, 0.2862, 0.2997]], grad_fn=<SoftmaxBackward0>)
tensor([[0.1836, 0.1954, 0.3485, 0.2725]], grad_fn=<SoftmaxBackward0>)
tensor([[0.1974, 0.2155, 0.2973, 0.2898]], grad_fn=<SoftmaxBackward0>)
tensor([[0.2021, 0.1909, 0.2508, 0.3561]], grad_fn=<SoftmaxBackward0>)
tensor([[0.2314, 0.2127, 0.2668, 0.2892]], grad_fn=<SoftmaxBackward0>)
tensor([[0.1971, 0.2304, 0.2500, 0.3226]], grad_fn=<SoftmaxBackward0>)
tensor([[0.2175, 0.1357, 0.2372, 0.4095]], grad_fn=<SoftmaxBackward0>)


# **87. 確率的勾配降下法によるCNNの学習**

In [None]:
batch_size = 32

# データセットオブジェクトからデータローダーを作成
train_loader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, collate_fn=collate_fn_)
dev_loader = DataLoader(dataset_dev, batch_size=batch_size, shuffle=False, collate_fn=collate_fn_)
test_loader = DataLoader(dataset_test, batch_size=batch_size, shuffle=False, collate_fn=collate_fn_)

! mkdir nlp87
n_input = len(set(word2id.values())) + 1
n_embed = 300  # 単語ベクトルの次元
n_output = 4  # 出力ベクトルの次元（=ラベルの種類数）
padding_idx = len(set(word2id.values()))
out_channels = 100
kernel_size = 3
stride = 1
padding = 1
nlp = "nlp87"

model = CNN(n_input, n_embed, n_output, padding_idx, out_channels, kernel_size, stride, padding)

criterion = nn.CrossEntropyLoss() #損失関数

optimizer = torch.optim.SGD(model.parameters(), lr=0.05) # 最適化手法

device = torch.device("cuda")

train_model(32, model, criterion, optimizer, 10, nlp)

mkdir: cannot create directory ‘nlp87’: File exists
epoch:1  loss_train:0.3798  acc_train:0.8754  loss_dev:0.5309  acc_dev:0.8107
epoch:2  loss_train:0.1452  acc_train:0.9640  loss_dev:0.4315  acc_dev:0.8561
epoch:3  loss_train:0.0645  acc_train:0.9946  loss_dev:0.4411  acc_dev:0.8483
epoch:4  loss_train:0.0259  acc_train:0.9987  loss_dev:0.4231  acc_dev:0.8580
epoch:5  loss_train:0.0192  acc_train:0.9989  loss_dev:0.4437  acc_dev:0.8575
epoch:6  loss_train:0.0169  acc_train:0.9988  loss_dev:0.4347  acc_dev:0.8650
epoch:7  loss_train:0.0148  acc_train:0.9989  loss_dev:0.4421  acc_dev:0.8650
epoch:8  loss_train:0.0161  acc_train:0.9988  loss_dev:0.4587  acc_dev:0.8594
epoch:9  loss_train:0.0122  acc_train:0.9990  loss_dev:0.4511  acc_dev:0.8657
epoch:10  loss_train:0.0121  acc_train:0.9990  loss_dev:0.4548  acc_dev:0.8665


# **88. パラメータチューニング**

In [None]:
!pip install optuna

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting optuna
  Downloading optuna-3.2.0-py3-none-any.whl (390 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m390.6/390.6 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.11.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cmaes>=0.9.1 (from optuna)
  Downloading cmaes-0.9.1-py3-none-any.whl (21 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.2.4-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, cmaes, alembic, optuna
Successfully 

In [None]:
def train_model_nosave(batch_size, model, criterion, optimizer, num_epochs, device=None):
    """Train ``model`` on the global ``train_loader`` without writing checkpoints.

    After each epoch the loss/accuracy on the global ``train_loader`` and ``dev_loader``
    are computed with ``calc_loss_accuracy`` and printed.

    Args:
        batch_size: Unused; the batch size is determined by the global data loaders.
        model: ``nn.Module`` to train.
        criterion: Loss function.
        optimizer: Optimizer bound to ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.
        device: Device to train on. When ``None`` the model stays on the device it is
            already on and the batches are moved there.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")
    model.to(device)

    for i in range(num_epochs):
        # Re-enter training mode every epoch: the evaluation below switches the model
        # to eval mode, which would otherwise disable dropout for all following epochs.
        model.train()
        for batch in train_loader:
            x = batch["inputs"].to(device)
            t = batch["labels"].to(device)
            y = model(x)
            loss = criterion(y, t)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Inference mode for the end-of-epoch evaluation
        model.eval()

        loss_train, acc_train = calc_loss_accuracy(model, train_loader, criterion, device)
        loss_dev, acc_dev = calc_loss_accuracy(model, dev_loader, criterion, device)

        # Log the metrics
        print(f"epoch:{i+1}  loss_train:{loss_train:.4f}  acc_train:{acc_train:.4f}  loss_dev:{loss_dev:.4f}  acc_dev:{acc_dev:.4f}")

In [None]:
class CNN(nn.Module):
    """Convolutional sentence classifier with dropout before the output layer.

    Pipeline: embedding -> conv -> ReLU -> max-over-time pooling -> dropout -> linear.

    Args:
        n_input: Number of rows in the embedding table.
        n_embed: Word-embedding dimension (also the width of the convolution kernel).
        n_output: Number of output classes.
        padding_idx: ID whose embedding is kept at zero.
        out_channels: Number of convolution filters.
        kernel_size: Number of consecutive tokens covered by one filter.
        stride: Convolution stride.
        padding: Zero padding added on both ends of the token axis.
        dropout_rate: Dropout probability applied to the pooled features.
    """

    def __init__(self, n_input, n_embed, n_output, padding_idx, out_channels, kernel_size, stride, padding, dropout_rate):
        super().__init__()
        self.embed = nn.Embedding(n_input, n_embed, padding_idx=padding_idx)
        # The kernel spans the full embedding width, so the output width is 1.
        self.conv = nn.Conv2d(
            in_channels=1,
            out_channels=out_channels,
            kernel_size=(kernel_size, n_embed),
            stride=stride,
            padding=(padding, 0),
        )
        self.fc = nn.Linear(out_channels, n_output)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return class logits of shape (batch, n_output) for ID tensor ``x`` of shape (batch, seq)."""
        embedded = self.embed(x).unsqueeze(1)                          # (batch, 1, seq, n_embed)
        feature_maps = F.relu(self.conv(embedded).squeeze(3))          # (batch, out_channels, positions)
        pooled = F.max_pool1d(feature_maps, feature_maps.size(2))      # (batch, out_channels, 1)
        regularized = self.dropout(pooled.squeeze(2))
        return self.fc(regularized)

In [None]:
import optuna

def objective(trial):
    """Optuna objective: train a CNN with sampled hyperparameters and score it.

    Samples the embedding size, number of filters, dropout rate, learning rate
    and SGD momentum from ``trial``, trains a fresh ``CNN`` for 5 epochs with
    ``train_model_nosave`` and returns the accuracy on ``dev_loader``.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        The dev-set accuracy reported by ``calc_loss_accuracy``.
    """
    # Hyperparameters to tune. suggest_int / suggest_float replace the
    # deprecated suggest_discrete_uniform / suggest_loguniform calls.
    emb_size = trial.suggest_int('emb_size', 100, 400, step=100)
    out_channels = trial.suggest_int('out_channels', 50, 200, step=50)
    drop_rate = trial.suggest_float('drop_rate', 0.0, 0.5, step=0.1)
    learning_rate = trial.suggest_float('learning_rate', 5e-4, 5e-2, log=True)
    momentum = trial.suggest_float('momentum', 0.5, 0.9, step=0.1)

    # Fixed settings
    padding_idx = len(set(word2id.values()))  # padding ID = number of distinct IDs
    n_input = padding_idx + 1                 # embedding rows: all IDs plus padding
    n_output = 4  # output dimension (= number of label classes)
    kernel_size = 3
    stride = 1
    padding = 1

    # Build the model from the *sampled* embedding size; the original passed the
    # notebook-level n_embed, so 'emb_size' never influenced the trial.
    model = CNN(n_input, emb_size, n_output, padding_idx, out_channels, kernel_size, stride, padding, drop_rate)

    criterion = nn.CrossEntropyLoss()  # loss function

    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)  # optimizer

    # torch.cuda.set_device() returns None, so it cannot be used to obtain a
    # device; build one explicitly and fall back to CPU when CUDA is missing.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_model_nosave(32, model, criterion, optimizer, 5, device=device)

    _, dev_acc = calc_loss_accuracy(model, dev_loader, criterion, device)

    return dev_acc

In [None]:
# Run the hyperparameter search: maximise the objective for up to 1200 seconds.
study = optuna.create_study(direction="maximize")
study.optimize(objective, timeout=1200)

# Report the best trial. `trial` stays a notebook-level name because the final
# training cell reads its parameters.
trial = study.best_trial
summary_lines = [
    'Best trial:',
    '  Value: {:.3f}'.format(trial.value),
    '  Params: ',
]
for key, value in trial.params.items():
    summary_lines.append('    {}: {}'.format(key, value))
print('\n'.join(summary_lines))

[I 2023-06-22 10:36:19,569] A new study created in memory with name: no-name-bf49bab1-9b05-4fb7-8f05-0217fba0c903
  emb_size = int(trial.suggest_discrete_uniform('emb_size', 100, 400, 100))
  out_channels = int(trial.suggest_discrete_uniform('out_channels', 50, 200, 50))
  drop_rate = trial.suggest_discrete_uniform('drop_rate', 0.0, 0.5, 0.1)
  learning_rate = trial.suggest_loguniform('learning_rate', 5e-4, 5e-2)
  momentum = trial.suggest_discrete_uniform('momentum', 0.5, 0.9, 0.1)


epoch:1  loss_train:0.5040  acc_train:0.8280  loss_dev:0.5797  acc_dev:0.8111
epoch:2  loss_train:0.1446  acc_train:0.9654  loss_dev:0.4269  acc_dev:0.8583
epoch:3  loss_train:0.0429  acc_train:0.9949  loss_dev:0.4272  acc_dev:0.8584
epoch:4  loss_train:0.0213  acc_train:0.9985  loss_dev:0.4140  acc_dev:0.8695
epoch:5  loss_train:0.0154  acc_train:0.9988  loss_dev:0.4247  acc_dev:0.8684


[I 2023-06-22 10:37:05,246] Trial 0 finished with value: 0.8683712482452393 and parameters: {'emb_size': 300.0, 'out_channels': 100.0, 'drop_rate': 0.4, 'learning_rate': 0.006774270419053273, 'momentum': 0.9}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:0.5821  acc_train:0.8450  loss_dev:0.7283  acc_dev:0.7543
epoch:2  loss_train:0.1394  acc_train:0.9699  loss_dev:0.4279  acc_dev:0.8517
epoch:3  loss_train:0.0530  acc_train:0.9967  loss_dev:0.4585  acc_dev:0.8331
epoch:4  loss_train:0.0220  acc_train:0.9987  loss_dev:0.4136  acc_dev:0.8621
epoch:5  loss_train:0.0165  acc_train:0.9989  loss_dev:0.4205  acc_dev:0.8658


[I 2023-06-22 10:38:16,729] Trial 1 finished with value: 0.8658008575439453 and parameters: {'emb_size': 100.0, 'out_channels': 200.0, 'drop_rate': 0.30000000000000004, 'learning_rate': 0.024321959887221606, 'momentum': 0.5}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:0.9124  acc_train:0.6926  loss_dev:0.9094  acc_dev:0.6904
epoch:2  loss_train:0.7160  acc_train:0.7487  loss_dev:0.7521  acc_dev:0.7390
epoch:3  loss_train:0.5558  acc_train:0.8066  loss_dev:0.6493  acc_dev:0.7698
epoch:4  loss_train:0.4260  acc_train:0.8654  loss_dev:0.5751  acc_dev:0.7925


[I 2023-06-22 10:38:50,952] Trial 2 finished with value: 0.8103355169296265 and parameters: {'emb_size': 100.0, 'out_channels': 50.0, 'drop_rate': 0.5, 'learning_rate': 0.0011181016448577029, 'momentum': 0.9}. Best is trial 0 with value: 0.8683712482452393.


epoch:5  loss_train:0.3249  acc_train:0.9001  loss_dev:0.5223  acc_dev:0.8103
epoch:1  loss_train:0.9429  acc_train:0.6839  loss_dev:0.9418  acc_dev:0.6866
epoch:2  loss_train:0.7456  acc_train:0.7408  loss_dev:0.7752  acc_dev:0.7371
epoch:3  loss_train:0.6000  acc_train:0.7851  loss_dev:0.6671  acc_dev:0.7769
epoch:4  loss_train:0.4831  acc_train:0.8403  loss_dev:0.6023  acc_dev:0.7959


[I 2023-06-22 10:39:33,908] Trial 3 finished with value: 0.8010687232017517 and parameters: {'emb_size': 300.0, 'out_channels': 50.0, 'drop_rate': 0.4, 'learning_rate': 0.0035535148932308424, 'momentum': 0.6}. Best is trial 0 with value: 0.8683712482452393.


epoch:5  loss_train:0.3934  acc_train:0.8737  loss_dev:0.5548  acc_dev:0.8011
epoch:1  loss_train:0.6410  acc_train:0.7686  loss_dev:0.6953  acc_dev:0.7587
epoch:2  loss_train:0.3653  acc_train:0.8959  loss_dev:0.5333  acc_dev:0.8149
epoch:3  loss_train:0.1909  acc_train:0.9617  loss_dev:0.4432  acc_dev:0.8487
epoch:4  loss_train:0.1051  acc_train:0.9929  loss_dev:0.4232  acc_dev:0.8528
epoch:5  loss_train:0.0605  acc_train:0.9985  loss_dev:0.3939  acc_dev:0.8654


[I 2023-06-22 10:40:32,143] Trial 4 finished with value: 0.8653950691223145 and parameters: {'emb_size': 300.0, 'out_channels': 150.0, 'drop_rate': 0.1, 'learning_rate': 0.0108909590298002, 'momentum': 0.5}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:1.0175  acc_train:0.6417  loss_dev:1.0111  acc_dev:0.6361
epoch:2  loss_train:0.9086  acc_train:0.6833  loss_dev:0.9070  acc_dev:0.6906
epoch:3  loss_train:0.8284  acc_train:0.7125  loss_dev:0.8407  acc_dev:0.7092
epoch:4  loss_train:0.7561  acc_train:0.7349  loss_dev:0.7862  acc_dev:0.7204
epoch:5  loss_train:0.6894  acc_train:0.7572  loss_dev:0.7385  acc_dev:0.7330


[I 2023-06-22 10:41:22,139] Trial 5 finished with value: 0.7330222129821777 and parameters: {'emb_size': 300.0, 'out_channels': 100.0, 'drop_rate': 0.0, 'learning_rate': 0.001741461753940649, 'momentum': 0.5}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:1.0409  acc_train:0.6330  loss_dev:1.0239  acc_dev:0.6372
epoch:2  loss_train:0.9368  acc_train:0.6764  loss_dev:0.9286  acc_dev:0.6770
epoch:3  loss_train:0.8557  acc_train:0.7053  loss_dev:0.8597  acc_dev:0.7192
epoch:4  loss_train:0.7882  acc_train:0.7246  loss_dev:0.8032  acc_dev:0.7286
epoch:5  loss_train:0.7273  acc_train:0.7420  loss_dev:0.7617  acc_dev:0.7415


[I 2023-06-22 10:42:24,125] Trial 6 finished with value: 0.7415449023246765 and parameters: {'emb_size': 400.0, 'out_channels': 150.0, 'drop_rate': 0.2, 'learning_rate': 0.0011764401518101388, 'momentum': 0.6}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:0.4183  acc_train:0.8524  loss_dev:0.5664  acc_dev:0.7999
epoch:2  loss_train:0.1451  acc_train:0.9701  loss_dev:0.4482  acc_dev:0.8341
epoch:3  loss_train:0.0499  acc_train:0.9978  loss_dev:0.4143  acc_dev:0.8494
epoch:4  loss_train:0.0253  acc_train:0.9988  loss_dev:0.4130  acc_dev:0.8546
epoch:5  loss_train:0.0182  acc_train:0.9987  loss_dev:0.4106  acc_dev:0.8598


[I 2023-06-22 10:43:39,328] Trial 7 finished with value: 0.8597807884216309 and parameters: {'emb_size': 200.0, 'out_channels': 200.0, 'drop_rate': 0.0, 'learning_rate': 0.016379796306287792, 'momentum': 0.6}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:0.6259  acc_train:0.7824  loss_dev:0.6979  acc_dev:0.7446
epoch:2  loss_train:0.3373  acc_train:0.9042  loss_dev:0.5280  acc_dev:0.8059
epoch:3  loss_train:0.1817  acc_train:0.9615  loss_dev:0.4679  acc_dev:0.8308
epoch:4  loss_train:0.0951  acc_train:0.9948  loss_dev:0.4327  acc_dev:0.8513
epoch:5  loss_train:0.0569  acc_train:0.9983  loss_dev:0.4252  acc_dev:0.8431


[I 2023-06-22 10:44:34,056] Trial 8 finished with value: 0.8430736064910889 and parameters: {'emb_size': 300.0, 'out_channels': 150.0, 'drop_rate': 0.1, 'learning_rate': 0.0045048889231495935, 'momentum': 0.8}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:0.7219  acc_train:0.7481  loss_dev:0.7448  acc_dev:0.7450
epoch:2  loss_train:0.3331  acc_train:0.9143  loss_dev:0.5162  acc_dev:0.8182
epoch:3  loss_train:0.1630  acc_train:0.9702  loss_dev:0.4725  acc_dev:0.8308
epoch:4  loss_train:0.0805  acc_train:0.9897  loss_dev:0.4447  acc_dev:0.8568


[I 2023-06-22 10:45:13,154] Trial 9 finished with value: 0.8460497856140137 and parameters: {'emb_size': 100.0, 'out_channels': 50.0, 'drop_rate': 0.5, 'learning_rate': 0.019164534949524418, 'momentum': 0.5}. Best is trial 0 with value: 0.8683712482452393.


epoch:5  loss_train:0.0415  acc_train:0.9982  loss_dev:0.4777  acc_dev:0.8460
epoch:1  loss_train:nan  acc_train:0.4198  loss_dev:nan  acc_dev:0.4326
epoch:2  loss_train:nan  acc_train:0.4201  loss_dev:nan  acc_dev:0.4326
epoch:3  loss_train:nan  acc_train:0.4200  loss_dev:nan  acc_dev:0.4326
epoch:4  loss_train:nan  acc_train:0.4199  loss_dev:nan  acc_dev:0.4326
epoch:5  loss_train:nan  acc_train:0.4198  loss_dev:nan  acc_dev:0.4326


[I 2023-06-22 10:46:00,623] Trial 10 finished with value: 0.4325622320175171 and parameters: {'emb_size': 400.0, 'out_channels': 100.0, 'drop_rate': 0.30000000000000004, 'learning_rate': 0.04489196420125491, 'momentum': 0.9}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:0.5141  acc_train:0.8460  loss_dev:0.6345  acc_dev:0.7721
epoch:2  loss_train:0.1582  acc_train:0.9696  loss_dev:0.4378  acc_dev:0.8446
epoch:3  loss_train:0.0519  acc_train:0.9975  loss_dev:0.4093  acc_dev:0.8553
epoch:4  loss_train:0.0259  acc_train:0.9988  loss_dev:0.3931  acc_dev:0.8583
epoch:5  loss_train:0.0194  acc_train:0.9987  loss_dev:0.4151  acc_dev:0.8531


[I 2023-06-22 10:47:14,969] Trial 11 finished with value: 0.85308438539505 and parameters: {'emb_size': 200.0, 'out_channels': 200.0, 'drop_rate': 0.30000000000000004, 'learning_rate': 0.008590682908038879, 'momentum': 0.8}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:1.0931  acc_train:0.5855  loss_dev:1.0715  acc_dev:0.6097
epoch:2  loss_train:1.0227  acc_train:0.6395  loss_dev:1.0030  acc_dev:0.6606
epoch:3  loss_train:0.9582  acc_train:0.6715  loss_dev:0.9425  acc_dev:0.6877
epoch:4  loss_train:0.9041  acc_train:0.6895  loss_dev:0.8938  acc_dev:0.7026
epoch:5  loss_train:0.8590  acc_train:0.7070  loss_dev:0.8538  acc_dev:0.7256


[I 2023-06-22 10:48:09,917] Trial 12 finished with value: 0.725581705570221 and parameters: {'emb_size': 200.0, 'out_channels': 100.0, 'drop_rate': 0.4, 'learning_rate': 0.0005265940138148248, 'momentum': 0.7}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:88.8337  acc_train:0.6163  loss_dev:80.8621  acc_dev:0.6375
epoch:2  loss_train:37.8870  acc_train:0.7984  loss_dev:58.4575  acc_dev:0.7257
epoch:3  loss_train:17.7868  acc_train:0.9080  loss_dev:44.5391  acc_dev:0.8364
epoch:4  loss_train:7.1672  acc_train:0.9621  loss_dev:43.0792  acc_dev:0.8617
epoch:5  loss_train:6.8836  acc_train:0.9690  loss_dev:53.3706  acc_dev:0.8554


[I 2023-06-22 10:49:52,330] Trial 13 finished with value: 0.8553841710090637 and parameters: {'emb_size': 100.0, 'out_channels': 200.0, 'drop_rate': 0.4, 'learning_rate': 0.0453975264082803, 'momentum': 0.8}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:0.6436  acc_train:0.7696  loss_dev:0.7044  acc_dev:0.7468
epoch:2  loss_train:0.3465  acc_train:0.9303  loss_dev:0.5520  acc_dev:0.8238
epoch:3  loss_train:0.1723  acc_train:0.9754  loss_dev:0.4568  acc_dev:0.8405
epoch:4  loss_train:0.0878  acc_train:0.9950  loss_dev:0.4189  acc_dev:0.8590
epoch:5  loss_train:0.0497  acc_train:0.9988  loss_dev:0.4073  acc_dev:0.8665


[I 2023-06-22 10:50:58,406] Trial 14 finished with value: 0.8664772510528564 and parameters: {'emb_size': 200.0, 'out_channels': 150.0, 'drop_rate': 0.2, 'learning_rate': 0.007345041174066163, 'momentum': 0.7}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:0.6709  acc_train:0.7570  loss_dev:0.7231  acc_dev:0.7383
epoch:2  loss_train:0.3777  acc_train:0.8809  loss_dev:0.5571  acc_dev:0.7940
epoch:3  loss_train:0.2023  acc_train:0.9644  loss_dev:0.4742  acc_dev:0.8350
epoch:4  loss_train:0.1136  acc_train:0.9901  loss_dev:0.4546  acc_dev:0.8364
epoch:5  loss_train:0.0660  acc_train:0.9975  loss_dev:0.4426  acc_dev:0.8456


[I 2023-06-22 10:51:53,646] Trial 15 finished with value: 0.8456438779830933 and parameters: {'emb_size': 200.0, 'out_channels': 100.0, 'drop_rate': 0.2, 'learning_rate': 0.006814857305992061, 'momentum': 0.7}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:0.7183  acc_train:0.7412  loss_dev:0.7458  acc_dev:0.7309
epoch:2  loss_train:0.4442  acc_train:0.8408  loss_dev:0.5713  acc_dev:0.8011
epoch:3  loss_train:0.2578  acc_train:0.9387  loss_dev:0.4841  acc_dev:0.8341
epoch:4  loss_train:0.1533  acc_train:0.9814  loss_dev:0.4445  acc_dev:0.8453
epoch:5  loss_train:0.0910  acc_train:0.9959  loss_dev:0.4277  acc_dev:0.8501


[I 2023-06-22 10:52:54,782] Trial 16 finished with value: 0.8501082062721252 and parameters: {'emb_size': 400.0, 'out_channels': 150.0, 'drop_rate': 0.2, 'learning_rate': 0.0037104881058191725, 'momentum': 0.8}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:0.3916  acc_train:0.8724  loss_dev:0.5380  acc_dev:0.8038
epoch:2  loss_train:0.1296  acc_train:0.9774  loss_dev:0.4323  acc_dev:0.8494
epoch:3  loss_train:0.0436  acc_train:0.9968  loss_dev:0.4283  acc_dev:0.8509
epoch:4  loss_train:0.0223  acc_train:0.9989  loss_dev:0.4182  acc_dev:0.8657
epoch:5  loss_train:0.0166  acc_train:0.9989  loss_dev:0.4281  acc_dev:0.8661


[I 2023-06-22 10:53:42,150] Trial 17 finished with value: 0.8661391139030457 and parameters: {'emb_size': 300.0, 'out_channels': 100.0, 'drop_rate': 0.1, 'learning_rate': 0.005630747746703537, 'momentum': 0.9}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:0.6406  acc_train:0.7536  loss_dev:0.6918  acc_dev:0.7415
epoch:2  loss_train:0.2350  acc_train:0.9494  loss_dev:0.4621  acc_dev:0.8356
epoch:3  loss_train:0.0967  acc_train:0.9941  loss_dev:0.4364  acc_dev:0.8446
epoch:4  loss_train:0.0377  acc_train:0.9987  loss_dev:0.4053  acc_dev:0.8568
epoch:5  loss_train:0.0238  acc_train:0.9988  loss_dev:0.4180  acc_dev:0.8516


[I 2023-06-22 10:54:42,389] Trial 18 finished with value: 0.8515962958335876 and parameters: {'emb_size': 200.0, 'out_channels': 150.0, 'drop_rate': 0.4, 'learning_rate': 0.011597151999298564, 'momentum': 0.7}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:0.7381  acc_train:0.7459  loss_dev:0.7598  acc_dev:0.7365
epoch:2  loss_train:0.3290  acc_train:0.8954  loss_dev:0.5076  acc_dev:0.8197
epoch:3  loss_train:0.1493  acc_train:0.9709  loss_dev:0.4386  acc_dev:0.8420
epoch:4  loss_train:0.0686  acc_train:0.9962  loss_dev:0.4322  acc_dev:0.8509
epoch:5  loss_train:0.0367  acc_train:0.9990  loss_dev:0.4248  acc_dev:0.8572


[I 2023-06-22 10:55:30,162] Trial 19 finished with value: 0.8572105169296265 and parameters: {'emb_size': 300.0, 'out_channels': 100.0, 'drop_rate': 0.5, 'learning_rate': 0.006574549387806996, 'momentum': 0.8}. Best is trial 0 with value: 0.8683712482452393.


epoch:1  loss_train:0.9531  acc_train:0.6748  loss_dev:0.9472  acc_dev:0.6839
epoch:2  loss_train:0.7837  acc_train:0.7242  loss_dev:0.8057  acc_dev:0.7167
epoch:3  loss_train:0.6539  acc_train:0.7745  loss_dev:0.7114  acc_dev:0.7505
epoch:4  loss_train:0.5414  acc_train:0.8044  loss_dev:0.6311  acc_dev:0.7680
epoch:5  loss_train:0.4431  acc_train:0.8612  loss_dev:0.5773  acc_dev:0.7978


[I 2023-06-22 10:56:33,342] Trial 20 finished with value: 0.7977544069290161 and parameters: {'emb_size': 200.0, 'out_channels': 150.0, 'drop_rate': 0.30000000000000004, 'learning_rate': 0.0026250992718907813, 'momentum': 0.6}. Best is trial 0 with value: 0.8683712482452393.


Best trial:
  Value: 0.868
  Params: 
    emb_size: 300.0
    out_channels: 100.0
    drop_rate: 0.4
    learning_rate: 0.006774270419053273
    momentum: 0.9


In [None]:
! mkdir nlp88
n_input = len(set(word2id.values())) + 1
n_embed = int(trial.params['emb_size'])  # 単語ベクトルの次元
n_output = 4  # 出力ベクトルの次元（=ラベルの種類数）
padding_idx = len(set(word2id.values()))
out_channels = int(trial.params['out_channels'])
kernel_size = 3
stride = 1
padding = 1
learning_rate = trial.params['learning_rate']
momentum = trial.params['momentum']
drop_rate = trial.params['drop_rate']
nlp = "nlp88"

model = CNN(n_input, n_embed, n_output, padding_idx, out_channels, kernel_size, stride, padding, drop_rate)

criterion = nn.CrossEntropyLoss() #損失関数

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum) # 最適化手法

device = torch.device("cuda")

train_model(32, model, criterion, optimizer, 10, nlp)

mkdir: cannot create directory ‘nlp88’: File exists
epoch:1  loss_train:0.5239  acc_train:0.8165  loss_dev:0.6162  acc_dev:0.7792
epoch:2  loss_train:0.1516  acc_train:0.9618  loss_dev:0.4699  acc_dev:0.8341
epoch:3  loss_train:0.0560  acc_train:0.9937  loss_dev:0.4825  acc_dev:0.8494
epoch:4  loss_train:0.0219  acc_train:0.9989  loss_dev:0.4856  acc_dev:0.8519
epoch:5  loss_train:0.0172  acc_train:0.9986  loss_dev:0.4796  acc_dev:0.8586
epoch:6  loss_train:0.0202  acc_train:0.9978  loss_dev:0.5235  acc_dev:0.8504
epoch:7  loss_train:0.0131  acc_train:0.9990  loss_dev:0.5040  acc_dev:0.8512
epoch:8  loss_train:0.0153  acc_train:0.9989  loss_dev:0.4995  acc_dev:0.8653
epoch:9  loss_train:0.0123  acc_train:0.9990  loss_dev:0.5117  acc_dev:0.8631
epoch:10  loss_train:0.0129  acc_train:0.9990  loss_dev:0.5121  acc_dev:0.8542
