### 第8章: ニューラルネット

第7章で取り組んだポジネガ分類を題材として、ニューラルネットワークで分類モデルを実装する。なお、この章ではPyTorchやTensorFlow、JAXなどの深層学習フレームワークを活用せよ。

In [25]:
"""
70. 単語埋め込みの読み込み
事前学習済み単語埋め込みを活用し、|V| * d_{emb} の単語埋め込み行列 E を作成せよ。
ここで、d_{emb} は単語埋め込みの語彙数、|V|は単語埋め込みの次元数である。
ただし、単語埋め込み行列の先頭の行ベクトルE_{0}は、将来的にパディング（<PAD>）トークンの埋め込みベクトルとして用いたいので、ゼロベクトルとして予約せよ。
ゆえに、E の2行目以降に事前学習済み単語埋め込みを読み込むことになる。

もし、Google Newsデータセットの学習済み単語ベクトル（300万単語・フレーズ、300次元）を全て読み込んだ場合、
|V| = 3000001,d_{emb} = 300になるはずである（ただ、300万単語の中には、殆ど用いられない稀な単語も含まれるので、語彙を削減した方がメモリの節約になる）。

また、単語埋め込み行列の構築と同時に、単語埋め込み行列の各行のインデックス番号（トークンID）と、単語（トークン）への双方向の対応付けを保持せよ。

"""
from gensim.models import KeyedVectors
import pandas as pd
import numpy as np

def	extract_vocab(data):
    all_words = set()
    for sentence in data['sentence']:
        words = sentence.split()
        all_words.update(word.lower() for word in words)
    return all_words

def create_matrix(model_path, train_data, dev_data):
    train_words = extract_vocab(train_data)
    dev_words = extract_vocab(dev_data)
    all_words = train_words.union(dev_words)

    word2vec = KeyedVectors.load_word2vec_format(model_path, binary=True)
    embedding_dim = word2vec.vector_size

    embedding_matrix = [np.zeros(embedding_dim)]
    word_to_id = {"<PAD>": 0}
    id_to_word = {0: "<PAD>"}

    idx = 1
    for word in all_words:
        if word in word2vec:
            embedding_matrix.append(word2vec[word])
            word_to_id[word] = idx
            id_to_word[idx] = word
            idx += 1

    embedding_matrix = np.array(embedding_matrix)

    return embedding_matrix, word_to_id, id_to_word

sst2_train = pd.read_csv('SST-2/train.tsv', sep='\t')
sst2_dev = pd.read_csv('SST-2/dev.tsv', sep='\t')
file = 'data/GoogleNews-vectors-negative300.bin.gz'
embedding_matrix, word_to_id, id_to_word = create_matrix(file, sst2_train, sst2_dev)
print("\n最初の5単語:")
for i in range(5):
    print(f"{id_to_word[i]}, ベクトル: {embedding_matrix[i][:5]}...")




最初の5単語:
<PAD>, ベクトル: [0. 0. 0. 0. 0.]...
discovered, ベクトル: [-0.07910156  0.15917969 -0.01464844  0.05859375 -0.05395508]...
mermaid, ベクトル: [ 0.17675781 -0.06054688 -0.140625    0.02819824 -0.15429688]...
anonymous, ベクトル: [ 0.1796875   0.12011719 -0.14550781 -0.33203125 -0.12597656]...
fairies, ベクトル: [ 0.2734375   0.04980469 -0.10791016  0.16308594 -0.21386719]...


In [26]:
"""
71. データセットの読み込み
General Language Understanding Evaluation (GLUE) ベンチマークで配布されているStanford Sentiment Treebank (SST) をダウンロードし、
訓練セット（train.tsv）と開発セット（dev.tsv）のテキストと極性ラベルと読み込み、全てのテキストをトークンID列に変換せよ。

このとき、単語埋め込みの語彙でカバーされていない単語は無視し、トークン列に含めないことにせよ。
また、テキストの全トークンが単語埋め込みの語彙に含まれておらず、空のトークン列となってしまう事例は、訓練セットおよび開発セットから削除せよ
（このため、第7章の実験で得られた正解率と比較できなくなることに注意せよ）。

事例の表現方法は任意でよいが、例えば”contains no wit , only labored gags”がネガティブに分類される事例は、次のような辞書オブジェクトで表現すればよい。

{'text': 'contains no wit , only labored gags',
 'label': tensor([0.]),
 'input_ids': tensor([ 3475,    87, 15888,    90, 27695, 42637])}
この例では、textはテキスト、labelは分類ラベル（ポジティブならtensor([1.])、ネガティブならtensor([0.])）、input_idsはテキストのトークン列をID列で表現している。
"""
import pandas as pd
import torch
from pprint import pprint

train_df = pd.read_csv("SST-2/train.tsv", sep="\t")
dev_df = pd.read_csv("SST-2/dev.tsv", sep="\t")

def text_to_ids(df, word_to_id):
    dct_lst = []

    for _, row in df.iterrows():
        sentence = row["sentence"]
        label = torch.tensor([float(row["label"])])

        words = sentence.split()
        ids = torch.tensor([word_to_id[token.lower()] for token in words if token in word_to_id])
        
        if len(ids) != 0 :
            dct_lst.append({'text':sentence,
                'label':label,
                'input_ids': ids
            })
        
    return dct_lst

train_ids_list  = text_to_ids(train_df, word_to_id)
dev_ids_list = text_to_ids(dev_df, word_to_id)
pprint(train_ids_list[0])


{'input_ids': tensor([9071, 4483,  407, 9085, 7860, 1760, 4795]),
 'label': tensor([0.]),
 'text': 'hide new secretions from the parental units '}


In [42]:
"""
72. Bag of wordsモデルの構築
単語埋め込みの平均ベクトルでテキストの特徴ベクトルを表現し、重みベクトルとの内積でポジティブ及びネガティブを分類する
ニューラルネットワーク（ロジスティック回帰モデル）を設計せよ。
"""
import torch.nn as nn

class LogisticRegression(nn.Module):
    def __init__(self, embedding_matrix, vec_dim, output_dim, freeze=True):
        super().__init__()

        embedding_weights = torch.tensor(embedding_matrix, dtype=torch.float32)
        self.embedding = nn.Embedding.from_pretrained(embedding_weights, freeze=freeze)

        self.linear = nn.Linear(vec_dim, output_dim)

    def forward(self, input_ids_list):
        batch_vectors = []
        
        for input_ids in input_ids_list:
            emb = self.embedding(input_ids)
            avg_vec = torch.mean(emb, dim=0)
            batch_vectors.append(avg_vec)

        batch_tensor = torch.stack(batch_vectors)
        out = torch.sigmoid(self.linear(batch_tensor))
        return out

In [34]:
"""
73. モデルの学習
問題72で設計したモデルの重みベクトルを訓練セット上で学習せよ。
ただし、学習中は単語埋め込み行列の値を固定せよ（単語埋め込み行列のファインチューニングは行わない）。
また、学習時に損失値を表示するなど、学習の進捗状況をモニタリングできるようにせよ。
"""
import torch.optim as optim

def train(model, train_data, optimizer, loss_fn):
    model.train()
    train_X = [d['input_ids'] for d in train_data]
    train_Y = torch.stack([d['label'] for d in train_data])

    optimizer.zero_grad()
    pred = model(train_X)
    loss = loss_fn(pred, train_Y)
    loss.backward()
    optimizer.step()

    pred_label = (pred >= 0.5).float()
    corrects = torch.sum(pred_label == train_Y)
    train_acc = corrects / len(train_X)

    return loss.item(), train_acc.item()

def evaluate(model, dev_data, loss_fn):
    model.eval()
    with torch.no_grad():
        dev_X = [d['input_ids'] for d in dev_data]
        dev_Y = torch.stack([d['label'] for d in dev_data])
        pred = model(dev_X)
        loss = loss_fn(pred, dev_Y)

        pred_label = (pred >= 0.5).float()
        corrects = torch.sum(pred_label == dev_Y)
        valid_acc = corrects / len(dev_Y)

    return loss.item(), valid_acc.item()

model = LogisticRegression(embedding_matrix, 300, 1)
loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
max_epochs = 1000
best_valid_loss = float('inf')
early_stopping_cnt = 0
patience = 5
save_path = 'model/LogisticRegression73.pth'

for epoch in range(max_epochs):
    print(f"=================== {epoch+1} / {max_epochs} epoch ===================")
    
    train_loss, train_acc = train(model, train_ids_list, optimizer, loss_fn)
    print(f"Train  loss: {train_loss:.4f}, acc: {train_acc:.2f}")
    
    valid_loss, valid_acc = evaluate(model, dev_ids_list, loss_fn)
    print(f"Valid  loss: {valid_loss:.4f}, acc: {valid_acc:.2f}")

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        early_stopping_cnt = 0

        torch.save(model.state_dict(), save_path)
    else:
        early_stopping_cnt += 1
        print(f"EarlyStopping: {early_stopping_cnt} / {patience}")

        if early_stopping_cnt >= patience:
            break



Train  loss: 0.6948, acc: 0.48
Valid  loss: 0.6934, acc: 0.50
Train  loss: 0.6935, acc: 0.50
Valid  loss: 0.6929, acc: 0.53
Train  loss: 0.6922, acc: 0.53
Valid  loss: 0.6924, acc: 0.54
Train  loss: 0.6910, acc: 0.55
Valid  loss: 0.6919, acc: 0.54
Train  loss: 0.6897, acc: 0.57
Valid  loss: 0.6914, acc: 0.54
Train  loss: 0.6885, acc: 0.58
Valid  loss: 0.6910, acc: 0.54
Train  loss: 0.6873, acc: 0.58
Valid  loss: 0.6905, acc: 0.54
Train  loss: 0.6861, acc: 0.59
Valid  loss: 0.6900, acc: 0.53
Train  loss: 0.6849, acc: 0.59
Valid  loss: 0.6896, acc: 0.53
Train  loss: 0.6838, acc: 0.59
Valid  loss: 0.6891, acc: 0.52
Train  loss: 0.6826, acc: 0.59
Valid  loss: 0.6887, acc: 0.52
Train  loss: 0.6815, acc: 0.59
Valid  loss: 0.6883, acc: 0.52
Train  loss: 0.6804, acc: 0.59
Valid  loss: 0.6878, acc: 0.52
Train  loss: 0.6793, acc: 0.59
Valid  loss: 0.6874, acc: 0.52
Train  loss: 0.6782, acc: 0.59
Valid  loss: 0.6870, acc: 0.52
Train  loss: 0.6771, acc: 0.59
Valid  loss: 0.6865, acc: 0.51
Train  l

In [35]:
"""
74. モデルの評価
問題73で学習したモデルの開発セットにおける正解率を求めよ。
"""

def evaluate(model, dev_data, loss_fn):
    model.eval()
    with torch.no_grad():
        dev_X = [d['input_ids'] for d in dev_data]
        dev_Y = torch.stack([d['label'] for d in dev_data])
        pred = model(dev_X)
        loss = loss_fn(pred, dev_Y)

        pred_label = (pred >= 0.5).float()
        corrects = torch.sum(pred_label == dev_Y)
        valid_acc = corrects / len(dev_Y)

    return loss.item(), valid_acc.item()

model = LogisticRegression(embedding_matrix, 300, 1, freeze=False)
model.load_state_dict(torch.load('model/LogisticRegression73.pth'))
model.eval()

loss_fn = nn.BCELoss()
valid_loss, valid_acc = evaluate(model, dev_ids_list, loss_fn)
print(f"Valid  loss: {valid_loss:.4f}, acc: {valid_acc:.2f}")

Valid  loss: 0.5110, acc: 0.78


In [36]:
"""
75. パディング
複数の事例が与えられたとき、これらをまとめて一つのテンソル・オブジェクトで表現する関数collateを実装せよ。
与えられた複数の事例のトークン列の長さが異なるときは、トークン列の長さが最も長いものに揃え、0番のトークンIDでパディングをせよ。
さらに、トークン列の長さが長いものから順に、事例を並び替えよ。

例えば、訓練データセットの冒頭の4事例が次のように表されているとき、
[{'text': 'hide new secretions from the parental units',
  'label': tensor([0.]),
  'input_ids': tensor([  5785,     66, 113845,     18,     12,  15095,   1594])},
 {'text': 'contains no wit , only labored gags',
  'label': tensor([0.]),
  'input_ids': tensor([ 3475,    87, 15888,    90, 27695, 42637])},
 {'text': 'that loves its characters and communicates something rather beautiful about human nature',
  'label': tensor([1.]),
  'input_ids': tensor([    4,  5053,    45,  3305, 31647,   348,   904,  2815,    47,  1276,  1964])},
 {'text': 'remains utterly satisfied to remain the same throughout',
  'label': tensor([0.]),
  'input_ids': tensor([  987, 14528,  4941,   873,    12,   208,   898])}]
collate関数を通した結果は以下のようになることが想定される。
{'input_ids': tensor([
    [     4,   5053,     45,   3305,  31647,    348,    904,   2815,     47,   1276,   1964],
    [  5785,     66, 113845,     18,     12,  15095,   1594,      0,      0,      0,      0],
    [   987,  14528,   4941,    873,     12,    208,    898,      0,      0,      0,      0],
    [  3475,     87,  15888,     90,  27695,  42637,      0,      0,      0,      0,      0]]),
 'label': tensor([
    [1.],
    [0.],
    [0.],
    [0.]])}
"""
from torch.nn.utils.rnn import pad_sequence
def collate(dec_list):
  dec_list = sorted(dec_list, key=lambda x: len(x['input_ids']), reverse=True)
  input_ids_list = [dec['input_ids'] for dec in dec_list]
  label_list = torch.stack([dec['label'] for dec in dec_list])
  padded_input_ids = pad_sequence(input_ids_list, batch_first=True)

  return {"input_ids":padded_input_ids,"label":label_list}
    
pprint(train_ids_list[:4])
print()
pprint(collate(train_ids_list[:4]))

[{'input_ids': tensor([9071, 4483,  407, 9085, 7860, 1760, 4795]),
  'label': tensor([0.]),
  'text': 'hide new secretions from the parental units '},
 {'input_ids': tensor([ 3443,  7755, 11276,  6028,  8205,  6519]),
  'label': tensor([0.]),
  'text': 'contains no wit , only labored gags '},
 {'input_ids': tensor([ 9122,  3991,  8988,  4634,  4170,  2448,  4722,  4578,  6190, 12763,
         5458]),
  'label': tensor([1.]),
  'text': 'that loves its characters and communicates something rather '
          'beautiful about human nature '},
 {'input_ids': tensor([ 8126, 10517,  3108,   517,  7860,  3482, 10153]),
  'label': tensor([0.]),
  'text': 'remains utterly satisfied to remain the same throughout '}]

{'input_ids': tensor([[ 9122,  3991,  8988,  4634,  4170,  2448,  4722,  4578,  6190, 12763,
          5458],
        [ 9071,  4483,   407,  9085,  7860,  1760,  4795,     0,     0,     0,
             0],
        [ 8126, 10517,  3108,   517,  7860,  3482, 10153,     0,     0,     0

In [43]:
"""
76. ミニバッチ学習
問題75のパディングの処理を活用して、ミニバッチでモデルを学習せよ。また、学習したモデルの開発セットにおける正解率を求めよ。
"""

from torch.utils.data import DataLoader
import torch.optim as optim

def train(model, train_loader, optimizer, loss_fn):
    model.train()
    total_loss = 0
    total_correct = 0
    total_samples = 0

    for batch in train_loader:
        train_X = batch["input_ids"]
        train_Y = batch["label"]

        optimizer.zero_grad()
        preds = model(train_X)
        loss = loss_fn(preds, train_Y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * train_X.size(0)
        pred_labels = (preds >= 0.5).float()
        total_correct += (pred_labels == train_Y).sum().item()
        total_samples += train_X.size(0)

    avg_loss = total_loss / total_samples
    avg_acc = total_correct / total_samples

    return avg_loss, avg_acc

def evaluate(model, valid_loader, loss_fn):
    model.eval()
    total_loss = 0
    total_correct = 0
    total_samples = 0

    with torch.no_grad():
         for batch in valid_loader:
            dev_X = batch["input_ids"]
            dev_Y = batch["label"]

            preds = model(dev_X)
            loss = loss_fn(preds, dev_Y)

            total_loss += loss.item() * dev_X.size(0)
            pred_labels = (preds >= 0.5).float()
            total_correct += (pred_labels == dev_Y).sum().item()
            total_samples += dev_X.size(0)

    avg_loss = total_loss / total_samples
    avg_acc = total_correct / total_samples

    return avg_loss, avg_acc

train_loader = DataLoader(train_ids_list, batch_size=32, shuffle=True, collate_fn=collate)
valid_loader = DataLoader(dev_ids_list, batch_size=32, shuffle=False, collate_fn=collate)

model = LogisticRegression(embedding_matrix, 300, 1)
loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

max_epochs = 10
best_valid_loss = float('inf')
early_stopping_cnt = 0
patience = 5
save_path = 'model/LogisticRegression76.pth'

for epoch in range(max_epochs):
    print(f"=================== {epoch+1} / {max_epochs} epoch ===================")
    
    train_loss, train_acc = train(model, train_loader, optimizer, loss_fn)
    print(f"Train  loss: {train_loss:.4f}, acc: {train_acc:.2f}")
    
    valid_loss, valid_acc = evaluate(model, valid_loader, loss_fn)
    print(f"Valid  loss: {valid_loss:.4f}, acc: {valid_acc:.2f}")

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        early_stopping_cnt = 0

        torch.save(model.state_dict(), save_path)
    else:
        early_stopping_cnt += 1
        print(f"EarlyStopping: {early_stopping_cnt} / {patience}")

        if early_stopping_cnt >= patience:
            break



Train  loss: 0.6201, acc: 0.68
Valid  loss: 0.5729, acc: 0.75
Train  loss: 0.5372, acc: 0.78
Valid  loss: 0.5235, acc: 0.77
Train  loss: 0.4937, acc: 0.81
Valid  loss: 0.4990, acc: 0.78
Train  loss: 0.4683, acc: 0.82
Valid  loss: 0.4898, acc: 0.78
Train  loss: 0.4520, acc: 0.83
Valid  loss: 0.4825, acc: 0.78
Train  loss: 0.4393, acc: 0.83
Valid  loss: 0.4744, acc: 0.79
Train  loss: 0.4320, acc: 0.83
Valid  loss: 0.4768, acc: 0.78
EarlyStopping: 1 / 5
Train  loss: 0.4250, acc: 0.83
Valid  loss: 0.4756, acc: 0.78
EarlyStopping: 2 / 5
Train  loss: 0.4197, acc: 0.84
Valid  loss: 0.4733, acc: 0.78
Train  loss: 0.4156, acc: 0.84
Valid  loss: 0.4736, acc: 0.79
EarlyStopping: 1 / 5


In [44]:
"""
77. GPU上での学習
問題76のモデル学習をGPU上で実行せよ。また、学習したモデルの開発セットにおける正解率を求めよ。
"""
from torch.utils.data import DataLoader
import torch.optim as optim

def train(model, train_loader, optimizer, loss_fn, device):
    model.train()
    total_loss = 0
    total_correct = 0
    total_samples = 0

    for batch in train_loader:
        train_X = batch["input_ids"].to(device)
        train_Y = batch["label"].to(device)

        optimizer.zero_grad()
        preds = model(train_X)
        loss = loss_fn(preds, train_Y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * train_X.size(0)
        pred_labels = (preds >= 0.5).float()
        total_correct += (pred_labels == train_Y).sum().item()
        total_samples += train_X.size(0)

    avg_loss = total_loss / total_samples
    avg_acc = total_correct / total_samples

    return avg_loss, avg_acc

def evaluate(model, valid_loader, loss_fn, device):
    model.eval()
    total_loss = 0
    total_correct = 0
    total_samples = 0

    with torch.no_grad():
         for batch in valid_loader:
            dev_X = batch["input_ids"].to(device)
            dev_Y = batch["label"].to(device)

            preds = model(dev_X)
            loss = loss_fn(preds, dev_Y)

            total_loss += loss.item() * dev_X.size(0)
            pred_labels = (preds >= 0.5).float()
            total_correct += (pred_labels == dev_Y).sum().item()
            total_samples += dev_X.size(0)

    avg_loss = total_loss / total_samples
    avg_acc = total_correct / total_samples

    return avg_loss, avg_acc

train_loader = DataLoader(train_ids_list, batch_size=32, shuffle=True, collate_fn=collate)
valid_loader = DataLoader(dev_ids_list, batch_size=32, shuffle=False, collate_fn=collate)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LogisticRegression(embedding_matrix, 300, 1).to(device)
loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

max_epochs = 10
best_valid_loss = float('inf')
early_stopping_cnt = 0
patience = 5
save_path = 'model/LogisticRegression77.pth'

for epoch in range(max_epochs):
    print(f"=================== {epoch+1} / {max_epochs} epoch ===================")
    
    train_loss, train_acc = train(model, train_loader, optimizer, loss_fn, device)
    print(f"Train  loss: {train_loss:.4f}, acc: {train_acc:.2f}")
    
    valid_loss, valid_acc = evaluate(model, valid_loader, loss_fn, device)
    print(f"Valid  loss: {valid_loss:.4f}, acc: {valid_acc:.2f}")

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        early_stopping_cnt = 0

        torch.save(model.state_dict(), save_path)
    else:
        early_stopping_cnt += 1
        print(f"EarlyStopping: {early_stopping_cnt} / {patience}")

        if early_stopping_cnt >= patience:
            break


Train  loss: 0.6201, acc: 0.68
Valid  loss: 0.5733, acc: 0.74
Train  loss: 0.5370, acc: 0.78
Valid  loss: 0.5233, acc: 0.77
Train  loss: 0.4938, acc: 0.81
Valid  loss: 0.5009, acc: 0.77
Train  loss: 0.4679, acc: 0.82
Valid  loss: 0.4871, acc: 0.78
Train  loss: 0.4511, acc: 0.83
Valid  loss: 0.4835, acc: 0.78
Train  loss: 0.4401, acc: 0.83
Valid  loss: 0.4790, acc: 0.78
Train  loss: 0.4316, acc: 0.83
Valid  loss: 0.4735, acc: 0.78
Train  loss: 0.4248, acc: 0.83
Valid  loss: 0.4702, acc: 0.79
Train  loss: 0.4197, acc: 0.84
Valid  loss: 0.4733, acc: 0.79
EarlyStopping: 1 / 5
Train  loss: 0.4162, acc: 0.84
Valid  loss: 0.4741, acc: 0.79
EarlyStopping: 2 / 5


In [45]:
"""
78. 単語埋め込みのファインチューニング
問題77の学習において、単語埋め込みのパラメータも同時に更新するファインチューニングを導入せよ。また、学習したモデルの開発セットにおける正解率を求めよ。
"""
from torch.utils.data import DataLoader
import torch.optim as optim

def train(model, train_loader, optimizer, loss_fn, device):
    model.train()
    total_loss = 0
    total_correct = 0
    total_samples = 0

    for batch in train_loader:
        train_X = batch["input_ids"].to(device)
        train_Y = batch["label"].to(device)

        optimizer.zero_grad()
        preds = model(train_X)
        loss = loss_fn(preds, train_Y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * train_X.size(0)
        pred_labels = (preds >= 0.5).float()
        total_correct += (pred_labels == train_Y).sum().item()
        total_samples += train_X.size(0)

    avg_loss = total_loss / total_samples
    avg_acc = total_correct / total_samples

    return avg_loss, avg_acc

def evaluate(model, valid_loader, loss_fn, device):
    model.eval()
    total_loss = 0
    total_correct = 0
    total_samples = 0

    with torch.no_grad():
         for batch in valid_loader:
            dev_X = batch["input_ids"].to(device)
            dev_Y = batch["label"].to(device)

            preds = model(dev_X)
            loss = loss_fn(preds, dev_Y)

            total_loss += loss.item() * dev_X.size(0)
            pred_labels = (preds >= 0.5).float()
            total_correct += (pred_labels == dev_Y).sum().item()
            total_samples += dev_X.size(0)

    avg_loss = total_loss / total_samples
    avg_acc = total_correct / total_samples

    return avg_loss, avg_acc

train_loader = DataLoader(train_ids_list, batch_size=32, shuffle=True, collate_fn=collate)
valid_loader = DataLoader(dev_ids_list, batch_size=32, shuffle=False, collate_fn=collate)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LogisticRegression(embedding_matrix, 300, 1, False).to(device)
loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

max_epochs = 10
best_valid_loss = float('inf')
early_stopping_cnt = 0
patience = 5
save_path = 'model/LogisticRegression78.pth'

for epoch in range(max_epochs):
    print(f"=================== {epoch+1} / {max_epochs} epoch ===================")
    
    train_loss, train_acc = train(model, train_loader, optimizer, loss_fn, device)
    print(f"Train  loss: {train_loss:.4f}, acc: {train_acc:.2f}")
    
    valid_loss, valid_acc = evaluate(model, valid_loader, loss_fn, device)
    print(f"Valid  loss: {valid_loss:.4f}, acc: {valid_acc:.2f}")

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        early_stopping_cnt = 0

        torch.save(model.state_dict(), save_path)
    else:
        early_stopping_cnt += 1
        print(f"EarlyStopping: {early_stopping_cnt} / {patience}")

        if early_stopping_cnt >= patience:
            break


Train  loss: 0.4183, acc: 0.81
Valid  loss: 0.4494, acc: 0.81
Train  loss: 0.2454, acc: 0.91
Valid  loss: 0.5352, acc: 0.80
EarlyStopping: 1 / 5
Train  loss: 0.2073, acc: 0.92
Valid  loss: 0.6154, acc: 0.81
EarlyStopping: 2 / 5
Train  loss: 0.1881, acc: 0.93
Valid  loss: 0.7025, acc: 0.80
EarlyStopping: 3 / 5
Train  loss: 0.1769, acc: 0.93
Valid  loss: 0.7482, acc: 0.81
EarlyStopping: 4 / 5
Train  loss: 0.1685, acc: 0.94
Valid  loss: 0.8782, acc: 0.79
EarlyStopping: 5 / 5


In [46]:
"""
79. アーキテクチャの変更
ニューラルネットワークのアーキテクチャを自由に変更し、モデルを学習せよ。また、学習したモデルの開発セットにおける正解率を求めよ。
例えば、テキストの特徴ベクトル（単語埋め込みの平均ベクトル）に対して多層のニューラルネットワークを通したり、
畳み込みニューラルネットワーク（CNN; Convolutional Neural Network）や再帰型ニューラルネットワーク（RNN; Recurrent Neural Network）などの
モデルの学習に挑戦するとよい。
"""
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn

class LogisticRegression(nn.Module):
    def __init__(self, embedding_matrix, vec_dim, hidden_dim, output_dim, freeze=True):
        super().__init__()

        embedding_weights = torch.tensor(embedding_matrix, dtype=torch.float32)
        self.embedding = nn.Embedding.from_pretrained(embedding_weights, freeze=freeze)

        self.hidden = nn.Linear(vec_dim, hidden_dim)
        self.relu = nn.ReLU()

        self.linear = nn.Linear(hidden_dim, output_dim)

    def forward(self, input_ids_list):
        batch_vectors = []
        
        for input_ids in input_ids_list:
            emb = self.embedding(input_ids)
            avg_vec = torch.mean(emb, dim=0)
            hidden_output = self.relu(self.hidden(avg_vec))
            output = torch.sigmoid(self.linear(hidden_output))
            
            batch_vectors.append(output)

        batch_tensor = torch.stack(batch_vectors)
        return batch_tensor

def train(model, train_loader, optimizer, loss_fn, device):
    model.train()
    total_loss = 0
    total_correct = 0
    total_samples = 0

    for batch in train_loader:
        train_X = batch["input_ids"].to(device)
        train_Y = batch["label"].to(device)

        optimizer.zero_grad()
        preds = model(train_X)
        loss = loss_fn(preds, train_Y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * train_X.size(0)
        pred_labels = (preds >= 0.5).float()
        total_correct += (pred_labels == train_Y).sum().item()
        total_samples += train_X.size(0)

    avg_loss = total_loss / total_samples
    avg_acc = total_correct / total_samples

    return avg_loss, avg_acc

def evaluate(model, valid_loader, loss_fn, device):
    model.eval()
    total_loss = 0
    total_correct = 0
    total_samples = 0

    with torch.no_grad():
         for batch in valid_loader:
            dev_X = batch["input_ids"].to(device)
            dev_Y = batch["label"].to(device)

            preds = model(dev_X)
            loss = loss_fn(preds, dev_Y)

            total_loss += loss.item() * dev_X.size(0)
            pred_labels = (preds >= 0.5).float()
            total_correct += (pred_labels == dev_Y).sum().item()
            total_samples += dev_X.size(0)

    avg_loss = total_loss / total_samples
    avg_acc = total_correct / total_samples

    return avg_loss, avg_acc

train_loader = DataLoader(train_ids_list, batch_size=32, shuffle=True, collate_fn=collate)
valid_loader = DataLoader(dev_ids_list, batch_size=32, shuffle=False, collate_fn=collate)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LogisticRegression(embedding_matrix, 300, 128, 1).to(device)
loss_fn = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

max_epochs = 10
best_valid_loss = float('inf')
early_stopping_cnt = 0
patience = 5
save_path = 'model/LogisticRegression79.pth'

for epoch in range(max_epochs):
    print(f"=================== {epoch+1} / {max_epochs} epoch ===================")
    
    train_loss, train_acc = train(model, train_loader, optimizer, loss_fn, device)
    print(f"Train  loss: {train_loss:.4f}, acc: {train_acc:.2f}")
    
    valid_loss, valid_acc = evaluate(model, valid_loader, loss_fn, device)
    print(f"Valid  loss: {valid_loss:.4f}, acc: {valid_acc:.2f}")

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        early_stopping_cnt = 0

        torch.save(model.state_dict(), save_path)
    else:
        early_stopping_cnt += 1
        print(f"EarlyStopping: {early_stopping_cnt} / {patience}")

        if early_stopping_cnt >= patience:
            break



Train  loss: 0.4054, acc: 0.82
Valid  loss: 0.4396, acc: 0.81
Train  loss: 0.3405, acc: 0.85
Valid  loss: 0.4313, acc: 0.80
Train  loss: 0.3270, acc: 0.86
Valid  loss: 0.4315, acc: 0.81
EarlyStopping: 1 / 5
Train  loss: 0.3175, acc: 0.86
Valid  loss: 0.4281, acc: 0.81
Train  loss: 0.3078, acc: 0.87
Valid  loss: 0.4322, acc: 0.79
EarlyStopping: 1 / 5
Train  loss: 0.2987, acc: 0.87
Valid  loss: 0.4268, acc: 0.81
Train  loss: 0.2892, acc: 0.88
Valid  loss: 0.4325, acc: 0.81
EarlyStopping: 1 / 5
Train  loss: 0.2803, acc: 0.88
Valid  loss: 0.4433, acc: 0.81
EarlyStopping: 2 / 5
Train  loss: 0.2704, acc: 0.89
Valid  loss: 0.4375, acc: 0.81
EarlyStopping: 3 / 5
Train  loss: 0.2614, acc: 0.89
Valid  loss: 0.4335, acc: 0.81
EarlyStopping: 4 / 5
