In [46]:
from gensim.models import KeyedVectors
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import tensorflow

model = KeyedVectors.load_word2vec_format('../section_7/GoogleNews-vectors-negative300.bin.gz', binary=True)

KeyboardInterrupt: 

70. 単語ベクトルの和による特徴量

XとYについて、訓練データ、検証データ、テストデータを作成

In [None]:
# csvとして読み取り、namesにヘッダを追加
df = pd.read_csv('../section_6/news+aggregator/newsCorpora.tsv', sep='\t', header=0, names=['TITLE','URL','PUBLISHER','CATEGORY','STORY','HOSTNAME','TIMESTAMP'])

valid_publishers = ["Reuters", "Huffington Post", "Businessweek", "Contactmusic.com", "Daily Mail"]
df_publisher = df[df['PUBLISHER'].isin(valid_publishers)]
df_shuffle = df_publisher.sample(frac=1)

# 分割手続き
length = len(df_shuffle)
eighty = (length//100)*80
ninety = (length//100)*90
(train,valid,test) = (df_shuffle[:eighty],df_shuffle[eighty:ninety],df_shuffle[ninety:])

# Xについて作成

for key, value in {'X_train.txt':train,'X_test.txt':test,'X_valid.txt':valid}.items():
    with open('data/'+key,'w') as f:
        for text in value['TITLE']:
            vec = []
            for word in text.split():
                if word in model:
                    vec.append(model[word])
            if not len(vec):
                vec = np.zeros(300)
            else:
                vec = np.array(vec)
                vec = vec.mean(axis=0)
            vec = vec.astype(np.str_).tolist()
            output = ' '.join(vec)+'\n'
            f.write(output)

# Yについて作成

sep_dict = {"b":0,"t":1,"e":2,"m":3}

Y_train = [sep_dict[c] for c in train['CATEGORY'].to_numpy()]
Y_valid = [sep_dict[c] for c in valid['CATEGORY'].to_numpy()]
Y_test = [sep_dict[c] for c in test['CATEGORY'].to_numpy()]

np.savetxt('data/Y_train.txt', Y_train, fmt='%d')
np.savetxt('data/Y_valid.txt', Y_valid, fmt='%d')
np.savetxt('data/Y_test.txt', Y_test, fmt='%d')

71. 単層ニューラルネットワークによる予測

パーセプトロンの順伝播での計算

In [None]:
X = np.loadtxt("data/X_train.txt", delimiter=' ') 
X = torch.tensor(X,dtype=torch.float64)

g = torch.Generator()
g.manual_seed(523)
W = torch.randn(300,4, dtype=torch.float64)

scores_1 = X[:1] @ W
scores_4 = X[:4] @ W

softmax = torch.nn.Softmax(dim=1)
print(softmax(scores_1))
print(softmax(scores_4))

tensor([[0.4634, 0.4984, 0.0276, 0.0106]], dtype=torch.float64)
tensor([[0.4634, 0.4984, 0.0276, 0.0106],
        [0.0046, 0.9760, 0.0161, 0.0032],
        [0.0311, 0.8073, 0.0771, 0.0845],
        [0.5015, 0.4444, 0.0437, 0.0105]], dtype=torch.float64)


72. 損失と勾配の計算

損失と勾配を計算せよ

In [None]:
Y = np.loadtxt("data/Y_train.txt", delimiter=' ')
Y = torch.tensor(Y,dtype=torch.float64)

loss = torch.nn.CrossEntropyLoss()

loss_1 = loss(scores_1,Y[:1].type(torch.long))
loss_4 = loss(scores_4,Y[:4].type(torch.long))

print(loss_1,loss_4)

ans = []
for s,i in zip(softmax(scores_4),Y[:4].type(torch.long)):
  ans.append(-np.log(s[i]))
print (np.mean(ans))

tensor(0.7692, dtype=torch.float64) tensor(2.0370, dtype=torch.float64)
2.037038505908271


73. 確率的勾配降下法による学習

確率的勾配降下法（SGD: Stochastic Gradient Descent）を用いて，行列Wを学習せよ．なお，学習は適当な基準で終了させればよい（例えば「100エポックで終了」など）

In [None]:
X = np.loadtxt("data/X_train.txt", delimiter=' ') 
X = torch.tensor(X,dtype=torch.float64)

Y = np.loadtxt("data/Y_train.txt", delimiter=' ')
Y = torch.tensor(Y,dtype=torch.long)

W = torch.randn(300,4, dtype=torch.float64, requires_grad=True)

loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD([W], lr=0.1, momentum=0.9)

for epoch in range(100):
    losses = loss(X @ W,Y)
    optimizer.zero_grad()
    losses.backward()
    optimizer.step()

74. 正解率の計測

問題73で求めた行列を用いて学習データおよび評価データの事例を分類したとき，その正解率をそれぞれ求めよ．

In [None]:
def acc(pred, label):
    pred = torch.argmax(pred, axis=1).numpy()
    label = label.data.numpy()
    return (pred == label).mean()

X_valid = np.loadtxt("data/X_valid.txt", delimiter=' ') 
X_valid = torch.tensor(X_valid,dtype=torch.float64)

Y_valid = np.loadtxt("data/Y_valid.txt", delimiter=' ')
Y_valid = torch.tensor(Y_valid,dtype=torch.long)

print(acc(X @ W,Y))
print(acc(X_valid @ W,Y_valid))

0.750093984962406
0.7518796992481203


75. 損失と正解率のプロット + 74.にデータの正規化を入れてまとめたもの

問題73のコードを改変し，各エポックのパラメータ更新が完了するたびに，訓練データでの損失，正解率，検証データでの損失，正解率をグラフにプロットし，学習の進捗状況を確認できるようにせよ．

In [None]:
from torch.utils.tensorboard import SummaryWriter

# データの読み込みと正規化
def load_and_normalize(file_path):
    data = np.loadtxt(file_path, delimiter=' ')
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    return (data - mean) / std

# 名前を少しわかりやすく
X_train = load_and_normalize("data/X_train.txt")
X_train = torch.tensor(X_train, dtype=torch.float64)

Y_train = np.loadtxt("data/Y_train.txt", delimiter=' ')
Y_train = torch.tensor(Y_train, dtype=torch.long)

X_valid = load_and_normalize("data/X_valid.txt")
X_valid = torch.tensor(X_valid, dtype=torch.float64)

Y_valid = np.loadtxt("data/Y_valid.txt", delimiter=' ')
Y_valid = torch.tensor(Y_valid, dtype=torch.long)

W = torch.randn(300, 4, dtype=torch.float64, requires_grad=True)

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD([W], lr=0.01, momentum=0.9)
writer = SummaryWriter(log_dir="log")

# 学習
for epoch in range(2500):
    scores = X_train @ W
    loss = loss_fn(scores, Y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    with torch.no_grad():
        Y_pred = X_train @ W
        loss = loss_fn(Y_pred, Y_train) 
        writer.add_scalar('Loss/train', loss, epoch)
        writer.add_scalar('Accuracy/train', acc(Y_pred,Y_train), epoch)

        Y_pred = X_valid @ W
        loss = loss_fn(Y_pred, Y_valid)
        writer.add_scalar('Loss/valid', loss, epoch)
        writer.add_scalar('Accuracy/valid', acc(Y_pred,Y_valid), epoch)


def acc(pred, label):
    pred = torch.argmax(pred, axis=1).numpy()
    label = label.numpy()
    return (pred == label).mean()

train_acc = acc(X_train @ W, Y_train)
valid_acc = acc(X_valid @ W, Y_valid)

print(f"Training Accuracy: {train_acc}")
print(f"Validation Accuracy: {valid_acc}")


Training Accuracy: 0.8809210526315789
Validation Accuracy: 0.8413533834586466


76. チェックポイント

問題75のコードを改変し，各エポックのパラメータ更新が完了するたびに，チェックポイント（学習途中のパラメータ（重み行列など）の値や最適化アルゴリズムの内部状態）をファイルに書き出せ．

In [None]:
# データの読み込みと正規化
def load_and_normalize(file_path):
    data = np.loadtxt(file_path, delimiter=' ')
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    return (data - mean) / std

# 名前を少しわかりやすく
X_train = load_and_normalize("data/X_train.txt")
X_train = torch.tensor(X_train, dtype=torch.float64)

Y_train = np.loadtxt("data/Y_train.txt", delimiter=' ')
Y_train = torch.tensor(Y_train, dtype=torch.long)

X_valid = load_and_normalize("data/X_valid.txt")
X_valid = torch.tensor(X_valid, dtype=torch.float64)

Y_valid = np.loadtxt("data/Y_valid.txt", delimiter=' ')
Y_valid = torch.tensor(Y_valid, dtype=torch.long)

W = torch.randn(300, 4, dtype=torch.float64, requires_grad=True)

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD([W], lr=0.1, momentum=0.9)
writer = SummaryWriter(log_dir="log")

for epoch in range(10):
    scores = X_train @ W
    loss = loss_fn(scores, Y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    with torch.no_grad():
        Y_pred = X_train @ W
        loss = loss_fn(Y_pred, Y_train) 
        writer.add_scalar('Loss/train', loss, epoch)
        writer.add_scalar('Accuracy/train', acc(Y_pred,Y_train), epoch)

        Y_pred = X_valid @ W
        loss = loss_fn(Y_pred, Y_valid)
        writer.add_scalar('Loss/valid', loss, epoch)
        writer.add_scalar('Accuracy/valid', acc(Y_pred,Y_valid), epoch)

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': W,
        'optimizer_state_dict': optimizer.state_dict()
    }
    torch.save(checkpoint, f"cp/epoch_{epoch}.pt")

def acc(pred, label):
    pred = torch.argmax(pred, axis=1).numpy()
    label = label.numpy()
    return (pred == label).mean()

train_acc = acc(X_train @ W, Y_train)
valid_acc = acc(X_valid @ W, Y_valid)

print(f"Training Accuracy: {train_acc}")
print(f"Validation Accuracy: {valid_acc}")


Training Accuracy: 0.6862781954887218
Validation Accuracy: 0.6879699248120301


77. ミニバッチ化

問題76のコードを改変し，B事例ごとに損失・勾配を計算し，行列Wの値を更新せよ（ミニバッチ化）．Bの値を1,2,4,8,…と変化させながら，1エポックの学習に要する時間を比較せよ．

In [None]:
import time

# データの読み込みと正規化
def load_and_normalize(file_path):
    data = np.loadtxt(file_path, delimiter=' ')
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    return (data - mean) / std

# 名前を少しわかりやすく
X_train = load_and_normalize("data/X_train.txt")
X_train = torch.tensor(X_train, dtype=torch.float64)

Y_train = np.loadtxt("data/Y_train.txt", delimiter=' ')
Y_train = torch.tensor(Y_train, dtype=torch.long)

X_valid = load_and_normalize("data/X_valid.txt")
X_valid = torch.tensor(X_valid, dtype=torch.float64)

Y_valid = np.loadtxt("data/Y_valid.txt", delimiter=' ')
Y_valid = torch.tensor(Y_valid, dtype=torch.long)

loss_fn = torch.nn.CrossEntropyLoss()
writer = SummaryWriter(log_dir="log")

def acc(pred, label):
    pred = torch.argmax(pred, axis=1).numpy()
    label = label.numpy()
    return (pred == label).mean()

batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256]
epoch_time_results = {}

for B in batch_sizes:
    W = torch.randn(300, 4, dtype=torch.float64, requires_grad=True)
    optimizer = torch.optim.SGD([W], lr=0.1, momentum=0.9)

    start_time = time.time()
    
    for epoch in range(10):
        permutation = torch.randperm(X_train.size()[0])
        
        for i in range(0, X_train.size()[0], B):
            indices = permutation[i:i+B]
            batch_X, batch_Y = X_train[indices], Y_train[indices]
            
            scores = batch_X @ W
            loss = loss_fn(scores, batch_Y)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        with torch.no_grad():
            Y_pred = X_train @ W
            train_loss = loss_fn(Y_pred, Y_train) 
            writer.add_scalar(f'Loss/train_batch_{B}', train_loss, epoch)
            writer.add_scalar(f'Accuracy/train_batch_{B}', acc(Y_pred,Y_train), epoch)

            Y_pred = X_valid @ W
            valid_loss = loss_fn(Y_pred, Y_valid)
            writer.add_scalar(f'Loss/valid_batch_{B}', valid_loss, epoch)
            writer.add_scalar(f'Accuracy/valid_batch_{B}', acc(Y_pred,Y_valid), epoch)

    end_time = time.time()
    epoch_time_results[B] = end_time - start_time
    print(f"Batch size {B}: {epoch_time_results[B]} seconds")

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': W,
        'optimizer_state_dict': optimizer.state_dict()
    }
    torch.save(checkpoint, f"cp/batch_{B}_epoch_{epoch}.pt")

print("Training Accuracy:", acc(X_train @ W, Y_train))
print("Validation Accuracy:", acc(X_valid @ W, Y_valid))
print("Epoch time results:", epoch_time_results)

Batch size 1: 9.457581043243408 seconds
Batch size 2: 9.055637121200562 seconds
Batch size 4: 4.530675888061523 seconds
Batch size 8: 2.3388307094573975 seconds
Batch size 16: 1.2995638847351074 seconds
Batch size 32: 0.6992628574371338 seconds
Batch size 64: 0.3909733295440674 seconds
Batch size 128: 0.23469328880310059 seconds
Batch size 256: 0.151123046875 seconds
Training Accuracy: 0.8953007518796993
Validation Accuracy: 0.849624060150376
Epoch time results: {1: 9.457581043243408, 2: 9.055637121200562, 4: 4.530675888061523, 8: 2.3388307094573975, 16: 1.2995638847351074, 32: 0.6992628574371338, 64: 0.3909733295440674, 128: 0.23469328880310059, 256: 0.151123046875}


78. GPU上での学習

問題77のコードを改変し，GPU上で学習を実行せよ．（パーセプトロンの仕組みがわかったので`torch.nn.Module`をオーバーライドしてみる）

In [None]:
import time

# データの読み込みと正規化
def load_and_normalize(file_path):
    data = np.loadtxt(file_path, delimiter=' ')
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    return (data - mean) / std

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# 名前を少しわかりやすく
X_train = load_and_normalize("data/X_train.txt")
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)

Y_train = np.loadtxt("data/Y_train.txt", delimiter=' ')
Y_train = torch.tensor(Y_train, dtype=torch.long).to(device)

X_valid = load_and_normalize("data/X_valid.txt")
X_valid = torch.tensor(X_valid, dtype=torch.float32).to(device)

Y_valid = np.loadtxt("data/Y_valid.txt", delimiter=' ')
Y_valid = torch.tensor(Y_valid, dtype=torch.long).to(device)

loss_fn = torch.nn.CrossEntropyLoss().to(device)
writer = SummaryWriter(log_dir="log")

def acc(pred, label):
    pred = torch.argmax(pred, axis=1).cpu().numpy()
    label = label.cpu().numpy()
    return (pred == label).mean()

batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256]
epoch_time_results = {}

for B in batch_sizes:
    W = torch.randn(300, 4, dtype=torch.float32, requires_grad=True, device=device)
    optimizer = torch.optim.SGD([W], lr=0.1, momentum=0.9)

    start_time = time.time()
    
    for epoch in range(10):
        permutation = torch.randperm(X_train.size()[0], device=device)
        
        for i in range(0, X_train.size()[0], B):
            indices = permutation[i:i+B]
            batch_X, batch_Y = X_train[indices], Y_train[indices]
            
            scores = batch_X @ W
            loss = loss_fn(scores, batch_Y)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        with torch.no_grad():
            Y_pred = X_train @ W
            train_loss = loss_fn(Y_pred, Y_train) 
            writer.add_scalar(f'Loss/train_batch_{B}', train_loss, epoch)
            writer.add_scalar(f'Accuracy/train_batch_{B}', acc(Y_pred,Y_train), epoch)

            Y_pred = X_valid @ W
            valid_loss = loss_fn(Y_pred, Y_valid)
            writer.add_scalar(f'Loss/valid_batch_{B}', valid_loss, epoch)
            writer.add_scalar(f'Accuracy/valid_batch_{B}', acc(Y_pred,Y_valid), epoch)

    end_time = time.time()
    epoch_time_results[B] = end_time - start_time
    print(f"Batch size {B}: {epoch_time_results[B]} seconds")

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': W.cpu(),
        'optimizer_state_dict': optimizer.state_dict()
    }
    torch.save(checkpoint, f"cp/batch_{B}_epoch_{epoch}.pt")

print("Training Accuracy:", acc(X_train @ W, Y_train))
print("Validation Accuracy:", acc(X_valid @ W, Y_valid))
print("Epoch time results:", epoch_time_results)

Batch size 1: 83.89398980140686 seconds
Batch size 2: 41.44503307342529 seconds
Batch size 4: 21.423542022705078 seconds
Batch size 8: 10.67266297340393 seconds
Batch size 16: 5.2827489376068115 seconds
Batch size 32: 2.578295946121216 seconds
Batch size 64: 1.4275047779083252 seconds
Batch size 128: 0.7153580188751221 seconds
Batch size 256: 0.37299609184265137 seconds
Training Accuracy: 0.8883458646616541
Validation Accuracy: 0.8473684210526315
Epoch time results: {1: 83.89398980140686, 2: 41.44503307342529, 4: 21.423542022705078, 8: 10.67266297340393, 16: 5.2827489376068115, 32: 2.578295946121216, 64: 1.4275047779083252, 128: 0.7153580188751221, 256: 0.37299609184265137}


79. 多層ニューラルネットワーク

問題78のコードを改変し，バイアス項の導入や多層化など，ニューラルネットワークの形状を変更しながら，高性能なカテゴリ分類器を構築せよ．

In [63]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

# 3LP
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.w1 = nn.Linear(300, 600)
        self.w2 = nn.Linear(600, 50)
        self.w3 = nn.Linear(50, 4)
        
    def forward(self, x):
        x = F.relu(self.w1(x))
        x = F.relu(self.w2(x))
        return self.w3(x)

# 正規化
def load_and_normalize(file_path):
    data = np.loadtxt(file_path, delimiter=' ')
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    return (data - mean) / std

# 正解率
def acc(pred, label):
    pred = torch.argmax(pred, axis=1).cpu().numpy()
    label = label.cpu().numpy()
    return (pred == label).mean()

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# 読み込み と to Tensor
X_train = load_and_normalize("data/X_train.txt")
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)

Y_train = np.loadtxt("data/Y_train.txt", delimiter=' ')
Y_train = torch.tensor(Y_train, dtype=torch.long).to(device)

X_valid = load_and_normalize("data/X_valid.txt")
X_valid = torch.tensor(X_valid, dtype=torch.float32).to(device)

Y_valid = np.loadtxt("data/Y_valid.txt", delimiter=' ')
Y_valid = torch.tensor(Y_valid, dtype=torch.long).to(device)

batch_size = 2**10

# 訓練データだけ先に作っておく
train = DataLoader(TensorDataset(X_train,Y_train),batch_size=batch_size,shuffle=True)

# インスタンス
net = MLP().to(device)

# loss_function, optimizerの設定
loss_fn = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=0.0001, momentum=0.9)

# 一応学習曲線出すために
writer = SummaryWriter(log_dir="log")

for epoch in range(3000):
    for x, y in train:
        optimizer.zero_grad()
        scores = net(x)
        loss = loss_fn(scores, y)
        loss.backward()
        optimizer.step()

    with torch.no_grad():
        scores = net(X_train)
        train_loss = loss_fn(scores, Y_train) 
        writer.add_scalar(f'Loss/train', train_loss, epoch)
        writer.add_scalar(f'Accuracy/train', acc(scores,Y_train), epoch)

        scores = net(X_valid)
        valid_loss = loss_fn(scores, Y_valid)
        writer.add_scalar(f'Loss/valid', valid_loss, epoch)
        writer.add_scalar(f'Accuracy/valid', acc(scores,Y_valid), epoch)

checkpoint = {
    'epoch': epoch,
    'model_state_dict': net.state_dict(),
    'optimizer_state_dict': optimizer.state_dict()
}
torch.save(checkpoint, f"cp/batch_{batch_size}_epoch_{epoch}.pt")

print("Training Accuracy:", acc(net(X_train),Y_train))
print("Validation Accuracy:", acc(net(X_valid),Y_valid))

Training Accuracy: 0.9396616541353383
Validation Accuracy: 0.8902255639097745
