In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
class FNN():
    """
    Feed-forward neural network with an input, one hidden and an output layer.

    The hidden layer uses a sigmoid activation and the output layer a softmax.
    Training minimises the cross-entropy error with mini-batch gradient descent.
    Samples are stored column-wise: a batch of inputs is reshaped to
    (inodes, n_samples) and the network output has shape (onodes, n_samples).
    """

    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate, batch_size, verbose=True):
        """
        Parameters
        ----------
        inputnodes : int
            Number of input units.
        hiddennodes : int
            Number of hidden units.
        outputnodes : int
            Number of output units (classes).
        learningrate : float
            Step size of the gradient-descent update.
        batch_size : int
            Nominal mini-batch size. It is only stored; ``train`` takes the
            number of samples from the data it is given.
        verbose : bool, optional
            When True (the default) ``train`` prints the loss of every batch.
        """
        # Number of units in the input, hidden and output layers
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes
        self.batch_size = batch_size
        self.verbose = verbose

        # Xavier-style initialisation: zero-mean normal weights with standard
        # deviation sqrt(1 / fan_in); biases start at zero.
        self.w2 = np.random.normal(0.0, pow(1 / self.inodes, 0.5), (self.hnodes, self.inodes))
        self.b2 = np.zeros((self.hnodes, 1))
        self.w3 = np.random.normal(0.0, pow(1 / self.hnodes, 0.5), (self.onodes, self.hnodes))
        self.b3 = np.zeros((self.onodes, 1))

        # Learning rate
        self.lr = learningrate

    def train(self, inputs_list, target_list):
        """
        Run one gradient-descent step on a mini-batch.

        Parameters
        ----------
        inputs_list : array-like
            Inputs, reshaped to (inodes, n_samples).
        target_list : array-like
            One-hot targets, reshaped to (n_samples, onodes).

        Returns
        -------
        z3 : numpy.ndarray
            Network output computed before the update, shape (onodes, n_samples).
        loss : float
            Cross-entropy error of the batch computed before the update.
        """
        inputs = np.array(inputs_list).reshape(self.inodes, -1)
        targets = np.array(target_list).reshape(-1, self.onodes)

        z2, u2, z3, loss = self.forward(inputs, targets, train=True)
        if getattr(self, "verbose", True):
            print("loss", loss)

        # Error terms. For softmax + cross-entropy the output-layer delta is
        # (output - target). Both deltas are computed before any weight changes.
        delta_3 = z3 - targets.T
        delta_2 = self.d_sigmoid(u2) * np.dot(self.w3.T, delta_3)

        # Parameter update. The gradients are summed over the samples of the
        # batch (they are not divided by the number of samples). Summing along
        # axis 1 takes the batch size from the data itself, so a batch whose
        # size differs from self.batch_size no longer causes a shape error.
        self.b3 -= self.lr * delta_3.sum(axis=1, keepdims=True)
        self.w3 -= self.lr * np.dot(delta_3, z2.T)
        self.b2 -= self.lr * delta_2.sum(axis=1, keepdims=True)
        self.w2 -= self.lr * np.dot(delta_2, inputs.T)

        return z3, loss

    def forward(self, inputs_list, target_list=None, train=False):
        """
        Forward pass.

        Parameters
        ----------
        inputs_list : array-like
            Inputs, reshaped to (inodes, n_samples).
        target_list : array-like, optional
            One-hot targets, reshaped to (n_samples, onodes). When given, the
            cross-entropy error is computed as well.
        train : bool, optional
            When True and targets are given, the hidden-layer values needed for
            back-propagation are returned too.

        Returns
        -------
        z3
            if ``target_list`` is None;
        (z3, loss)
            if targets are given and ``train`` is False;
        (z2, u2, z3, loss)
            if targets are given and ``train`` is True.
        """
        # Convert the inputs to an (inodes, n_samples) matrix
        inputs = np.array(inputs_list).reshape(self.inodes, -1)

        # Hidden layer: pre-activation (bias broadcast over the samples) and sigmoid output
        u2 = np.dot(self.w2, inputs) + self.b2
        z2 = self.sigmoid(u2)

        # Output layer: pre-activation and softmax output
        u3 = np.dot(self.w3, z2) + self.b3
        z3 = self.softmax(u3)

        if target_list is not None:
            targets = np.array(target_list).reshape(-1, self.onodes)
            loss = self.cross_entropy_error(z3, targets)
            if train:
                return z2, u2, z3, loss
            return z3, loss

        return z3

    def sigmoid(self, x):
        """Element-wise logistic sigmoid 1 / (1 + exp(-x))."""
        return 1 / (1 + np.exp(-x))

    def d_sigmoid(self, x):
        """
        Derivative of the sigmoid function, evaluated at ``x``.
        """
        s = self.sigmoid(x)
        return s * (1 - s)

    def softmax(self, x):
        """Column-wise softmax; the column maximum is subtracted before exponentiating."""
        shifted = x - np.max(x, axis=0)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=0)

    def cross_entropy_error(self, y, t):
        """
        Compute the mean cross-entropy error of a batch.

        Parameters
        ----------
        y : numpy.ndarray
            Network output, shape (n_classes, n_samples).

        t : numpy.ndarray
            Targets. If ``t`` has as many elements as ``y`` it is treated as
            one-hot rows of shape (n_samples, n_classes) and converted to class
            indices; otherwise it is used directly as an index array.

        Returns
        -------
        error : float
            Cross-entropy error averaged over the samples.
        """

        batch_size = y.shape[1]

        # One-hot targets are converted to the index of the correct class
        if t.size == y.size:
            t = t.argmax(axis=1)

        delta = 1e-7  # added inside the log so that log(0) is never evaluated
        y = y.T
        error = -np.sum(np.log(y[np.arange(batch_size), t] + delta)) / batch_size

        return error

In [None]:
# Network architecture for MNIST
input_nodes = 28 * 28  # one input per pixel of a 28x28 image (= 784)
hidden_nodes = 100     # hidden layer size
output_nodes = 10      # one class per digit 0-9

# Optimisation settings
learning_rate = 1e-2   # learning rate
batch_size = 100
epoch = 10             # number of training epochs

In [None]:
# Build the MNIST network from the hyperparameters configured in the previous cell.
network = FNN(
    inputnodes=input_nodes,
    hiddennodes=hidden_nodes,
    outputnodes=output_nodes,
    learningrate=learning_rate,
    batch_size=batch_size,
)

mnistデータセットのダウンロード

In [None]:
# !wget https://pjreddie.com/media/files/mnist_test.csv

In [None]:
# !wget https://pjreddie.com/media/files/mnist_train.csv

データの読み込み

In [None]:
# Read the MNIST training CSV into memory, one string per row.
with open("mnist_train.csv") as train_file:
    training_data = list(train_file)

In [None]:
# Read the MNIST test CSV into memory, one string per row.
with open("mnist_test.csv") as test_file:
    test_data = list(test_file)

In [None]:
# Sanity check: number of rows read from the training and the test file.
for rows in (training_data, test_data):
    print(len(rows))

In [None]:
# Convert the list of CSV rows to a NumPy array so that the training loop
# can select a shuffled mini-batch with an index array.
training_data = np.array(training_data)

学習

In [None]:
def parse_mnist_rows(rows, n_classes):
    """
    Convert MNIST CSV rows ("label,pixel,pixel,...") into network inputs.

    Parameters
    ----------
    rows : iterable of str
        CSV rows; the first field is the label, the rest are pixel values.
    n_classes : int
        Width of the one-hot target matrix.

    Returns
    -------
    x : numpy.ndarray
        Pixel values divided by 255, shape (n_pixels, n_rows).
    labels : numpy.ndarray
        Integer labels, shape (n_rows,).
    one_hot : numpy.ndarray
        One-hot encoded labels, shape (n_rows, n_classes).
    """
    labels = []
    pixels = []
    for row in rows:
        fields = row.split(",")
        labels.append(int(fields[0]))
        pixels.append([int(value) for value in fields[1:]])
    labels = np.array(labels, dtype=int)
    x = np.array(pixels).T / 255.0
    one_hot = np.zeros((len(labels), n_classes))
    one_hot[np.arange(len(labels)), labels] = 1
    return x, labels, one_hot


train_loss = []  # mean training loss per epoch
test_loss = []  # mean test loss per epoch
train_accuracy = []
test_accuracy = []

# Index-array selection needs an ndarray; np.asarray does not copy if it already is one.
train_rows = np.asarray(training_data)
train_size = len(train_rows)
test_size = len(test_data)

for i in range(epoch):
    loss = 0
    score = 0
    n_batches = 0
    idx = np.random.permutation(train_size)
    print("epoch:", i)
    for j in range(0, train_size, batch_size):  # train on all rows in shuffled order
        x_batch, t, target = parse_mnist_rows(train_rows[idx[j:j+batch_size]], output_nodes)

        # One training step
        output, loss_ = network.train(x_batch, target)

        # Accumulate training loss and number of correct predictions
        loss += loss_
        labels = np.argmax(output.T, axis=1)
        score += np.sum(labels == t)
        n_batches += 1

    train_loss.append(loss / n_batches)
    train_accuracy.append(score / train_size)

    # Evaluate on the test set
    loss = 0
    score = 0
    n_batches = 0
    for j in range(0, test_size, batch_size):
        x_batch, t, target = parse_mnist_rows(test_data[j:j+batch_size], output_nodes)

        output, loss_ = network.forward(x_batch, target)
        loss += loss_
        labels = np.argmax(output.T, axis=1)
        score += np.sum(labels == t)
        n_batches += 1

    test_loss.append(loss / n_batches)
    test_accuracy.append(score / test_size)
    

In [None]:
# Training and test loss, one value per epoch.
fig, ax = plt.subplots()
ax.plot(train_loss, label="train_loss")
ax.plot(test_loss, label="test_loss")
ax.set_xlabel("epoch")
ax.set_ylabel("cross-entropy loss")
ax.set_title("MNIST: loss per epoch")
ax.legend()
plt.show()

In [None]:
# Training and test accuracy, one value per epoch.
fig, ax = plt.subplots()
ax.plot(train_accuracy, label="train_accuracy")
ax.plot(test_accuracy, label="test_accuracy")
ax.set_xlabel("epoch")
ax.set_ylabel("accuracy")
ax.set_title("MNIST: accuracy per epoch")
ax.legend()
plt.show()

In [None]:
# Accuracy of the trained network on the MNIST test set.
with open("mnist_test.csv", 'r') as f:
    test_data = f.readlines()

score = 0
test_size = len(test_data)  # taken from the file instead of a hard-coded row count
for i in range(0, test_size, batch_size):
    data = test_data[i:i+batch_size]

    # First CSV field is the label, the remaining fields are pixel values.
    t = np.array([int(row.split(",")[0]) for row in data])
    x_batch = np.array([[int(value) for value in row.split(",")[1:]] for row in data]).T / 255.0

    output = network.forward(x_batch)
    labels = np.argmax(output.T, axis=1)
    score += np.sum(labels == t)


print("正解率 =", score / test_size)

In [None]:
# Accuracy of the trained network on the MNIST training set.
# NOTE(review): this rebinds training_data to a plain list of rows; the training
# loop indexes training_data with an index array, so re-running it afterwards
# requires converting training_data back to an ndarray first.
with open("mnist_train.csv", 'r') as f:
    training_data = f.readlines()

score = 0
test_size = len(training_data)  # number of evaluated rows, taken from the file
for i in range(0, test_size, batch_size):
    data = training_data[i:i+batch_size]

    # First CSV field is the label, the remaining fields are pixel values.
    t = np.array([int(row.split(",")[0]) for row in data])
    x_batch = np.array([[int(value) for value in row.split(",")[1:]] for row in data]).T / 255.0

    output = network.forward(x_batch)
    labels = np.argmax(output.T, axis=1)
    score += np.sum(labels == t)

print("正解率 =", score / test_size)

### irisデータセット

In [None]:
from sklearn import datasets

In [None]:
# Load the iris dataset shipped with scikit-learn.
iris = datasets.load_iris()

In [None]:
# Feature matrix and class labels of the iris dataset.
data, target = iris["data"], iris["target"]

In [None]:
# Scale each of the four feature columns in place by its maximum value.
for col in range(4):
    data[:, col] /= np.max(data[:, col])

In [None]:
# Random split: the first 100 shuffled samples are used for training, the rest for testing.
idx = np.random.permutation(len(data))
print(idx)
train_idx = idx[:100]
test_idx = idx[100:]
train_x, train_y = data[train_idx], target[train_idx]
test_x, test_y = data[test_idx], target[test_idx]

In [None]:
# Network architecture for iris
input_nodes = 4    # four features per sample
hidden_nodes = 4   # hidden layer size
output_nodes = 3   # class labels 0-2

# Optimisation settings
learning_rate = 1e-2  # learning rate
batch_size = 10
epoch = 500           # number of training epochs

In [None]:
# Build a fresh network for iris from the hyperparameters configured in the previous cell.
network = FNN(
    inputnodes=input_nodes,
    hiddennodes=hidden_nodes,
    outputnodes=output_nodes,
    learningrate=learning_rate,
    batch_size=batch_size,
)

In [None]:
def shuffle(x, y):
    """
    Shuffle samples and labels with the same random permutation.

    Parameters
    ----------
    x : array-like
        Samples, indexed along the first axis.
    y : array-like
        Labels, indexed along the first axis.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Shuffled copies of ``x`` and ``y`` in which ``x[i]`` still belongs to
        ``y[i]``. If the lengths differ, only the first ``min(len(x), len(y))``
        pairs are used. Empty input yields empty arrays.
    """
    n = min(len(x), len(y))
    # A single index permutation keeps the pairs aligned and also works for n == 0.
    perm = np.random.permutation(n)
    return np.asarray(x)[perm], np.asarray(y)[perm]

In [None]:
train_loss = []  # mean training loss per epoch
test_loss = []  # mean test loss per epoch
train_accuracy = []
test_accuracy = []

# Dataset sizes are taken from the arrays instead of being hard-coded.
train_size = len(train_x)
test_size = len(test_x)

for i in range(epoch):
    loss = 0
    score = 0
    n_batches = 0
    print("epoch:", i)
    train_x, train_y = shuffle(train_x, train_y)
    for j in range(0, train_size, batch_size):  # train on all samples in shuffled order
        x_batch = train_x[j:j+batch_size]
        t = train_y[j:j+batch_size]

        # One-hot encode the labels of this batch
        y_batch = np.zeros((len(t), output_nodes))
        y_batch[np.arange(len(t)), t] = 1

        # One training step
        output, loss_ = network.train(x_batch.T, y_batch)

        # Accumulate training loss and number of correct predictions
        loss += loss_
        labels = np.argmax(output.T, axis=1)
        score += np.sum(labels == t)
        n_batches += 1

    train_loss.append(loss / n_batches)
    train_accuracy.append(score / train_size)

    # Evaluate on the test set
    loss = 0
    score = 0
    n_batches = 0
    for j in range(0, test_size, batch_size):
        x_batch = test_x[j:j+batch_size]
        t = test_y[j:j+batch_size]

        # One-hot encode the labels of this batch
        y_batch = np.zeros((len(t), output_nodes))
        y_batch[np.arange(len(t)), t] = 1

        output, loss_ = network.forward(x_batch.T, y_batch)
        loss += loss_
        labels = np.argmax(output.T, axis=1)
        score += np.sum(labels == t)
        n_batches += 1

    test_loss.append(loss / n_batches)
    test_accuracy.append(score / test_size)
    

In [None]:
# Training and test accuracy, one value per epoch.
fig, ax = plt.subplots()
ax.plot(train_accuracy, label="train_accuracy")
ax.plot(test_accuracy, label="test_accuracy")
ax.set_xlabel("epoch")
ax.set_ylabel("accuracy")
ax.set_title("Iris: accuracy per epoch")
ax.legend()
plt.show()

In [None]:
# Training and test loss, one value per epoch.
fig, ax = plt.subplots()
ax.plot(train_loss, label="train_loss")
ax.plot(test_loss, label="test_loss")
ax.set_xlabel("epoch")
ax.set_ylabel("cross-entropy loss")
ax.set_title("Iris: loss per epoch")
ax.legend()
plt.show()

In [None]:
# Accuracy of the trained network on the iris training split.
score = 0
test_size = len(train_x)  # number of evaluated samples, taken from the data
for i in range(0, test_size, batch_size):
    x_batch = train_x[i:i+batch_size]
    t = train_y[i:i+batch_size]
    output = network.forward(x_batch.T)
    labels = np.argmax(output.T, axis=1)
    score += np.sum(labels == t)

print("正解率 =", score / test_size)

In [None]:
# Accuracy of the trained network on the iris test split.
# NOTE(review): this overrides the notebook-wide batch_size; cells that are
# re-run after this one will see batch_size == 2.
batch_size = 2
score = 0
test_size = len(test_x)  # number of evaluated samples, taken from the data
for i in range(0, test_size, batch_size):
    x_batch = test_x[i:i+batch_size]
    t = test_y[i:i+batch_size]
    output = network.forward(x_batch.T)
    labels = np.argmax(output.T, axis=1)
    score += np.sum(labels == t)

print("正解率 =", score / test_size)