In [7]:
%matplotlib inline
import os
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import matplotlib.pyplot as plt
from collections import OrderedDict

#### 全結合層

In [2]:
class Linear:
    """Fully connected (affine) layer computing ``np.dot(x, W) + b``.

    ``forward`` caches its input so that ``backward`` can compute the
    parameter gradients, which are stored on the instance as ``dW`` / ``db``.
    """

    def __init__(self, W, b):
        """Store the weight matrix ``W`` and bias ``b``.

        The arrays are kept by reference (not copied), so in-place updates
        made by whoever owns ``W`` / ``b`` are visible to this layer.
        """
        self.W = W
        self.b = b
        self.x = None   # input cached by forward() for use in backward()
        self.dW = None  # gradient w.r.t. W, filled in by backward()
        self.db = None  # gradient w.r.t. b, filled in by backward()

    def forward(self, x):
        """Return ``np.dot(x, W) + b`` and remember ``x`` for the backward pass."""
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dout):
        """Back-propagate ``dout`` (gradient w.r.t. this layer's output).

        Sets ``self.dW`` and ``self.db`` and returns the gradient w.r.t. the
        layer input. Both a batch (2-D ``x`` / ``dout``) and a single sample
        (1-D ``x`` / ``dout``) are supported.
        """
        # Promote a single 1-D sample to a one-row batch so the parameter
        # gradients keep the shapes of W and b; 2-D inputs pass through as-is.
        x_rows = np.atleast_2d(self.x)
        dout_rows = np.atleast_2d(dout)

        self.dW = np.dot(x_rows.T, dout_rows)
        self.db = np.sum(dout_rows, axis=0)

        # The input gradient keeps the dimensionality of the incoming dout.
        return np.dot(dout, self.W.T)

#### sigmoid関数

In [3]:
class Sigmoid:
    """Element-wise logistic activation ``1 / (1 + exp(-x))``."""

    def __init__(self):
        # Activation produced by the most recent forward() call.
        self.out = None

    def forward(self, x):
        """Apply the sigmoid to ``x``, cache the result and return it."""
        exp_neg = np.exp(-x)
        self.out = 1 / (1 + exp_neg)
        return self.out

    def backward(self, dout):
        """Scale the upstream gradient ``dout`` by ``(1 - out) * out``."""
        activation = self.out
        scaled = dout * (1 - activation)
        return scaled * activation

#### ReLU関数

In [4]:
class Relu:
    """Element-wise rectified linear unit: non-positive inputs become 0."""

    def __init__(self):
        # Boolean array, True where the last forward() input was <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every entry ``<= 0`` set to 0.

        The mask of zeroed positions is cached for ``backward``. ``x`` itself
        is left untouched.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return ``dout`` with the gradient blocked where the input was ``<= 0``.

        Works on a copy so the caller's ``dout`` array is not modified,
        mirroring how ``forward`` treats its input.
        """
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

#### network全体のクラス

In [6]:
class MLP_init:
    """Two-layer perceptron: Linear -> Sigmoid -> Linear -> Sigmoid.

    The loss is the sum-of-squares error ``1/2 * sum((y - t) ** 2)`` against
    one-hot targets. Targets may be given either as integer class labels
    (scalar or 1-D) or as an already one-hot encoded array (2-D).
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create randomly initialised parameters and wire up the layers."""
        # Remembered so integer labels are one-hot encoded to the right width.
        self.output_size = output_size

        # Every weight and bias is drawn from a standard normal distribution.
        self.params = {}
        self.params['W1'] = np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.random.randn(hidden_size)
        self.params['W2'] = np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.random.randn(output_size)

        # The Linear layers receive the very same array objects held in
        # self.params, so parameter updates have to be applied in place
        # (e.g. ``params['W1'] -= ...``) for the layers to see them.
        self.layers = OrderedDict()
        self.layers['Linear1'] = Linear(self.params['W1'], self.params['b1'])
        self.layers['Sigmoid1'] = Sigmoid()
        self.layers['Linear2'] = Linear(self.params['W2'], self.params['b2'])
        self.layers['Sigmoid2'] = Sigmoid()

    def _one_hot(self, t):
        """Return targets as a one-hot array with ``output_size`` columns.

        Arrays with two or more dimensions are taken to be one-hot already and
        are returned unchanged; scalars and 1-D label sequences are used to
        pick rows of an identity matrix.
        """
        t = np.asarray(t)
        if t.ndim >= 2:
            return t
        return np.eye(self.output_size)[t]

    def predict(self, x):
        """Run ``x`` through every layer in order and return the final output."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return the sum-of-squares error ``1/2 * sum((y - t) ** 2)``."""
        t = self._one_hot(t)
        y = self.predict(x)
        return 1/2 * np.sum((y - t) ** 2)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose arg-max prediction equals ``t``.

        ``t`` may hold class labels or one-hot rows; one-hot rows are reduced
        to labels with ``argmax`` before comparing.
        """
        t = np.asarray(t)
        if t.ndim >= 2:
            t = np.argmax(t, axis=1)
        y = np.argmax(self.predict(x), axis=1)
        return np.sum(y == t) / float(x.shape[0])

    def gradient(self, x, t):
        """Return the loss gradients for every parameter via back-propagation.

        Returns a dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``.
        """
        t = self._one_hot(t)

        # The forward pass caches activations in each layer; ``y - t`` is the
        # derivative of the sum-of-squares loss w.r.t. the network output.
        dout = self.predict(x) - t
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {
            'W1': self.layers['Linear1'].dW,
            'b1': self.layers['Linear1'].db,
            'W2': self.layers['Linear2'].dW,
            'b2': self.layers['Linear2'].db,
        }
        return grads

#### データの読み込み

In [None]:
# Build the network with input_size=3, hidden_size=4, output_size=7
# (MLP_init takes its sizes in that order).
# NOTE(review): confirm these match the dataset's feature count and class
# count — the first and last arguments may be swapped. TODO verify.
network = MLP_init(3, 4, 7)
X_train, X_test, y_train, y_test = 