In [1]:
!pip install dezero

Collecting dezero
  Downloading dezero-0.0.13-py3-none-any.whl (28 kB)
Installing collected packages: dezero
Successfully installed dezero-0.0.13


In [3]:
import numpy as np
from dezero import Variable

x_np = np.array(5.0)
x = Variable(x_np)

y = 3 * x ** 2
print(y)

variable(75.0)


In [4]:
y.backward()
print(x.grad)

variable(30.0)


In [5]:
import dezero.functions as F

# ベクトルの内積
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
a, b = Variable(a), Variable(b) # 省略可能
c = F.matmul(a, b)
print(c)

# 行列の積
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
c = F.matmul(a, b)
print(c)

variable(32)
variable([[19 22]
          [43 50]])


In [6]:
def rosenbrock(x0, x1):
    y = 100 * (x1 - x0 ** 2) ** 2 + (x0 - 1) ** 2
    return y

x0 = Variable(np.array(0.0))
x1 = Variable(np.array(2.0))

y = rosenbrock(x0, x1)
y.backward()
print(x0.grad, x1.grad)

variable(-2.0) variable(400.0)


In [7]:
x0 = Variable(np.array(0.0))
x1 = Variable(np.array(2.0))

lr = 0.001 # 学習率
iters = 10000 # 繰り返す数

for i in range(iters):
    print(x0, x1)
    y = rosenbrock(x0, x1)

    x0.cleargrad()
    x1.cleargrad()
    y.backward()

    x0.data -= lr * x0.grad.data
    x1.data -= lr * x1.grad.data

print(x0, x1)

[1;30;43mストリーミング出力は最後の 5000 行に切り捨てられました。[0m
variable(0.9570084124694905) variable(0.9156885683513011)
variable(0.9570268181468906) variable(0.9157238749885158)
variable(0.9570452153895911) variable(0.9157591661212849)
variable(0.9570636042019549) variable(0.9157944417570497)
variable(0.9570819845883425) variable(0.915829701903247)
variable(0.9571003565531113) variable(0.9158649465673097)
variable(0.9571187201006165) variable(0.9159001757566663)
variable(0.9571370752352101) variable(0.9159353894787414)
variable(0.9571554219612416) variable(0.9159705877409556)
variable(0.957173760283058) variable(0.916005770550725)
variable(0.9571920902050032) variable(0.9160409379154618)
variable(0.9572104117314186) variable(0.916076089842574)
variable(0.957228724866643) variable(0.9161112263394656)
variable(0.9572470296150123) variable(0.9161463474135364)
variable(0.95726532598086) variable(0.916181453072182)
variable(0.9572836139685166) variable(0.916216543322794)
variable(0.9573018935823099) variab

### トイ・データセット

In [8]:
np.random.seed(0) # シードを固定
x = np.random.rand(100, 1)
y = 5 + 2 * x + np.random.rand(100, 1)

### 戦警戒機の実装

In [9]:
# トイ・データセット
np.random.seed(0)
x = np.random.rand(100, 1)
y = 5 + 2 * x + np.random.rand(100, 1)
x, y = Variable(x), Variable(y)

W = Variable(np.zeros((1, 1)))
b = Variable(np.zeros(1))

def predict(x):
    y = F.matmul(x, W) + b
    return y

In [10]:
def mean_squared_error(x0, x1):
    diff = x0 - x1
    return F.sum(diff ** 2) / len(diff)

lr = 0.1
iters = 100

for i in range(iters):
    y_pred = predict(x)
    loss = mean_squared_error(y, y_pred)
    W.cleargrad()
    b.cleargrad()
    loss.backward()

    W.data -= lr * W.grad.data
    b.data -= lr * b.grad.data

    if i % 10 == 0: # 10回ごとに出力
        print(loss.data)

print('===')
print('W =', W.data)
print('b =', b.data)

42.296340129442335
0.24915731977561134
0.10078974954301652
0.09461859803040694
0.0902667138137311
0.08694585483964615
0.08441084206493275
0.08247571022229121
0.08099850454041051
0.07987086218625004
===
W = [[2.11807369]]
b = [5.46608905]


### 非線形なデータセット

In [11]:
np.random.seed(0)
x = np.random.rand(100, 1)
y = np.sin(2 * np.pi * x) + np.random.rand(100, 1)

### ニューラルネットワークの実装

In [13]:
W1, b1 = Variable(np.zeros((1, 1))), Variable(np.zeros(1))
W2, b2 = Variable(np.zeros((1, 1))), Variable(np.zeros(1))

def predict(x):
    y = F.linear(x, W1, W2)
    y = F.sigmoid(y)
    y = F.linear(y, W2, b2)
    return y

In [14]:
# データセット
np.random.rand(100, 1)
y = np.sin(2 * np.pi * x) + np.random.rand(100, 1)

# ①重みの初期化
I, H, O = 1, 10, 1
W1 = Variable(0.01 * np.random.rand(I, H))
b1 = Variable(np.zeros(H))
W2 = Variable(0.01 * np.random.rand(H, O))
b2 = Variable(np.zeros(O))

# ②ニューラルネットワークの推論
def predict(x):
    y = F.linear(x, W1, b1)
    y = F.sigmoid(y)
    y = F.linear(y, W2, b2)
    return y

lr = 0.2
iters = 10000

# ③ニューラルネットワークの学習
for i in range(iters):
    y_pred = predict(x)
    loss = F.mean_squared_error(y, y_pred)

    W1.cleargrad()
    b1.cleargrad()
    W2.cleargrad()
    b2.cleargrad()

    loss.backward()

    W1.data -= lr * W1.grad.data
    b1.data -= lr * b1.grad.data
    W2.data -= lr * W2.grad.data
    b2.data -= lr * b2.grad.data
    if i % 1000 == 0: # 1000回ごとに出力
        print(loss.data)

0.7789479819180808
0.27423606278199714
0.2731439283787476
0.2701263229248578
0.2588037718463923
0.22264474838282575
0.14272988610629356
0.09119775544015597
0.08656234433724523
0.08570817576774797


### レイヤとモデル

In [15]:
import dezero.layers as L

linear = L.Linear(10) # 出力サイズだけを指定

batch_size, input_size = 100, 5
x = np.random.randn(batch_size, input_size)
y = linear(x)

print('y shape:', y.shape)
print('params shape:', linear.W.shape, linear.b.shape)

for param in linear.params():
    print(param.name, param.shape)

y shape: (100, 10)
params shape: (5, 10) (10,)
b (10,)
W (5, 10)


In [16]:
from dezero import Model

class TwoLayerNet(Model):
    def __init__(self, hidden_size, out_size):
        super().__init__()
        self.l1 = L.Linear(hidden_size)
        self.l2 = L.Linear(out_size)
    
    def forward(self, x):
        y = F.relu(self.l1(x))
        y = self.l2(y)
        return y

In [20]:
from dezero import Model

# データセット
np.random.rand(100, 1)
x = np.random.rand(100, 1)
y = np.sin(2 * np.pi * x) + np.random.rand(100, 1)

lr = 0.2
iters = 10000

class TwoLayerNet(Model):
    def __init__(self, hidden_size, out_size):
        super().__init__()
        self.l1 = L.Linear(hidden_size)
        self.l2 = L.Linear(out_size)

    def forward(self, x):
        y = F.sigmoid(self.l1(x))
        y = self.l2(y)
        return y

model = TwoLayerNet(10, 1)

for i in range(iters):
    y_pred = model.forward(x) # もしくはmodel(x)でも同じ動作
    loss = F.mean_squared_error(y, y_pred)

    model.cleargrads()
    loss.backward()

    for p in model.params():
        p.data -= lr * p.grad.data

    if i % 1000 == 0:
        print(loss)

variable(0.9560034397090164)
variable(0.2917471033390681)
variable(0.27471874421592535)
variable(0.1940271242756003)
variable(0.08646121101319171)
variable(0.07650937143587871)
variable(0.07575069420685243)
variable(0.07527433738647474)
variable(0.07536095937770858)
variable(0.11593465522721468)


### オプティマイザ（最適化手法）

In [21]:
from dezero import optimizers

# データセット
np.random.seed(0)
x = np.random.rand(100, 1)
y = np.sin(2 * np.pi * x) + np.random.rand(100, 1)

lr = 0.2
iters = 10000

class TwoLayerNet(Model):
    def __init__(self, hidden_size, out_size):
        super().__init__()
        self.l1 = L.Linear(hidden_size)
        self.l2 = L.Linear(out_size)

    def forward(self, x):
        y = F.sigmoid(self.l1(x))
        y = self.l2(y)
        return y

model = TwoLayerNet(10, 1)
optimizers = optimizers.SGD(lr) # オプティマイザの生成
optimizers.setup(model)

for i in range(iters):
    y_pred = model(x)
    loss = F.mean_squared_error(y, y_pred)

    model.cleargrads()
    loss.backward()

    optimizers.update() # オプティマイザによる更新
    if i % 1000 == 0:
        print(loss)

variable(0.8165178492839196)
variable(0.24990280802148895)
variable(0.24609876581126014)
variable(0.2372159081431807)
variable(0.20793216413350174)
variable(0.12311905720649353)
variable(0.07888166506355149)
variable(0.07655073683421637)
variable(0.0763780308623822)
variable(0.07618764131185572)


In [22]:
def one_hot(state):
    HEIGHT, WIDTH = 3, 4
    vec = np.zeros(HEIGHT * WIDTH, dtype=np.float32)
    y, x = state
    idx = WIDTH * y + x
    vec[idx] = 1.0
    return vec[np.newaxis, :] # バッチのための新しい軸を追加

state = (2, 0)
x = one_hot(state)

print(x.shape)
print(x)

(1, 12)
[[0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]]


### Q関数を表すニューラルネットワーク

In [23]:
from collections import defaultdict

Q = defaultdict(lambda: 0)
state = (2, 0)
action = 0

print(Q[state, action])

0


In [24]:
class QNet(Model):
    def __init__(self):
        super().__init__()
        self.l1 = L.Linear(100) # 中間層のサイズ
        self.l2 = L.Linear(4) # 行動のサイズ
    
    def forward(self, x):
        x = F.relu(self.l1(x))
        x = self.l2(x)
        return x

qnet = QNet()

state = (2, 0)
state = one_hot(state) # one-hotベクトルに変換

qs = qnet(state)
print(qs.shape)

(1, 4)


### ニューラルネットワークとQ学習

In [25]:
class QLearningAgent:
    def __init__(self):
        self.gamma = 0.9
        self.lr = 0.01
        self.epsilon = 0.1
        self.action_size = 4

        self.qnet = QNet()
        self.optimizer = optimizers.SGD(self.lr)
        self.optimizer.setup(self.qnet)
    
    def get_action(self, state):
        if np.random.rand() < self.epsilon:
            return np.random.choice(self.action_size)
        else:
            qs = self.qnet(state)
            return qs.data.argmax()
    
    def update(self, state, action, reward, next_state, done):
        if done:
            next_q = np.zeros(1) # [0.]
        else:
            next_qs = self.qnet(next_state)
            next_q = next_qs.max(axis=1)
            next_q.unchain()
        
        target = self.gamma * next_q + reward
        qs = self.qnet(state)
        q = qs[:, action]
        loss = F.mean_squared_error(target, q)

        self.qnet.cleargrads()
        loss.backward()
        self.optimizer.update()

        return loss.data

In [29]:
import gridworld as gw
from dezero import optimizers

In [30]:
env = gw.GridWorld()
agent = QLearningAgent()

episodes = 1000 # エピソード数
loss_history = []

for episode in range(episodes):
    state = env.reset()
    state = one_hot(state)
    total_loss, cnt = 0, 0
    done = False

    while not done:
        action = agent.get_action(state)
        next_state, reward, done = env.step(action)
        next_state = one_hot(next_state)

        loss = agent.update(state, action, reward, next_state, done)
        total_loss += loss
        cnt += 1
        state = next_state
    
    average_loss = total_loss / cnt
    loss_history.append(average_loss)