# Q-Learning

* Q Table은 초기에 0값 (np.zeros) 로 시작합니다. 
* Q Table의 row는 states, column은 actions로 간주합니다.

### FrozenLake v0

```
SFFF       (S: starting point, safe)
FHFH       (F: frozen surface, safe)
FFFH       (H: hole, fall to your doom)
HFFG       (G: goal, where the frisbee is located)
```

Actions 

* 왼쪽으로 이동: 0
* 아래로 이동: 1
* 오른쪽으로 이동: 2
* 위로 이동: 3


### Bellman Equation 



## $$ Q(s, a) \leftarrow Q(s, a) + learning\ rate \cdot (r + \gamma \cdot \max_{a^{\prime}} Q(s^{\prime}, a^{\prime}) - Q(s, a)) $$ 

# Code Implementation

### Load Dependencies

In [1]:
import gym
import numpy as np
from time import sleep
from IPython.display import clear_output

In [4]:
class QLearning(object):
    """Tabular Q-learning agent for a gym environment with discrete spaces.

    The Q table is a NumPy array with one row per state and one column per
    action; it starts as all zeros and is updated in place by ``train``.
    """

    def __init__(self, game, epoch = 20000, lr=0.65, gamma=0.90):
        """Create the environment and an all-zero Q table.

        Args:
            game: Environment id passed to ``gym.make``.
            epoch: Number of episodes that ``train`` runs.
            lr: Learning rate applied to each Q-value update.
            gamma: Discount factor applied to the best next-state Q value.
        """
        self.env = gym.make(game)
        self.epoch = epoch
        self.lr = lr
        self.gamma = gamma
        self.Q = np.zeros((self.env.observation_space.n, self.env.action_space.n))

    def train(self):
        """Run ``self.epoch`` episodes, updating ``self.Q`` in place.

        Every 1000th episode the environment is rendered and the episode
        number and the current Q table are printed.
        """
        env, Q, lr, gamma = self.env, self.Q, self.lr, self.gamma
        n_actions = env.action_space.n

        # ``range`` and single-argument ``print(...)`` calls keep this method
        # runnable on both Python 2 and Python 3.
        for i in range(1, self.epoch + 1):
            state = env.reset()

            while True:
                # Greedy action over the Q row plus Gaussian noise whose scale
                # shrinks as 1/(i+1), so later episodes explore less.
                noise = np.random.randn(1, n_actions) * (1. / (i + 1))
                action = np.argmax(Q[state, :] + noise)
                next_state, reward, done, _ = env.step(action)

                # Move Q(s, a) toward reward + gamma * max_a' Q(s', a').
                target = reward + gamma * np.max(Q[next_state, :])
                Q[state, action] += lr * (target - Q[state, action])
                if done:
                    break

                state = next_state

            if i % 1000 == 0:
                self.render()
                print('epoch: {}'.format(i))
                print(Q)

    def play(self):
        """Play one episode with the current Q table and print the result.

        Actions are chosen greedily from ``self.Q`` with a small amount of
        Gaussian noise (scale 0.01). Every step is rendered; the accumulated
        reward and the number of moves are printed when the episode ends.
        """
        env, Q = self.env, self.Q
        n_actions = env.action_space.n
        score = 0
        count = 0
        state = env.reset()
        while True:
            action = np.argmax(Q[state, :] + np.random.randn(1, n_actions) * 0.01)
            state, reward, done, _ = env.step(action)

            self.render()
            count += 1
            score += reward
            if done:
                # The final frame is drawn a second time before leaving the loop.
                self.render()
                break
        print('Total Score: {} Move Count: {}'.format(score, count))

    def render(self):
        """Clear the notebook cell output, draw the environment, pause 0.1 s."""
        clear_output(True)
        self.env.render()
        sleep(0.1)

# Build the agent for the FrozenLake environment, train it for the default
# number of episodes, then play a single episode with the learned Q table.
q = QLearning(game='FrozenLake-v0')
q.train()
q.play()

SFFF
FHFH
FFFH
HFF[41mG[0m
  (Right)
Total Score: 1.0 Move Count: 23
