In [133]:
%matplotlib inline

import gym
import random
import numpy as np
from collections import deque
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import matplotlib.pyplot as plt

![Dyna-learning](dyna.png)

In [134]:
# Build the MountainCar-v0 environment and reset it once; the cell output
# below is the initial observation returned by reset().
env = gym.make('MountainCar-v0')
env.reset()

array([-0.47332795,  0.        ])

In [139]:
class Dyna:
    """Agent loop that trains a Q-value approximator from real environment steps.

    Every real transition is (optionally) stored in a bounded replay memory
    and immediately used for a one-sample update of ``q_approx_model``.
    The environment-model / planning half of Dyna is not implemented here.
    """

    def __init__(self, env, q_approx_model, memory_size=1_000_000):
        """
        Args:
            env: environment exposing ``reset()``, ``step(action)``,
                ``render()``, ``close()`` and ``observation_space.shape``.
                ``step`` must return the 4-tuple
                ``(state, reward, done, info)``.
            q_approx_model: object providing ``greedy_action(state)``,
                ``epsilon_greedy(state)`` and
                ``experience_replay(transitions)``.
            memory_size: maximum number of transitions kept in
                ``real_experience``; once full, the oldest are discarded.
        """
        self.env = env
        self.q_approx_model = q_approx_model
        self.real_experience = deque(maxlen=memory_size)

    def remember(self, state, action, newReward, newState, terminated):
        """Append one transition tuple to the replay memory."""
        self.real_experience.append(
            (state, action, newReward, newState, terminated))

    def run_episode(self, render=False, exposition=False):
        """Run a single episode until the environment reports it is done.

        Args:
            render: call ``env.render()`` before every step.
            exposition: if True, act greedily and do not store transitions
                in the replay memory; otherwise act epsilon-greedily and
                store every transition.

        Returns:
            ``(t, total_reward)``: number of steps taken and the sum of the
            rewards received.
        """
        n_features = self.env.observation_space.shape[0]
        # States are kept as (1, n_features) rows throughout the episode.
        state = np.reshape(self.env.reset(), [1, n_features])
        total_reward = 0

        t = 0
        try:
            while True:
                t += 1

                if render:
                    self.env.render()

                if exposition:
                    action = self.q_approx_model.greedy_action(state)
                else:
                    action = self.q_approx_model.epsilon_greedy(state)

                newState, newReward, terminated, _info = self.env.step(action)
                newState = np.reshape(newState, [1, n_features])
                transition = (state, action, newReward, newState, terminated)

                if not exposition:
                    # Add to memory
                    self.remember(*transition)

                # Update QApproxModel (d) from this single real transition.
                # NOTE(review): this update also runs in exposition mode;
                # confirm whether evaluation episodes are meant to learn.
                self.q_approx_model.experience_replay([transition])

                # TODO: update the environment model (e); not implemented.

                state = newState
                total_reward += newReward
                if terminated:
                    break
        finally:
            # Release the environment's resources even if a step raised.
            self.env.close()

        return t, total_reward

    def run(self, n_episodes, render=False):
        """Run ``n_episodes`` training episodes, printing a line for each.

        Args:
            n_episodes: number of episodes to run.
            render: forwarded to ``run_episode`` so every step is rendered.

        Returns:
            ``numpy.ndarray`` with the length of each episode, in order.
        """
        episode_lengths = []
        for episode_idx in range(n_episodes):
            episode_length, total_reward = self.run_episode(render=render)
            print(
                f"Episode: {episode_idx+1} "
                f"Length: {episode_length} "
                f"Total reward: {total_reward} "
            )
            episode_lengths.append(episode_length)

        return np.array(episode_lengths)
    

In [140]:
# NOTE(review): QApproxModel is defined in a cell that appears later in this
# file (In [135]); on a fresh kernel that cell must run first or this raises
# NameError.
q = QApproxModel(env)
dyna = Dyna(env, q)

In [141]:
# Train for 3500 episodes (epsilon-greedy, no rendering); the cell value is
# the array of per-episode lengths.
dyna.run(3500)

Episode: 1 Length: 200 Total reward: -200.0 
Episode: 2 Length: 200 Total reward: -200.0 
Episode: 3 Length: 200 Total reward: -200.0 
Episode: 4 Length: 200 Total reward: -200.0 
Episode: 5 Length: 200 Total reward: -200.0 
Episode: 6 Length: 200 Total reward: -200.0 
Episode: 7 Length: 200 Total reward: -200.0 
Episode: 8 Length: 200 Total reward: -200.0 
Episode: 9 Length: 200 Total reward: -200.0 
Episode: 10 Length: 200 Total reward: -200.0 
Episode: 11 Length: 200 Total reward: -200.0 
Episode: 12 Length: 200 Total reward: -200.0 
Episode: 13 Length: 200 Total reward: -200.0 
Episode: 14 Length: 200 Total reward: -200.0 
Episode: 15 Length: 200 Total reward: -200.0 
Episode: 16 Length: 200 Total reward: -200.0 
Episode: 17 Length: 200 Total reward: -200.0 
Episode: 18 Length: 200 Total reward: -200.0 
Episode: 19 Length: 200 Total reward: -200.0 
Episode: 20 Length: 200 Total reward: -200.0 
Episode: 21 Length: 200 Total reward: -200.0 
Episode: 22 Length: 200 Total reward: -200.

Episode: 178 Length: 200 Total reward: -200.0 
Episode: 179 Length: 200 Total reward: -200.0 
Episode: 180 Length: 200 Total reward: -200.0 
Episode: 181 Length: 200 Total reward: -200.0 
Episode: 182 Length: 200 Total reward: -200.0 
Episode: 183 Length: 200 Total reward: -200.0 
Episode: 184 Length: 200 Total reward: -200.0 
Episode: 185 Length: 200 Total reward: -200.0 
Episode: 186 Length: 200 Total reward: -200.0 
Episode: 187 Length: 200 Total reward: -200.0 
Episode: 188 Length: 200 Total reward: -200.0 
Episode: 189 Length: 200 Total reward: -200.0 
Episode: 190 Length: 200 Total reward: -200.0 
Episode: 191 Length: 200 Total reward: -200.0 
Episode: 192 Length: 200 Total reward: -200.0 
Episode: 193 Length: 200 Total reward: -200.0 
Episode: 194 Length: 200 Total reward: -200.0 
Episode: 195 Length: 200 Total reward: -200.0 
Episode: 196 Length: 200 Total reward: -200.0 
Episode: 197 Length: 200 Total reward: -200.0 
Episode: 198 Length: 200 Total reward: -200.0 
Episode: 199 

Episode: 353 Length: 200 Total reward: -200.0 
Episode: 354 Length: 200 Total reward: -200.0 
Episode: 355 Length: 200 Total reward: -200.0 
Episode: 356 Length: 200 Total reward: -200.0 
Episode: 357 Length: 200 Total reward: -200.0 
Episode: 358 Length: 200 Total reward: -200.0 
Episode: 359 Length: 200 Total reward: -200.0 
Episode: 360 Length: 200 Total reward: -200.0 
Episode: 361 Length: 200 Total reward: -200.0 
Episode: 362 Length: 200 Total reward: -200.0 
Episode: 363 Length: 200 Total reward: -200.0 
Episode: 364 Length: 200 Total reward: -200.0 
Episode: 365 Length: 200 Total reward: -200.0 
Episode: 366 Length: 200 Total reward: -200.0 
Episode: 367 Length: 200 Total reward: -200.0 
Episode: 368 Length: 200 Total reward: -200.0 
Episode: 369 Length: 200 Total reward: -200.0 
Episode: 370 Length: 200 Total reward: -200.0 
Episode: 371 Length: 200 Total reward: -200.0 
Episode: 372 Length: 200 Total reward: -200.0 
Episode: 373 Length: 200 Total reward: -200.0 
Episode: 374 

Episode: 528 Length: 200 Total reward: -200.0 
Episode: 529 Length: 200 Total reward: -200.0 
Episode: 530 Length: 200 Total reward: -200.0 
Episode: 531 Length: 200 Total reward: -200.0 
Episode: 532 Length: 200 Total reward: -200.0 
Episode: 533 Length: 200 Total reward: -200.0 
Episode: 534 Length: 200 Total reward: -200.0 
Episode: 535 Length: 200 Total reward: -200.0 
Episode: 536 Length: 200 Total reward: -200.0 
Episode: 537 Length: 200 Total reward: -200.0 
Episode: 538 Length: 200 Total reward: -200.0 
Episode: 539 Length: 200 Total reward: -200.0 
Episode: 540 Length: 200 Total reward: -200.0 
Episode: 541 Length: 200 Total reward: -200.0 
Episode: 542 Length: 200 Total reward: -200.0 
Episode: 543 Length: 200 Total reward: -200.0 
Episode: 544 Length: 200 Total reward: -200.0 
Episode: 545 Length: 200 Total reward: -200.0 
Episode: 546 Length: 200 Total reward: -200.0 
Episode: 547 Length: 200 Total reward: -200.0 
Episode: 548 Length: 200 Total reward: -200.0 
Episode: 549 

Episode: 703 Length: 200 Total reward: -200.0 
Episode: 704 Length: 200 Total reward: -200.0 
Episode: 705 Length: 200 Total reward: -200.0 
Episode: 706 Length: 200 Total reward: -200.0 
Episode: 707 Length: 200 Total reward: -200.0 
Episode: 708 Length: 200 Total reward: -200.0 
Episode: 709 Length: 200 Total reward: -200.0 
Episode: 710 Length: 200 Total reward: -200.0 
Episode: 711 Length: 200 Total reward: -200.0 
Episode: 712 Length: 200 Total reward: -200.0 
Episode: 713 Length: 200 Total reward: -200.0 
Episode: 714 Length: 200 Total reward: -200.0 
Episode: 715 Length: 200 Total reward: -200.0 
Episode: 716 Length: 200 Total reward: -200.0 
Episode: 717 Length: 200 Total reward: -200.0 
Episode: 718 Length: 200 Total reward: -200.0 
Episode: 719 Length: 200 Total reward: -200.0 
Episode: 720 Length: 200 Total reward: -200.0 
Episode: 721 Length: 200 Total reward: -200.0 
Episode: 722 Length: 200 Total reward: -200.0 
Episode: 723 Length: 200 Total reward: -200.0 
Episode: 724 

Episode: 878 Length: 200 Total reward: -200.0 
Episode: 879 Length: 200 Total reward: -200.0 
Episode: 880 Length: 200 Total reward: -200.0 
Episode: 881 Length: 200 Total reward: -200.0 
Episode: 882 Length: 200 Total reward: -200.0 
Episode: 883 Length: 200 Total reward: -200.0 
Episode: 884 Length: 200 Total reward: -200.0 
Episode: 885 Length: 200 Total reward: -200.0 
Episode: 886 Length: 200 Total reward: -200.0 
Episode: 887 Length: 200 Total reward: -200.0 
Episode: 888 Length: 200 Total reward: -200.0 
Episode: 889 Length: 200 Total reward: -200.0 
Episode: 890 Length: 200 Total reward: -200.0 
Episode: 891 Length: 200 Total reward: -200.0 
Episode: 892 Length: 200 Total reward: -200.0 
Episode: 893 Length: 200 Total reward: -200.0 
Episode: 894 Length: 200 Total reward: -200.0 
Episode: 895 Length: 200 Total reward: -200.0 
Episode: 896 Length: 200 Total reward: -200.0 
Episode: 897 Length: 200 Total reward: -200.0 
Episode: 898 Length: 200 Total reward: -200.0 
Episode: 899 

Episode: 1052 Length: 200 Total reward: -200.0 
Episode: 1053 Length: 200 Total reward: -200.0 
Episode: 1054 Length: 200 Total reward: -200.0 
Episode: 1055 Length: 200 Total reward: -200.0 
Episode: 1056 Length: 200 Total reward: -200.0 
Episode: 1057 Length: 200 Total reward: -200.0 
Episode: 1058 Length: 200 Total reward: -200.0 
Episode: 1059 Length: 200 Total reward: -200.0 
Episode: 1060 Length: 200 Total reward: -200.0 
Episode: 1061 Length: 200 Total reward: -200.0 
Episode: 1062 Length: 200 Total reward: -200.0 
Episode: 1063 Length: 200 Total reward: -200.0 
Episode: 1064 Length: 200 Total reward: -200.0 
Episode: 1065 Length: 200 Total reward: -200.0 
Episode: 1066 Length: 200 Total reward: -200.0 
Episode: 1067 Length: 200 Total reward: -200.0 
Episode: 1068 Length: 200 Total reward: -200.0 
Episode: 1069 Length: 200 Total reward: -200.0 
Episode: 1070 Length: 200 Total reward: -200.0 
Episode: 1071 Length: 200 Total reward: -200.0 
Episode: 1072 Length: 200 Total reward: 

Episode: 1223 Length: 200 Total reward: -200.0 
Episode: 1224 Length: 200 Total reward: -200.0 
Episode: 1225 Length: 200 Total reward: -200.0 
Episode: 1226 Length: 200 Total reward: -200.0 
Episode: 1227 Length: 200 Total reward: -200.0 
Episode: 1228 Length: 200 Total reward: -200.0 
Episode: 1229 Length: 200 Total reward: -200.0 
Episode: 1230 Length: 200 Total reward: -200.0 
Episode: 1231 Length: 200 Total reward: -200.0 
Episode: 1232 Length: 200 Total reward: -200.0 
Episode: 1233 Length: 200 Total reward: -200.0 
Episode: 1234 Length: 200 Total reward: -200.0 
Episode: 1235 Length: 200 Total reward: -200.0 
Episode: 1236 Length: 200 Total reward: -200.0 
Episode: 1237 Length: 200 Total reward: -200.0 
Episode: 1238 Length: 200 Total reward: -200.0 
Episode: 1239 Length: 200 Total reward: -200.0 
Episode: 1240 Length: 200 Total reward: -200.0 
Episode: 1241 Length: 200 Total reward: -200.0 
Episode: 1242 Length: 200 Total reward: -200.0 
Episode: 1243 Length: 200 Total reward: 

Episode: 1394 Length: 200 Total reward: -200.0 
Episode: 1395 Length: 200 Total reward: -200.0 
Episode: 1396 Length: 200 Total reward: -200.0 
Episode: 1397 Length: 200 Total reward: -200.0 
Episode: 1398 Length: 200 Total reward: -200.0 
Episode: 1399 Length: 200 Total reward: -200.0 
Episode: 1400 Length: 200 Total reward: -200.0 
Episode: 1401 Length: 200 Total reward: -200.0 
Episode: 1402 Length: 200 Total reward: -200.0 
Episode: 1403 Length: 200 Total reward: -200.0 
Episode: 1404 Length: 200 Total reward: -200.0 
Episode: 1405 Length: 200 Total reward: -200.0 
Episode: 1406 Length: 200 Total reward: -200.0 
Episode: 1407 Length: 200 Total reward: -200.0 
Episode: 1408 Length: 200 Total reward: -200.0 
Episode: 1409 Length: 200 Total reward: -200.0 
Episode: 1410 Length: 200 Total reward: -200.0 
Episode: 1411 Length: 200 Total reward: -200.0 
Episode: 1412 Length: 200 Total reward: -200.0 
Episode: 1413 Length: 200 Total reward: -200.0 
Episode: 1414 Length: 200 Total reward: 

Episode: 1565 Length: 200 Total reward: -200.0 
Episode: 1566 Length: 200 Total reward: -200.0 
Episode: 1567 Length: 200 Total reward: -200.0 
Episode: 1568 Length: 200 Total reward: -200.0 
Episode: 1569 Length: 200 Total reward: -200.0 
Episode: 1570 Length: 200 Total reward: -200.0 
Episode: 1571 Length: 200 Total reward: -200.0 
Episode: 1572 Length: 200 Total reward: -200.0 
Episode: 1573 Length: 200 Total reward: -200.0 
Episode: 1574 Length: 200 Total reward: -200.0 
Episode: 1575 Length: 200 Total reward: -200.0 
Episode: 1576 Length: 200 Total reward: -200.0 
Episode: 1577 Length: 200 Total reward: -200.0 
Episode: 1578 Length: 200 Total reward: -200.0 
Episode: 1579 Length: 200 Total reward: -200.0 
Episode: 1580 Length: 200 Total reward: -200.0 
Episode: 1581 Length: 200 Total reward: -200.0 
Episode: 1582 Length: 200 Total reward: -200.0 
Episode: 1583 Length: 200 Total reward: -200.0 
Episode: 1584 Length: 200 Total reward: -200.0 
Episode: 1585 Length: 200 Total reward: 

Episode: 1736 Length: 200 Total reward: -200.0 
Episode: 1737 Length: 200 Total reward: -200.0 
Episode: 1738 Length: 200 Total reward: -200.0 
Episode: 1739 Length: 200 Total reward: -200.0 
Episode: 1740 Length: 200 Total reward: -200.0 
Episode: 1741 Length: 200 Total reward: -200.0 
Episode: 1742 Length: 200 Total reward: -200.0 
Episode: 1743 Length: 200 Total reward: -200.0 
Episode: 1744 Length: 200 Total reward: -200.0 
Episode: 1745 Length: 200 Total reward: -200.0 
Episode: 1746 Length: 200 Total reward: -200.0 
Episode: 1747 Length: 200 Total reward: -200.0 
Episode: 1748 Length: 200 Total reward: -200.0 
Episode: 1749 Length: 200 Total reward: -200.0 
Episode: 1750 Length: 200 Total reward: -200.0 
Episode: 1751 Length: 200 Total reward: -200.0 
Episode: 1752 Length: 200 Total reward: -200.0 
Episode: 1753 Length: 200 Total reward: -200.0 
Episode: 1754 Length: 200 Total reward: -200.0 
Episode: 1755 Length: 200 Total reward: -200.0 
Episode: 1756 Length: 200 Total reward: 

Episode: 1907 Length: 200 Total reward: -200.0 
Episode: 1908 Length: 200 Total reward: -200.0 
Episode: 1909 Length: 200 Total reward: -200.0 
Episode: 1910 Length: 200 Total reward: -200.0 
Episode: 1911 Length: 200 Total reward: -200.0 
Episode: 1912 Length: 200 Total reward: -200.0 
Episode: 1913 Length: 200 Total reward: -200.0 
Episode: 1914 Length: 200 Total reward: -200.0 
Episode: 1915 Length: 200 Total reward: -200.0 
Episode: 1916 Length: 200 Total reward: -200.0 
Episode: 1917 Length: 200 Total reward: -200.0 
Episode: 1918 Length: 200 Total reward: -200.0 
Episode: 1919 Length: 200 Total reward: -200.0 
Episode: 1920 Length: 200 Total reward: -200.0 
Episode: 1921 Length: 200 Total reward: -200.0 
Episode: 1922 Length: 200 Total reward: -200.0 
Episode: 1923 Length: 200 Total reward: -200.0 
Episode: 1924 Length: 200 Total reward: -200.0 
Episode: 1925 Length: 200 Total reward: -200.0 
Episode: 1926 Length: 200 Total reward: -200.0 
Episode: 1927 Length: 200 Total reward: 

Episode: 2078 Length: 200 Total reward: -200.0 
Episode: 2079 Length: 200 Total reward: -200.0 
Episode: 2080 Length: 200 Total reward: -200.0 
Episode: 2081 Length: 200 Total reward: -200.0 
Episode: 2082 Length: 200 Total reward: -200.0 
Episode: 2083 Length: 200 Total reward: -200.0 
Episode: 2084 Length: 200 Total reward: -200.0 
Episode: 2085 Length: 200 Total reward: -200.0 
Episode: 2086 Length: 200 Total reward: -200.0 
Episode: 2087 Length: 200 Total reward: -200.0 
Episode: 2088 Length: 200 Total reward: -200.0 
Episode: 2089 Length: 200 Total reward: -200.0 
Episode: 2090 Length: 200 Total reward: -200.0 
Episode: 2091 Length: 200 Total reward: -200.0 
Episode: 2092 Length: 200 Total reward: -200.0 
Episode: 2093 Length: 200 Total reward: -200.0 
Episode: 2094 Length: 200 Total reward: -200.0 
Episode: 2095 Length: 200 Total reward: -200.0 
Episode: 2096 Length: 200 Total reward: -200.0 
Episode: 2097 Length: 200 Total reward: -200.0 
Episode: 2098 Length: 200 Total reward: 

Episode: 2249 Length: 200 Total reward: -200.0 
Episode: 2250 Length: 200 Total reward: -200.0 
Episode: 2251 Length: 200 Total reward: -200.0 
Episode: 2252 Length: 200 Total reward: -200.0 
Episode: 2253 Length: 200 Total reward: -200.0 
Episode: 2254 Length: 200 Total reward: -200.0 
Episode: 2255 Length: 200 Total reward: -200.0 
Episode: 2256 Length: 200 Total reward: -200.0 
Episode: 2257 Length: 200 Total reward: -200.0 
Episode: 2258 Length: 200 Total reward: -200.0 
Episode: 2259 Length: 200 Total reward: -200.0 
Episode: 2260 Length: 200 Total reward: -200.0 
Episode: 2261 Length: 200 Total reward: -200.0 
Episode: 2262 Length: 200 Total reward: -200.0 
Episode: 2263 Length: 200 Total reward: -200.0 
Episode: 2264 Length: 200 Total reward: -200.0 
Episode: 2265 Length: 200 Total reward: -200.0 
Episode: 2266 Length: 200 Total reward: -200.0 
Episode: 2267 Length: 200 Total reward: -200.0 
Episode: 2268 Length: 200 Total reward: -200.0 
Episode: 2269 Length: 200 Total reward: 

Episode: 2420 Length: 200 Total reward: -200.0 
Episode: 2421 Length: 200 Total reward: -200.0 
Episode: 2422 Length: 200 Total reward: -200.0 
Episode: 2423 Length: 200 Total reward: -200.0 
Episode: 2424 Length: 200 Total reward: -200.0 
Episode: 2425 Length: 200 Total reward: -200.0 
Episode: 2426 Length: 200 Total reward: -200.0 
Episode: 2427 Length: 200 Total reward: -200.0 
Episode: 2428 Length: 200 Total reward: -200.0 
Episode: 2429 Length: 200 Total reward: -200.0 
Episode: 2430 Length: 200 Total reward: -200.0 
Episode: 2431 Length: 200 Total reward: -200.0 
Episode: 2432 Length: 200 Total reward: -200.0 
Episode: 2433 Length: 200 Total reward: -200.0 
Episode: 2434 Length: 200 Total reward: -200.0 
Episode: 2435 Length: 200 Total reward: -200.0 
Episode: 2436 Length: 200 Total reward: -200.0 
Episode: 2437 Length: 200 Total reward: -200.0 
Episode: 2438 Length: 200 Total reward: -200.0 
Episode: 2439 Length: 200 Total reward: -200.0 
Episode: 2440 Length: 200 Total reward: 

Episode: 2591 Length: 200 Total reward: -200.0 
Episode: 2592 Length: 200 Total reward: -200.0 
Episode: 2593 Length: 200 Total reward: -200.0 
Episode: 2594 Length: 200 Total reward: -200.0 
Episode: 2595 Length: 200 Total reward: -200.0 
Episode: 2596 Length: 200 Total reward: -200.0 
Episode: 2597 Length: 200 Total reward: -200.0 
Episode: 2598 Length: 200 Total reward: -200.0 
Episode: 2599 Length: 200 Total reward: -200.0 
Episode: 2600 Length: 200 Total reward: -200.0 
Episode: 2601 Length: 200 Total reward: -200.0 
Episode: 2602 Length: 200 Total reward: -200.0 
Episode: 2603 Length: 200 Total reward: -200.0 
Episode: 2604 Length: 200 Total reward: -200.0 
Episode: 2605 Length: 200 Total reward: -200.0 
Episode: 2606 Length: 200 Total reward: -200.0 
Episode: 2607 Length: 200 Total reward: -200.0 
Episode: 2608 Length: 200 Total reward: -200.0 
Episode: 2609 Length: 200 Total reward: -200.0 
Episode: 2610 Length: 200 Total reward: -200.0 
Episode: 2611 Length: 200 Total reward: 

Episode: 2762 Length: 200 Total reward: -200.0 
Episode: 2763 Length: 200 Total reward: -200.0 
Episode: 2764 Length: 200 Total reward: -200.0 
Episode: 2765 Length: 200 Total reward: -200.0 
Episode: 2766 Length: 200 Total reward: -200.0 
Episode: 2767 Length: 200 Total reward: -200.0 
Episode: 2768 Length: 200 Total reward: -200.0 
Episode: 2769 Length: 200 Total reward: -200.0 
Episode: 2770 Length: 200 Total reward: -200.0 
Episode: 2771 Length: 200 Total reward: -200.0 
Episode: 2772 Length: 200 Total reward: -200.0 
Episode: 2773 Length: 200 Total reward: -200.0 
Episode: 2774 Length: 200 Total reward: -200.0 
Episode: 2775 Length: 200 Total reward: -200.0 
Episode: 2776 Length: 200 Total reward: -200.0 
Episode: 2777 Length: 200 Total reward: -200.0 
Episode: 2778 Length: 200 Total reward: -200.0 
Episode: 2779 Length: 200 Total reward: -200.0 
Episode: 2780 Length: 200 Total reward: -200.0 
Episode: 2781 Length: 200 Total reward: -200.0 
Episode: 2782 Length: 200 Total reward: 

Episode: 2933 Length: 200 Total reward: -200.0 
Episode: 2934 Length: 200 Total reward: -200.0 
Episode: 2935 Length: 200 Total reward: -200.0 
Episode: 2936 Length: 200 Total reward: -200.0 
Episode: 2937 Length: 200 Total reward: -200.0 
Episode: 2938 Length: 200 Total reward: -200.0 
Episode: 2939 Length: 200 Total reward: -200.0 
Episode: 2940 Length: 200 Total reward: -200.0 
Episode: 2941 Length: 200 Total reward: -200.0 
Episode: 2942 Length: 200 Total reward: -200.0 
Episode: 2943 Length: 200 Total reward: -200.0 
Episode: 2944 Length: 200 Total reward: -200.0 
Episode: 2945 Length: 200 Total reward: -200.0 
Episode: 2946 Length: 200 Total reward: -200.0 
Episode: 2947 Length: 200 Total reward: -200.0 
Episode: 2948 Length: 200 Total reward: -200.0 
Episode: 2949 Length: 200 Total reward: -200.0 
Episode: 2950 Length: 200 Total reward: -200.0 
Episode: 2951 Length: 200 Total reward: -200.0 
Episode: 2952 Length: 200 Total reward: -200.0 
Episode: 2953 Length: 200 Total reward: 

Episode: 3104 Length: 200 Total reward: -200.0 
Episode: 3105 Length: 200 Total reward: -200.0 
Episode: 3106 Length: 200 Total reward: -200.0 
Episode: 3107 Length: 200 Total reward: -200.0 
Episode: 3108 Length: 200 Total reward: -200.0 
Episode: 3109 Length: 200 Total reward: -200.0 
Episode: 3110 Length: 200 Total reward: -200.0 
Episode: 3111 Length: 200 Total reward: -200.0 
Episode: 3112 Length: 200 Total reward: -200.0 
Episode: 3113 Length: 200 Total reward: -200.0 
Episode: 3114 Length: 200 Total reward: -200.0 
Episode: 3115 Length: 200 Total reward: -200.0 
Episode: 3116 Length: 200 Total reward: -200.0 
Episode: 3117 Length: 200 Total reward: -200.0 
Episode: 3118 Length: 200 Total reward: -200.0 
Episode: 3119 Length: 200 Total reward: -200.0 
Episode: 3120 Length: 200 Total reward: -200.0 
Episode: 3121 Length: 200 Total reward: -200.0 
Episode: 3122 Length: 200 Total reward: -200.0 
Episode: 3123 Length: 200 Total reward: -200.0 
Episode: 3124 Length: 200 Total reward: 

Episode: 3275 Length: 200 Total reward: -200.0 
Episode: 3276 Length: 200 Total reward: -200.0 
Episode: 3277 Length: 200 Total reward: -200.0 
Episode: 3278 Length: 200 Total reward: -200.0 
Episode: 3279 Length: 200 Total reward: -200.0 
Episode: 3280 Length: 200 Total reward: -200.0 
Episode: 3281 Length: 200 Total reward: -200.0 
Episode: 3282 Length: 200 Total reward: -200.0 
Episode: 3283 Length: 200 Total reward: -200.0 
Episode: 3284 Length: 200 Total reward: -200.0 
Episode: 3285 Length: 200 Total reward: -200.0 
Episode: 3286 Length: 200 Total reward: -200.0 
Episode: 3287 Length: 200 Total reward: -200.0 
Episode: 3288 Length: 200 Total reward: -200.0 
Episode: 3289 Length: 200 Total reward: -200.0 
Episode: 3290 Length: 200 Total reward: -200.0 
Episode: 3291 Length: 200 Total reward: -200.0 
Episode: 3292 Length: 200 Total reward: -200.0 
Episode: 3293 Length: 200 Total reward: -200.0 
Episode: 3294 Length: 200 Total reward: -200.0 
Episode: 3295 Length: 200 Total reward: 

Episode: 3446 Length: 200 Total reward: -200.0 
Episode: 3447 Length: 200 Total reward: -200.0 
Episode: 3448 Length: 200 Total reward: -200.0 
Episode: 3449 Length: 200 Total reward: -200.0 
Episode: 3450 Length: 200 Total reward: -200.0 
Episode: 3451 Length: 200 Total reward: -200.0 
Episode: 3452 Length: 200 Total reward: -200.0 
Episode: 3453 Length: 200 Total reward: -200.0 
Episode: 3454 Length: 200 Total reward: -200.0 
Episode: 3455 Length: 200 Total reward: -200.0 
Episode: 3456 Length: 200 Total reward: -200.0 
Episode: 3457 Length: 200 Total reward: -200.0 
Episode: 3458 Length: 200 Total reward: -200.0 
Episode: 3459 Length: 200 Total reward: -200.0 
Episode: 3460 Length: 200 Total reward: -200.0 
Episode: 3461 Length: 200 Total reward: -200.0 
Episode: 3462 Length: 200 Total reward: -200.0 
Episode: 3463 Length: 200 Total reward: -200.0 
Episode: 3464 Length: 200 Total reward: -200.0 
Episode: 3465 Length: 200 Total reward: -200.0 
Episode: 3466 Length: 200 Total reward: 

array([200, 200, 200, ..., 200, 200, 200])

In [135]:
class QApproxModel:
    """Neural-network approximation of the action-value function Q(s, a).

    The network maps an observation to one Q-value per discrete action and
    is trained by regressing towards one-step Q-learning targets.
    """

    def __init__(
            self,
            env,
            model_fit_kwargs={
                'verbose': 0,
                'batch_size': 32,
                'epochs': 1,
                'use_multiprocessing': True},
            eps=0.9,
            alpha=0.5,
            gamma=0.9,
            adam_learning_rate=0.1,
    ):
        """
        Args:
            env: environment providing ``observation_space.shape`` and a
                discrete ``action_space`` (``.n`` and ``.sample()``).
            model_fit_kwargs: keyword arguments forwarded to ``model.fit``.
            eps: probability of taking a random action in
                ``epsilon_greedy``.
            alpha: blending factor between the current Q estimate and the
                new target when building regression targets.
            gamma: discount applied to the best next-state Q-value.
            adam_learning_rate: learning rate of the Adam optimizer.
        """
        self.env = env

        # Three hidden ReLU layers of 24 units; linear output per action.
        self.model = Sequential()
        self.model.add(Dense(24, input_shape=(
            self.env.observation_space.shape[0],), activation="relu"))
        self.model.add(Dense(24, activation="relu"))
        self.model.add(Dense(24, activation="relu"))
        self.model.add(Dense(self.env.action_space.n, activation="linear"))
        self.model.compile(loss="mse", optimizer=Adam(lr=adam_learning_rate))
        # Copy so the shared default dict (or the caller's dict) is never
        # mutated through this instance.
        self.model_fit_kwargs = dict(model_fit_kwargs)

        self.eps = eps
        self.alpha = alpha
        self.gamma = gamma

    def greedy_action(self, state):
        """Return the index of the action with the highest predicted Q-value.

        ``state`` is passed to ``model.predict`` as-is; only the first row
        of the prediction is used.
        """
        q_values = self.model.predict(state)
        return np.argmax(q_values[0])

    def epsilon_greedy(self, state):
        """Return a random action with probability ``eps``, else the greedy one."""
        if np.random.random() >= self.eps:
            return self.greedy_action(state)

        return self.env.action_space.sample()

    def get_X_y(self, experience):
        """Build regression inputs and targets from a batch of transitions.

        Args:
            experience: iterable of
                ``(state, action, reward, new_state, terminated)`` tuples.

        Returns:
            ``(X, y)``: ``X`` holds one flattened state per row; ``y`` holds
            the network's current Q-values for that state with the entry of
            the taken action moved towards the one-step target
            ``reward + gamma * max_a Q(new_state, a)`` (just ``reward`` for
            terminal transitions) by the factor ``alpha``.
        """
        experience = list(experience)
        if not experience:
            return (np.empty((0, self.env.observation_space.shape[0])),
                    np.empty((0, self.env.action_space.n)))

        states, actions, rewards, next_states, dones = zip(*experience)
        X = np.array([np.ravel(s) for s in states])
        next_X = np.array([np.ravel(s) for s in next_states])
        actions = np.asarray(actions, dtype=int)
        rewards = np.asarray(rewards, dtype=float)
        dones = np.asarray(dones, dtype=bool)

        # One batched forward pass per side instead of two per transition.
        best_next_q = np.amax(self.model.predict(next_X), axis=1)
        # Terminal transitions do not bootstrap from the next state.
        q_update = np.where(dones, rewards, rewards + self.gamma * best_next_q)

        # Copy so the array returned by the model is never modified in place.
        y = np.array(self.model.predict(X))
        rows = np.arange(len(X))
        y[rows, actions] = (
            (1 - self.alpha) * y[rows, actions] + self.alpha * q_update)
        return X, y

    def experience_replay(self, experience):
        """Fit the network on targets built from ``experience``.

        Does nothing when ``experience`` is empty.
        """
        experience = list(experience)
        if not experience:
            return
        X, y = self.get_X_y(experience)
        self.update(X, y)

    def update(self, X, y):
        """Run ``model.fit(X, y)`` with the configured fit keyword arguments."""
        self.model.fit(
            X, y,
            **self.model_fit_kwargs)
    

In [136]:
class EnvModel:
    """Placeholder for the environment model used by Dyna planning.

    Nothing is learned yet: ``update`` is a no-op and ``step`` does not
    look at its arguments.
    """

    def __init__(self):
        """Create the model; there is no state to initialise yet."""

    def update(self):
        """Placeholder for learning from experience; currently does nothing."""
        pass

    @staticmethod
    def step(state, action):
        """
        Return state-prime and reward.

        Declared static because it takes no ``self``; this lets it be called
        on an instance as well as on the class.

        NOTE(review): placeholder implementation. It ignores ``state`` and
        ``action``, resets the module-level ``env`` and unpacks the value
        returned by ``reset()`` into two items. Confirm the intended
        behaviour before relying on it.
        """
        s_prime, v = env.reset()

        return s_prime, v

In [None]:
import maze
import numpy as np
import matplotlib.pyplot as plt

class DynaQ():
    """Tabular Dyna-Q agent: Q-learning on real steps plus model-based planning."""

    def __init__(self, game, n, alpha, gamma, epsilon, max_steps,
                 switch_step=1000):
        """
        Args:
            game: object whose ``make(board)`` builds an environment with
                discrete ``observation_space`` and ``action_space``.
            n: number of planning updates performed after every real step
                (0 gives plain Q-learning).
            alpha: Q-learning step size.
            gamma: discount factor.
            epsilon: probability of taking a random action.
            max_steps: total number of real environment steps in ``learn``.
            switch_step: step index at which the board is replaced by
                ``BLOCKING_MAZE2``.
        """
        self.game = game
        # NOTE(review): BLOCKING_MAZE1 / BLOCKING_MAZE2 are not defined in the
        # visible code; confirm where they come from.
        self.env = game.make(BLOCKING_MAZE1)
        self.q = np.zeros((self.env.observation_space.n, self.env.action_space.n))
        self.epsilon = epsilon
        self.model =  Model(self.env.observation_space.n, self.env.action_space.n)
        self.n = n
        # Keep the hyperparameters on the instance so learn() and planning()
        # do not depend on module-level globals of the same name.
        self.alpha = alpha
        self.gamma = gamma
        self.max_steps = max_steps
        self.switch_step = switch_step

    def _q_update(self, s, a, r, s_prime):
        """Apply one Q-learning update for the transition (s, a, r, s_prime)."""
        td_target = r + self.gamma * np.max(self.q[s_prime])
        self.q[s, a] += self.alpha * (td_target - self.q[s, a])

    def learn(self):
        """ Perform DynaQ learning, return cumulative return """
        s = self.env.reset() # initialize first state
        cum_reward = [0] # cumulative reward

        # Run for a fixed budget of real steps, spanning as many episodes
        # as fit into it.
        for step in range(self.max_steps):
            # Epsilon greedy action
            if np.random.uniform() < self.epsilon:
                a = self.env.action_space.sample()
            else:
                # Break ties between equally good actions at random.
                a = np.random.choice(np.where(self.q[s] == np.max(self.q[s]))[0])

            # Take action, observe outcome
            s_prime, r, done, info = self.env.step(a)

            # Q-Learning
            self._q_update(s, a, r, s_prime)

            # Learn model
            self.model.add(s,a,s_prime,r)

            # Planning for n steps
            self.planning()

            # Set state for next loop
            s = s_prime

            # Reset game if at the end
            if done:
                s = self.env.reset()

            # Check if time to switch board
            if step == self.switch_step:
                self.env = self.game.make(BLOCKING_MAZE2)
                s = self.env.reset()

            # Add reward to count
            cum_reward.append(cum_reward[-1] + r)

        return np.array(cum_reward[1:])

    def planning(self):
        """Perform ``n`` Q-updates on transitions replayed from the model."""
        for _ in range(self.n):
            s, a =  self.model.sample()
            s_prime, r = self.model.step(s,a)
            self._q_update(s, a, r, s_prime)

class Model():
    """Deterministic tabular model: remembers the last outcome of each (s, a)."""

    def __init__(self, n_states, n_actions):
        """Allocate empty tables for ``n_states`` x ``n_actions`` pairs."""
        # A wide integer dtype so state indices above 255 are stored intact.
        self.transitions = np.zeros((n_states,n_actions), dtype=np.int64)
        self.rewards = np.zeros((n_states, n_actions))
        # Explicit record of which (s, a) pairs were observed; the transition
        # table cannot encode this because 0 is itself a valid next state.
        self.visited = np.zeros((n_states, n_actions), dtype=bool)

    def add(self,s,a,s_prime,r):
        """Record that taking ``a`` in ``s`` led to ``s_prime`` with reward ``r``."""
        self.transitions[s,a] = s_prime
        self.rewards[s,a] = r
        self.visited[s,a] = True

    def sample(self):
        """ Return random state, action

        The state is drawn uniformly from the states with at least one
        recorded action, then the action uniformly from those recorded in
        that state.

        Raises:
            ValueError: if no transition has been added yet.
        """
        # Random visited state
        seen_states = np.where(self.visited.any(axis=1))[0]
        if seen_states.size == 0:
            raise ValueError("cannot sample from a model with no transitions")
        s = np.random.choice(seen_states)
        # Random action in that state
        a = np.random.choice(np.where(self.visited[s])[0])

        return s,a

    def step(self, s,a):
        """ Return state_prime and reward for state-action pair"""
        s_prime = self.transitions[s,a]
        r = self.rewards[s,a]

        return s_prime, r


def plot_data(y):
    """ y is a 1D vector """
    x = np.arange(y.size)
    _ = plt.plot(x, y, '-')
    plt.show()


def multi_plot_data(data, names):
    """ data, names are lists of vectors

    Draws every series of ``data`` on one figure, labelled with the entry
    of ``names`` at the same position. Does nothing for empty ``data``.
    """
    if len(data) == 0:
        return
    x = np.arange(data[0].size)
    for i, y in enumerate(data):
        plt.plot(x, y, '-', markersize=2, label=names[i])
    # Legend and show once, after all series are drawn, so every curve lands
    # on the same figure.
    plt.legend(loc='lower right', prop={'size': 16}, numpoints=5)
    plt.show()


# Keep the helpers reachable as Model.plot_data / Model.multi_plot_data,
# which is where they originally lived.
Model.plot_data = staticmethod(plot_data)
Model.multi_plot_data = staticmethod(multi_plot_data)

if __name__ == '__main__':
    # Experiment: compare cumulative reward of Dyna-Q with different numbers
    # of planning steps against plain Q-learning on the blocking maze.

    # Hyperparams
    alpha = 0.1 # learning rate
    gamma = 0.95 # discount
    epsilon = 0.3
    max_steps = 3000
    trials = 1

    # (planning steps per real step, plot label); n = 0 is plain Q-learning.
    configs = [
        (5, "DynaQ, n=5"),
        (50, "DynaQ, n=50"),
        (0, "Q-Learning"),
    ]

    # One (trials, max_steps) table of cumulative rewards per configuration.
    returns = {n: np.zeros((trials, max_steps)) for n, _ in configs}
    for t in range(trials):
        for n, _ in configs:
            agent = DynaQ(maze, n, alpha, gamma, epsilon, max_steps)
            returns[n][t] = agent.learn()

    # Average across trials
    data = [np.mean(returns[n], axis=0) for n, _ in configs]
    names = [label for _, label in configs]

    # Call the helper through Model: in this file it is exposed as a Model
    # attribute, and the bare module-level name may not exist.
    Model.multi_plot_data(data, names)
