In [1]:
import gym
import numpy as np
import random
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model

In [2]:
class Agent:
    
    def __init__(self,obs_size,action_size):
        
        self.obs_size=obs_size                          #size of tuple used to define any state in the game
        self.action_size=action_size                    #no. of possible actions
        self.gamma=0.95                                 #discount factor
        self.memory=deque(maxlen=2000)                  
        #stores past experiences, experiences in the distant past will be discarded
        #when the memory is full and experiences in the recent past will be used
        
        self.epsilon=1                                  #exploration factor, intially we will do 100% exploration
        self.epsilon_decay=0.995                        #how much the exploration factor will decrease after episode
        self.epsilon_min=0.01                           #minimum value of epsilon
        self.lr=0.001                                   #learning_rate for SGD
        self.save_path="mount.h5"
        self.model=self._create_model()
        
    def _create_model(self):
        
#         model=Sequential()
        
#         model.add(Dense(24,activation="relu",input_dim=self.obs_size))
#         model.add(Dense(24,activation="relu"))
#         model.add(Dense(self.action_size,activation="linear"))
        
#         model.compile(optimizer=Adam(lr=self.lr),loss="mse")                      

        model=self.load()
        
        return model
    
    def remember(self,cur_state,action,reward,next_state,done):
        self.memory.append((cur_state,action,reward,next_state,done))
    
    def act(self,state):
        
        if np.random.rand()<=self.epsilon:                                #exploration
            return random.randrange(self.action_size)#return random.sample(list(np.arange(self.action_size)),1)[0]
        
        return np.argmax(self.model.predict(state)[0])                     #exploitation
    
    def fit(self,batch_size=32):                                           
        
        batch=random.sample(self.memory,batch_size)
        
        for cur_state,action,reward,next_state,done in batch:
            
            if done:
                y_true=reward
                
            else:
                y_true=reward+(self.gamma*np.amax(self.model.predict(next_state)[0]))
                
            target=self.model.predict(cur_state)
            target[0][action]=y_true
            
            self.model.fit(cur_state,target,epochs=1,verbose=0)
            
        if self.epsilon>self.epsilon_min:
            self.epsilon*=self.epsilon_decay
            
    def save(self):
        self.model.save(self.save_path)
        
    def load(self):
        return load_model(self.save_path)

In [3]:
env=gym.make("MountainCar-v0")                           #creates a mountain car game environment
#env.max_episode_steps=1000

done=False
observation_size=env.observation_space.shape[0]
action_size=env.action_space.n
batch_size=32

agent=Agent(action_size=action_size,obs_size=observation_size)
agent

W0824 12:02:37.841754  7864 deprecation.py:506] From c:\users\akshay\anaconda3\envs\tf\lib\site-packages\tensorflow_core\python\ops\resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


<__main__.Agent at 0x24c63cd6518>

In [None]:
for episode in range(3000):
    
    state=env.reset()                                 #intial state for each episode
    state=np.reshape(state,(1,observation_size))
    
    for t in range(200):
        
        #env.render()
        
        action=agent.act(state)
        next_state,reward,done,other_info=env.step(action)
        
        next_state=np.reshape(next_state,(1,observation_size))
        reward=abs(next_state[0][0])
        
        if next_state[0][0]>0:
            reward*=3
        
        if done and t<199:
            reward=10000
        
        agent.remember(cur_state=state,action=action,reward=reward,next_state=next_state,done=done)
        state=next_state
        
        if done:
            print("Episode: ",episode," Score: ",-t," Exploration factor: ",agent.epsilon)
            break
    
    if len(agent.memory)>=batch_size:
        agent.fit(batch_size)
        
agent.save()
env.close()

Episode:  0  Score:  -199  Exploration factor:  1
Episode:  1  Score:  -199  Exploration factor:  0.995
Episode:  2  Score:  -199  Exploration factor:  0.990025
Episode:  3  Score:  -199  Exploration factor:  0.985074875
Episode:  4  Score:  -199  Exploration factor:  0.9801495006250001
Episode:  5  Score:  -199  Exploration factor:  0.9752487531218751
Episode:  6  Score:  -199  Exploration factor:  0.9703725093562657
Episode:  7  Score:  -199  Exploration factor:  0.9655206468094844
Episode:  8  Score:  -199  Exploration factor:  0.960693043575437
Episode:  9  Score:  -199  Exploration factor:  0.9558895783575597
Episode:  10  Score:  -199  Exploration factor:  0.9511101304657719
Episode:  11  Score:  -199  Exploration factor:  0.946354579813443
Episode:  12  Score:  -199  Exploration factor:  0.9416228069143757
Episode:  13  Score:  -199  Exploration factor:  0.9369146928798039
Episode:  14  Score:  -199  Exploration factor:  0.9322301194154049
Episode:  15  Score:  -199  Exploration

Episode:  122  Score:  -199  Exploration factor:  0.5425201222922789
Episode:  123  Score:  -199  Exploration factor:  0.5398075216808175
Episode:  124  Score:  -199  Exploration factor:  0.5371084840724134
Episode:  125  Score:  -199  Exploration factor:  0.5344229416520513
Episode:  126  Score:  -199  Exploration factor:  0.531750826943791
Episode:  127  Score:  -199  Exploration factor:  0.5290920728090721
Episode:  128  Score:  -199  Exploration factor:  0.5264466124450268
Episode:  129  Score:  -199  Exploration factor:  0.5238143793828016
Episode:  130  Score:  -199  Exploration factor:  0.5211953074858876
Episode:  131  Score:  -199  Exploration factor:  0.5185893309484582
Episode:  132  Score:  -199  Exploration factor:  0.5159963842937159
Episode:  133  Score:  -199  Exploration factor:  0.5134164023722473
Episode:  134  Score:  -199  Exploration factor:  0.510849320360386
Episode:  135  Score:  -199  Exploration factor:  0.5082950737585841
Episode:  136  Score:  -199  Explora

Episode:  241  Score:  -199  Exploration factor:  0.2987875242397482
Episode:  242  Score:  -199  Exploration factor:  0.29729358661854943
Episode:  243  Score:  -199  Exploration factor:  0.29580711868545667
Episode:  244  Score:  -151  Exploration factor:  0.2943280830920294
Episode:  245  Score:  -199  Exploration factor:  0.29285644267656924
Episode:  246  Score:  -149  Exploration factor:  0.2913921604631864
Episode:  247  Score:  -199  Exploration factor:  0.28993519966087045
Episode:  248  Score:  -199  Exploration factor:  0.2884855236625661
Episode:  249  Score:  -199  Exploration factor:  0.28704309604425327
Episode:  250  Score:  -199  Exploration factor:  0.285607880564032
Episode:  251  Score:  -199  Exploration factor:  0.28417984116121187
Episode:  252  Score:  -199  Exploration factor:  0.2827589419554058
Episode:  253  Score:  -199  Exploration factor:  0.28134514724562876
Episode:  254  Score:  -199  Exploration factor:  0.2799384215094006
Episode:  255  Score:  -199 

Episode:  359  Score:  -153  Exploration factor:  0.16538114245489302
Episode:  360  Score:  -109  Exploration factor:  0.16455423674261854
Episode:  361  Score:  -199  Exploration factor:  0.16373146555890544
Episode:  362  Score:  -177  Exploration factor:  0.16291280823111093
Episode:  363  Score:  -187  Exploration factor:  0.16209824418995536
Episode:  364  Score:  -145  Exploration factor:  0.16128775296900558
Episode:  365  Score:  -144  Exploration factor:  0.16048131420416054
Episode:  366  Score:  -145  Exploration factor:  0.15967890763313974
Episode:  367  Score:  -135  Exploration factor:  0.15888051309497406
Episode:  368  Score:  -151  Exploration factor:  0.1580861105294992
Episode:  369  Score:  -161  Exploration factor:  0.1572956799768517
Episode:  370  Score:  -175  Exploration factor:  0.15650920157696743
Episode:  371  Score:  -163  Exploration factor:  0.1557266555690826
Episode:  372  Score:  -144  Exploration factor:  0.1549480222912372
Episode:  373  Score:  -

Episode:  477  Score:  -107  Exploration factor:  0.09153970651645797
Episode:  478  Score:  -144  Exploration factor:  0.09108200798387568
Episode:  479  Score:  -152  Exploration factor:  0.0906265979439563
Episode:  480  Score:  -109  Exploration factor:  0.09017346495423652
Episode:  481  Score:  -145  Exploration factor:  0.08972259762946533
Episode:  482  Score:  -139  Exploration factor:  0.089273984641318
Episode:  483  Score:  -187  Exploration factor:  0.0888276147181114
Episode:  484  Score:  -145  Exploration factor:  0.08838347664452084
Episode:  485  Score:  -133  Exploration factor:  0.08794155926129824
Episode:  486  Score:  -142  Exploration factor:  0.08750185146499175
Episode:  487  Score:  -138  Exploration factor:  0.08706434220766679
Episode:  488  Score:  -108  Exploration factor:  0.08662902049662846
Episode:  489  Score:  -112  Exploration factor:  0.08619587539414532
Episode:  490  Score:  -113  Exploration factor:  0.08576489601717459
Episode:  491  Score:  -

Episode:  595  Score:  -120  Exploration factor:  0.05066791621302729
Episode:  596  Score:  -109  Exploration factor:  0.05041457663196215
Episode:  597  Score:  -148  Exploration factor:  0.050162503748802344
Episode:  598  Score:  -134  Exploration factor:  0.049911691230058335
Episode:  599  Score:  -110  Exploration factor:  0.04966213277390804
Episode:  600  Score:  -149  Exploration factor:  0.0494138221100385
Episode:  601  Score:  -137  Exploration factor:  0.04916675299948831
Episode:  602  Score:  -144  Exploration factor:  0.04892091923449087
Episode:  603  Score:  -136  Exploration factor:  0.04867631463831842
Episode:  604  Score:  -153  Exploration factor:  0.048432933065126825
Episode:  605  Score:  -106  Exploration factor:  0.048190768399801194
Episode:  606  Score:  -164  Exploration factor:  0.04794981455780219
Episode:  607  Score:  -141  Exploration factor:  0.04771006548501318
Episode:  608  Score:  -144  Exploration factor:  0.047471515157588115
Episode:  609  S

Episode:  712  Score:  -148  Exploration factor:  0.028186002814352063
Episode:  713  Score:  -142  Exploration factor:  0.0280450728002803
Episode:  714  Score:  -140  Exploration factor:  0.0279048474362789
Episode:  715  Score:  -167  Exploration factor:  0.027765323199097504
Episode:  716  Score:  -150  Exploration factor:  0.027626496583102015
Episode:  717  Score:  -130  Exploration factor:  0.027488364100186506
Episode:  718  Score:  -145  Exploration factor:  0.027350922279685573
Episode:  719  Score:  -169  Exploration factor:  0.027214167668287145
Episode:  720  Score:  -134  Exploration factor:  0.02707809682994571
Episode:  721  Score:  -171  Exploration factor:  0.02694270634579598
Episode:  722  Score:  -155  Exploration factor:  0.026807992814067
Episode:  723  Score:  -146  Exploration factor:  0.026673952849996664
Episode:  724  Score:  -152  Exploration factor:  0.02654058308574668
Episode:  725  Score:  -131  Exploration factor:  0.026407880170317945
Episode:  726  S

Episode:  828  Score:  -132  Exploration factor:  0.01575835418494799
Episode:  829  Score:  -136  Exploration factor:  0.01567956241402325
Episode:  830  Score:  -141  Exploration factor:  0.015601164601953134
Episode:  831  Score:  -150  Exploration factor:  0.015523158778943369
Episode:  832  Score:  -150  Exploration factor:  0.015445542985048652
Episode:  833  Score:  -149  Exploration factor:  0.015368315270123408
Episode:  834  Score:  -136  Exploration factor:  0.01529147369377279
Episode:  835  Score:  -139  Exploration factor:  0.015215016325303928
Episode:  836  Score:  -139  Exploration factor:  0.015138941243677408
Episode:  837  Score:  -132  Exploration factor:  0.01506324653745902
Episode:  838  Score:  -153  Exploration factor:  0.014987930304771725
Episode:  839  Score:  -140  Exploration factor:  0.014912990653247866
Episode:  840  Score:  -143  Exploration factor:  0.014838425699981627
Episode:  841  Score:  -134  Exploration factor:  0.01476423357148172
Episode:  8

Episode:  945  Score:  -138  Exploration factor:  0.00998645168764533
Episode:  946  Score:  -139  Exploration factor:  0.00998645168764533
Episode:  947  Score:  -137  Exploration factor:  0.00998645168764533
Episode:  948  Score:  -141  Exploration factor:  0.00998645168764533
Episode:  949  Score:  -86  Exploration factor:  0.00998645168764533
Episode:  950  Score:  -87  Exploration factor:  0.00998645168764533
Episode:  951  Score:  -144  Exploration factor:  0.00998645168764533
Episode:  952  Score:  -137  Exploration factor:  0.00998645168764533
Episode:  953  Score:  -137  Exploration factor:  0.00998645168764533
Episode:  954  Score:  -139  Exploration factor:  0.00998645168764533
Episode:  955  Score:  -143  Exploration factor:  0.00998645168764533
Episode:  956  Score:  -146  Exploration factor:  0.00998645168764533
Episode:  957  Score:  -99  Exploration factor:  0.00998645168764533
Episode:  958  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  959  Score:  

Episode:  1062  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1063  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1064  Score:  -92  Exploration factor:  0.00998645168764533
Episode:  1065  Score:  -94  Exploration factor:  0.00998645168764533
Episode:  1066  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1067  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1068  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1069  Score:  -105  Exploration factor:  0.00998645168764533
Episode:  1070  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1071  Score:  -84  Exploration factor:  0.00998645168764533
Episode:  1072  Score:  -96  Exploration factor:  0.00998645168764533
Episode:  1073  Score:  -95  Exploration factor:  0.00998645168764533
Episode:  1074  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1075  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1

Episode:  1178  Score:  -93  Exploration factor:  0.00998645168764533
Episode:  1179  Score:  -97  Exploration factor:  0.00998645168764533
Episode:  1180  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1181  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1182  Score:  -96  Exploration factor:  0.00998645168764533
Episode:  1183  Score:  -164  Exploration factor:  0.00998645168764533
Episode:  1184  Score:  -166  Exploration factor:  0.00998645168764533
Episode:  1185  Score:  -87  Exploration factor:  0.00998645168764533
Episode:  1186  Score:  -115  Exploration factor:  0.00998645168764533
Episode:  1187  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1188  Score:  -87  Exploration factor:  0.00998645168764533
Episode:  1189  Score:  -82  Exploration factor:  0.00998645168764533
Episode:  1190  Score:  -89  Exploration factor:  0.00998645168764533
Episode:  1191  Score:  -125  Exploration factor:  0.00998645168764533
Episode:  119

Episode:  1294  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1295  Score:  -127  Exploration factor:  0.00998645168764533
Episode:  1296  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1297  Score:  -169  Exploration factor:  0.00998645168764533
Episode:  1298  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1299  Score:  -165  Exploration factor:  0.00998645168764533
Episode:  1300  Score:  -156  Exploration factor:  0.00998645168764533
Episode:  1301  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1302  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1303  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1304  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1305  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1306  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1307  Score:  -90  Exploration factor:  0.00998645168764533
Episode

Episode:  1410  Score:  -91  Exploration factor:  0.00998645168764533
Episode:  1411  Score:  -95  Exploration factor:  0.00998645168764533
Episode:  1412  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1413  Score:  -149  Exploration factor:  0.00998645168764533
Episode:  1414  Score:  -154  Exploration factor:  0.00998645168764533
Episode:  1415  Score:  -154  Exploration factor:  0.00998645168764533
Episode:  1416  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1417  Score:  -85  Exploration factor:  0.00998645168764533
Episode:  1418  Score:  -101  Exploration factor:  0.00998645168764533
Episode:  1419  Score:  -150  Exploration factor:  0.00998645168764533
Episode:  1420  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1421  Score:  -93  Exploration factor:  0.00998645168764533
Episode:  1422  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1423  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  

Episode:  1526  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1527  Score:  -158  Exploration factor:  0.00998645168764533
Episode:  1528  Score:  -85  Exploration factor:  0.00998645168764533
Episode:  1529  Score:  -170  Exploration factor:  0.00998645168764533
Episode:  1530  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1531  Score:  -93  Exploration factor:  0.00998645168764533
Episode:  1532  Score:  -91  Exploration factor:  0.00998645168764533
Episode:  1533  Score:  -160  Exploration factor:  0.00998645168764533
Episode:  1534  Score:  -87  Exploration factor:  0.00998645168764533
Episode:  1535  Score:  -89  Exploration factor:  0.00998645168764533
Episode:  1536  Score:  -160  Exploration factor:  0.00998645168764533
Episode:  1537  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1538  Score:  -86  Exploration factor:  0.00998645168764533
Episode:  1539  Score:  -163  Exploration factor:  0.00998645168764533
Episode:  15

Episode:  1642  Score:  -157  Exploration factor:  0.00998645168764533
Episode:  1643  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1644  Score:  -172  Exploration factor:  0.00998645168764533
Episode:  1645  Score:  -170  Exploration factor:  0.00998645168764533
Episode:  1646  Score:  -154  Exploration factor:  0.00998645168764533
Episode:  1647  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1648  Score:  -160  Exploration factor:  0.00998645168764533
Episode:  1649  Score:  -150  Exploration factor:  0.00998645168764533
Episode:  1650  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1651  Score:  -157  Exploration factor:  0.00998645168764533
Episode:  1652  Score:  -185  Exploration factor:  0.00998645168764533
Episode:  1653  Score:  -88  Exploration factor:  0.00998645168764533
Episode:  1654  Score:  -156  Exploration factor:  0.00998645168764533
Episode:  1655  Score:  -179  Exploration factor:  0.00998645168764533
Episode

Episode:  1758  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1759  Score:  -159  Exploration factor:  0.00998645168764533
Episode:  1760  Score:  -144  Exploration factor:  0.00998645168764533
Episode:  1761  Score:  -87  Exploration factor:  0.00998645168764533
Episode:  1762  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1763  Score:  -86  Exploration factor:  0.00998645168764533
Episode:  1764  Score:  -158  Exploration factor:  0.00998645168764533
Episode:  1765  Score:  -195  Exploration factor:  0.00998645168764533
Episode:  1766  Score:  -157  Exploration factor:  0.00998645168764533
Episode:  1767  Score:  -199  Exploration factor:  0.00998645168764533
Episode:  1768  Score:  -100  Exploration factor:  0.00998645168764533
Episode:  1769  Score:  -146  Exploration factor:  0.00998645168764533
Episode:  1770  Score:  -103  Exploration factor:  0.00998645168764533
Episode:  1771  Score:  -199  Exploration factor:  0.00998645168764533
Episode:

In [None]:
for episode in range(1):
    
    state=env.reset()                                 #intial state for each episode
    state=np.reshape(state,(1,observation_size))
    
    for t in range(5000):
        
        env.render()
        
        next_state,reward,done,other_info=env.step(np.argmax(agent.model.predict(state)[0]))
        next_state=np.reshape(next_state,(1,observation_size))
        
        state=next_state
        
        if done:
            print("Episode: ",episode," Score: ",t)
            break
            
env.close()