In [1]:
import gym
import numpy as np
from collections import deque

In [2]:
env = gym.make('CartPole-v0')

batch_size = 32
total_episodes = 1000

state_size = env.observation_space.shape[0]
action_size = env.action_space.n

In [3]:
import random

In [4]:
from keras.models import Sequential 
from keras.layers import Dense
from keras.optimizers import Adam 

Using TensorFlow backend.


In [5]:
class Pole:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen = 2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learnig_rate = 0.001
        self.model = self.make_model()
        
    def make_model(self):
        model = Sequential()
        model.add(Dense(64, input_dim = state_size, activation = 'relu'))
        model.add(Dense(32, activation = 'relu'))
        model.add(Dense(action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learnig_rate))
        
        return model
    
    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
        
    def train(self, batch_size):
        mini_batch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in mini_batch:
            if done:
                target = reward
            else:
                target = reward + np.max(self.gamma*self.model.predict(next_state))
            
            y = self.model.predict(state)
            y[0][action] = target
            self.model.fit(state, y, epochs=1, verbose=0)
            
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
                
    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_val = self.model.predict(state)
        return np.argmax(act_val[0])

In [6]:
agent = Pole(state_size, action_size)

for epi in range(total_episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    
    done = False
    active_time= 0
    
    while not done:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        reward = reward if not done else -10
        next_state = np.reshape(next_state, [1, state_size])
        
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done: 
            print(f"episode: {epi+1}, score: {active_time}, e: {agent.epsilon}")
        active_time+=1
            
    if len(agent.memory) > batch_size:
        agent.train(batch_size)
        print('-'*20 +f'trained{len(agent.memory)}'+ '-'*20)

episode: 1, score: 7, e: 1.0
episode: 2, score: 64, e: 1.0
--------------------trained73--------------------
episode: 3, score: 9, e: 0.995
--------------------trained83--------------------
episode: 4, score: 15, e: 0.990025
--------------------trained99--------------------
episode: 5, score: 14, e: 0.985074875
--------------------trained114--------------------
episode: 6, score: 15, e: 0.9801495006250001
--------------------trained130--------------------
episode: 7, score: 15, e: 0.9752487531218751
--------------------trained146--------------------
episode: 8, score: 18, e: 0.9703725093562657
--------------------trained165--------------------
episode: 9, score: 9, e: 0.9655206468094844
--------------------trained175--------------------
episode: 10, score: 17, e: 0.960693043575437
--------------------trained193--------------------
episode: 11, score: 22, e: 0.9558895783575597
--------------------trained216--------------------
episode: 12, score: 81, e: 0.9511101304657719
--------------

--------------------trained1767--------------------
episode: 89, score: 10, e: 0.6465587967553006
--------------------trained1778--------------------
episode: 90, score: 12, e: 0.6433260027715241
--------------------trained1791--------------------
episode: 91, score: 8, e: 0.6401093727576664
--------------------trained1800--------------------
episode: 92, score: 11, e: 0.6369088258938781
--------------------trained1812--------------------
episode: 93, score: 9, e: 0.6337242817644086
--------------------trained1822--------------------
episode: 94, score: 11, e: 0.6305556603555866
--------------------trained1834--------------------
episode: 95, score: 15, e: 0.6274028820538087
--------------------trained1850--------------------
episode: 96, score: 9, e: 0.6242658676435396
--------------------trained1860--------------------
episode: 97, score: 14, e: 0.6211445383053219
--------------------trained1875--------------------
episode: 98, score: 8, e: 0.6180388156137953
--------------------trai

--------------------trained2000--------------------
episode: 172, score: 23, e: 0.42650460709830135
--------------------trained2000--------------------
episode: 173, score: 26, e: 0.42437208406280985
--------------------trained2000--------------------
episode: 174, score: 34, e: 0.4222502236424958
--------------------trained2000--------------------
episode: 175, score: 23, e: 0.42013897252428334
--------------------trained2000--------------------
episode: 176, score: 27, e: 0.4180382776616619
--------------------trained2000--------------------
episode: 177, score: 23, e: 0.4159480862733536
--------------------trained2000--------------------
episode: 178, score: 54, e: 0.41386834584198684
--------------------trained2000--------------------
episode: 179, score: 35, e: 0.4117990041127769
--------------------trained2000--------------------
episode: 180, score: 74, e: 0.40974000909221303
--------------------trained2000--------------------
episode: 181, score: 27, e: 0.40769130904675194
----

--------------------trained2000--------------------
episode: 255, score: 153, e: 0.28134514724562876
--------------------trained2000--------------------
episode: 256, score: 199, e: 0.2799384215094006
--------------------trained2000--------------------
episode: 257, score: 122, e: 0.27853872940185365
--------------------trained2000--------------------
episode: 258, score: 163, e: 0.27714603575484437
--------------------trained2000--------------------
episode: 259, score: 121, e: 0.2757603055760701
--------------------trained2000--------------------
episode: 260, score: 199, e: 0.2743815040481898
--------------------trained2000--------------------
episode: 261, score: 199, e: 0.2730095965279488
--------------------trained2000--------------------
episode: 262, score: 63, e: 0.27164454854530906
--------------------trained2000--------------------
episode: 263, score: 125, e: 0.2702863258025825
--------------------trained2000--------------------
episode: 264, score: 151, e: 0.26893489417356

episode: 337, score: 199, e: 0.1865228530605915
--------------------trained2000--------------------
episode: 338, score: 115, e: 0.18559023879528855
--------------------trained2000--------------------
episode: 339, score: 77, e: 0.1846622876013121
--------------------trained2000--------------------
episode: 340, score: 142, e: 0.18373897616330553
--------------------trained2000--------------------
episode: 341, score: 148, e: 0.182820281282489
--------------------trained2000--------------------
episode: 342, score: 132, e: 0.18190617987607657
--------------------trained2000--------------------
episode: 343, score: 66, e: 0.18099664897669618
--------------------trained2000--------------------
episode: 344, score: 63, e: 0.1800916657318127
--------------------trained2000--------------------
episode: 345, score: 170, e: 0.17919120740315364
--------------------trained2000--------------------
episode: 346, score: 44, e: 0.17829525136613786
--------------------trained2000--------------------

episode: 420, score: 149, e: 0.12304040492325048
--------------------trained2000--------------------
episode: 421, score: 105, e: 0.12242520289863423
--------------------trained2000--------------------
episode: 422, score: 57, e: 0.12181307688414106
--------------------trained2000--------------------
episode: 423, score: 177, e: 0.12120401149972035
--------------------trained2000--------------------
episode: 424, score: 58, e: 0.12059799144222175
--------------------trained2000--------------------
episode: 425, score: 94, e: 0.11999500148501063
--------------------trained2000--------------------
episode: 426, score: 96, e: 0.11939502647758558
--------------------trained2000--------------------
episode: 427, score: 139, e: 0.11879805134519765
--------------------trained2000--------------------
episode: 428, score: 133, e: 0.11820406108847166
--------------------trained2000--------------------
episode: 429, score: 70, e: 0.1176130407830293
--------------------trained2000-----------------

episode: 502, score: 199, e: 0.08157186144027828
--------------------trained2000--------------------
episode: 503, score: 199, e: 0.0811640021330769
--------------------trained2000--------------------
episode: 504, score: 199, e: 0.08075818212241151
--------------------trained2000--------------------
episode: 505, score: 199, e: 0.08035439121179945
--------------------trained2000--------------------
episode: 506, score: 194, e: 0.07995261925574046
--------------------trained2000--------------------
episode: 507, score: 199, e: 0.07955285615946175
--------------------trained2000--------------------
episode: 508, score: 199, e: 0.07915509187866444
--------------------trained2000--------------------
episode: 509, score: 126, e: 0.07875931641927113
--------------------trained2000--------------------
episode: 510, score: 170, e: 0.07836551983717477
--------------------trained2000--------------------
episode: 511, score: 147, e: 0.07797369223798889
--------------------trained2000------------

episode: 584, score: 82, e: 0.05407954064343768
--------------------trained2000--------------------
episode: 585, score: 70, e: 0.05380914294022049
--------------------trained2000--------------------
episode: 586, score: 88, e: 0.05354009722551939
--------------------trained2000--------------------
episode: 587, score: 107, e: 0.05327239673939179
--------------------trained2000--------------------
episode: 588, score: 59, e: 0.053006034755694834
--------------------trained2000--------------------
episode: 589, score: 60, e: 0.052741004581916356
--------------------trained2000--------------------
episode: 590, score: 151, e: 0.052477299559006776
--------------------trained2000--------------------
episode: 591, score: 50, e: 0.052214913061211746
--------------------trained2000--------------------
episode: 592, score: 71, e: 0.05195383849590569
--------------------trained2000--------------------
episode: 593, score: 96, e: 0.05169406930342616
--------------------trained2000---------------

--------------------trained2000--------------------
episode: 666, score: 67, e: 0.03585300941485119
--------------------trained2000--------------------
episode: 667, score: 126, e: 0.035673744367776934
--------------------trained2000--------------------
episode: 668, score: 153, e: 0.03549537564593805
--------------------trained2000--------------------
episode: 669, score: 199, e: 0.035317898767708356
--------------------trained2000--------------------
episode: 670, score: 155, e: 0.03514130927386981
--------------------trained2000--------------------
episode: 671, score: 33, e: 0.03496560272750046
--------------------trained2000--------------------
episode: 672, score: 99, e: 0.03479077471386296
--------------------trained2000--------------------
episode: 673, score: 80, e: 0.03461682084029365
--------------------trained2000--------------------
episode: 674, score: 132, e: 0.034443736736092176
--------------------trained2000--------------------
episode: 675, score: 66, e: 0.0342715180

--------------------trained2000--------------------
episode: 747, score: 104, e: 0.023888845163905856
--------------------trained2000--------------------
episode: 748, score: 148, e: 0.023769400938086327
--------------------trained2000--------------------
episode: 749, score: 199, e: 0.023650553933395897
--------------------trained2000--------------------
episode: 750, score: 99, e: 0.023532301163728918
--------------------trained2000--------------------
episode: 751, score: 110, e: 0.023414639657910272
--------------------trained2000--------------------
episode: 752, score: 123, e: 0.023297566459620722
--------------------trained2000--------------------
episode: 753, score: 126, e: 0.023181078627322618
--------------------trained2000--------------------
episode: 754, score: 122, e: 0.023065173234186005
--------------------trained2000--------------------
episode: 755, score: 120, e: 0.022949847368015076
--------------------trained2000--------------------
episode: 756, score: 130, e: 0.

--------------------trained2000--------------------
episode: 829, score: 199, e: 0.01583754189442009
--------------------trained2000--------------------
episode: 830, score: 16, e: 0.01575835418494799
--------------------trained2000--------------------
episode: 831, score: 194, e: 0.01567956241402325
--------------------trained2000--------------------
episode: 832, score: 199, e: 0.015601164601953134
--------------------trained2000--------------------
episode: 833, score: 199, e: 0.015523158778943369
--------------------trained2000--------------------
episode: 834, score: 199, e: 0.015445542985048652
--------------------trained2000--------------------
episode: 835, score: 199, e: 0.015368315270123408
--------------------trained2000--------------------
episode: 836, score: 153, e: 0.01529147369377279
--------------------trained2000--------------------
episode: 837, score: 199, e: 0.015215016325303928
--------------------trained2000--------------------
episode: 838, score: 172, e: 0.0151

episode: 910, score: 199, e: 0.010552547534153616
--------------------trained2000--------------------
episode: 911, score: 199, e: 0.010499784796482848
--------------------trained2000--------------------
episode: 912, score: 199, e: 0.010447285872500434
--------------------trained2000--------------------
episode: 913, score: 199, e: 0.01039504944313793
--------------------trained2000--------------------
episode: 914, score: 199, e: 0.010343074195922241
--------------------trained2000--------------------
episode: 915, score: 16, e: 0.01029135882494263
--------------------trained2000--------------------
episode: 916, score: 199, e: 0.010239902030817916
--------------------trained2000--------------------
episode: 917, score: 199, e: 0.010188702520663827
--------------------trained2000--------------------
episode: 918, score: 199, e: 0.010137759008060509
--------------------trained2000--------------------
episode: 919, score: 199, e: 0.010087070213020206
--------------------trained2000----

--------------------trained2000--------------------
episode: 993, score: 14, e: 0.00998645168764533
--------------------trained2000--------------------
episode: 994, score: 15, e: 0.00998645168764533
--------------------trained2000--------------------
episode: 995, score: 13, e: 0.00998645168764533
--------------------trained2000--------------------
episode: 996, score: 10, e: 0.00998645168764533
--------------------trained2000--------------------
episode: 997, score: 13, e: 0.00998645168764533
--------------------trained2000--------------------
episode: 998, score: 11, e: 0.00998645168764533
--------------------trained2000--------------------
episode: 999, score: 12, e: 0.00998645168764533
--------------------trained2000--------------------
episode: 1000, score: 15, e: 0.00998645168764533
--------------------trained2000--------------------
