In [1]:
import numpy as np 
import gym,random,os
from tensorflow.keras.layers import Dense 
from tensorflow.keras.optimizers import Adam 
from tensorflow.keras.models import Sequential
from collections import deque

In [2]:
# Create the CartPole-v1 environment that the training loop resets and steps.
env = gym.make("CartPole-v1")

In [3]:
# Length of the first axis of the observation space; used as the network's
# input width and as the row length when observations are reshaped to (1, n).
state_size = env.observation_space.shape[0]
# Number of discrete actions; used as the network's output width.
action_size = env.action_space.n

In [4]:
# Folder for saved model artefacts, relative to the notebook's working directory.
output_dir = "Saved_Models"

# exist_ok=True makes this cell safe to re-run and removes the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

In [5]:
# Number of stored transitions passed to DQNAgent.replay() per call; the
# training loop only calls replay() once the memory holds more than this many.
batch_size=32

In [6]:
class DQNAgent:
    """Deep Q-learning agent with a replay memory and an epsilon-greedy policy.

    The agent owns a small fully connected Keras network that maps a state of
    shape ``(1, state_size)`` to one Q-value per action.

    Parameters
    ----------
    state_size : int
        Number of features in a single observation (network input width).
    action_size : int
        Number of discrete actions (network output width).
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size

        # Replay memory: once 2000 transitions are stored, the oldest are dropped.
        self.memory = deque(maxlen=2000)

        # Discount factor applied to the bootstrapped next-state value.
        self.gamma = 0.99

        # Probability of taking a random action; multiplied by epsilon_decay
        # after every replay() call while it is still above epsilon_min.
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01

        # Learning rate handed to the Adam optimizer.
        self.lr = 0.001
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the Q-network (two hidden ReLU layers of 24 units).

        Returns
        -------
        A compiled ``Sequential`` model with ``action_size`` linear outputs,
        MSE loss and an Adam optimizer using ``self.lr``.
        """
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        # Linear output: Q-values are unbounded regression targets.
        model.add(Dense(self.action_size, activation='linear'))

        model.compile(loss='mse', optimizer=Adam(learning_rate=self.lr))

        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one ``(state, action, reward, next_state, done)`` transition to memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action for ``state`` using the epsilon-greedy rule.

        With probability ``self.epsilon`` a uniformly random action index is
        returned (explore); otherwise the index of the largest predicted
        Q-value is returned (exploit).
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)  # Explore

        # verbose=0 keeps Keras from printing a progress bar on every step.
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])  # Exploit

    def replay(self, batch_size):
        """Fit the network on ``batch_size`` transitions sampled from memory.

        For each sampled transition the target for the taken action is
        ``reward`` when the episode ended, otherwise
        ``reward + gamma * max_a Q(next_state, a)``. The Q-values of the other
        actions are left at the network's current predictions so they do not
        contribute to the loss. Epsilon is decayed once at the end of the call.

        Raises
        ------
        ValueError
            From ``random.sample`` if ``batch_size`` exceeds the number of
            stored transitions.
        """
        minibatch = random.sample(self.memory, batch_size)

        for state, action, reward, next_state, done in minibatch:
            target = reward

            if not done:
                # Bootstrap from the network's Q-value estimate of the next
                # state, not from the raw observation values.
                next_q_values = self.model.predict(next_state, verbose=0)[0]
                target = reward + self.gamma * np.amax(next_q_values)

            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target

            self.model.fit(state, target_f, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load network weights from the file ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save the network weights to the file ``name``."""
        self.model.save_weights(name)
        
    
    
    


In [7]:
# Instantiate the agent; its constructor also builds and compiles the Q-network.
dqn = DQNAgent(state_size,action_size)

In [None]:
# Training loop: play one episode per iteration, store every transition in the
# agent's replay memory, then run a single replay (training) pass per episode.
for e in range(1001):
    # gym < 0.26 returns just the observation from reset(); gym >= 0.26 and
    # gymnasium return an (observation, info) tuple.
    reset_result = env.reset()
    state = reset_result[0] if isinstance(reset_result, tuple) else reset_result

    # The network expects a batch axis: shape (1, state_size).
    state = np.reshape(state, [1, state_size])

    for t in range(5000):
        action = dqn.act(state)

        # Old API: (obs, reward, done, info).
        # New API: (obs, reward, terminated, truncated, info).
        step_result = env.step(action)
        if len(step_result) == 5:
            next_state, reward, terminated, truncated, _ = step_result
            done = terminated or truncated
        else:
            next_state, reward, done, _ = step_result

        # Penalise the transition that ends the episode.
        reward = reward if not done else -10

        next_state = np.reshape(next_state, [1, state_size])

        dqn.remember(state, action, reward, next_state, done)

        state = next_state

        if done:
            print(f"Episodes : {e}/1000 Score : {t} Epsilon: {dqn.epsilon}")
            break

    # Train only once enough transitions are stored to draw a full minibatch.
    if len(dqn.memory) > batch_size:
        dqn.replay(batch_size)
        
        

Episodes : 0/1000 Score : 21 Epsilon: 1.0
Episodes : 1/1000 Score : 47 Epsilon: 1.0
Episodes : 2/1000 Score : 30 Epsilon: 0.995
Episodes : 3/1000 Score : 16 Epsilon: 0.990025
Episodes : 4/1000 Score : 8 Epsilon: 0.985074875
Episodes : 5/1000 Score : 32 Epsilon: 0.9801495006250001
Episodes : 6/1000 Score : 26 Epsilon: 0.9752487531218751
Episodes : 7/1000 Score : 48 Epsilon: 0.9703725093562657
Episodes : 8/1000 Score : 31 Epsilon: 0.9655206468094844
Episodes : 9/1000 Score : 38 Epsilon: 0.960693043575437
Episodes : 10/1000 Score : 22 Epsilon: 0.9558895783575597
Episodes : 11/1000 Score : 11 Epsilon: 0.9511101304657719
Episodes : 12/1000 Score : 27 Epsilon: 0.946354579813443
Episodes : 13/1000 Score : 15 Epsilon: 0.9416228069143757
Episodes : 14/1000 Score : 13 Epsilon: 0.9369146928798039
Episodes : 15/1000 Score : 10 Epsilon: 0.9322301194154049
Episodes : 16/1000 Score : 14 Epsilon: 0.9275689688183278
Episodes : 17/1000 Score : 34 Epsilon: 0.9229311239742362
Episodes : 18/1000 Score : 15

Episodes : 143/1000 Score : 11 Epsilon: 0.4907693883854626
Episodes : 144/1000 Score : 12 Epsilon: 0.4883155414435353
Episodes : 145/1000 Score : 9 Epsilon: 0.4858739637363176
Episodes : 146/1000 Score : 17 Epsilon: 0.483444593917636
Episodes : 147/1000 Score : 19 Epsilon: 0.4810273709480478
Episodes : 148/1000 Score : 15 Epsilon: 0.47862223409330756
Episodes : 149/1000 Score : 7 Epsilon: 0.47622912292284103
Episodes : 150/1000 Score : 12 Epsilon: 0.4738479773082268
Episodes : 151/1000 Score : 14 Epsilon: 0.47147873742168567
Episodes : 152/1000 Score : 10 Epsilon: 0.46912134373457726
Episodes : 153/1000 Score : 23 Epsilon: 0.46677573701590436
Episodes : 154/1000 Score : 9 Epsilon: 0.46444185833082485
Episodes : 155/1000 Score : 10 Epsilon: 0.46211964903917074
Episodes : 156/1000 Score : 12 Epsilon: 0.4598090507939749
Episodes : 157/1000 Score : 9 Epsilon: 0.457510005540005
Episodes : 158/1000 Score : 17 Epsilon: 0.45522245551230495
Episodes : 159/1000 Score : 10 Epsilon: 0.452946343234

Episodes : 282/1000 Score : 11 Epsilon: 0.24450384299593592
Episodes : 283/1000 Score : 9 Epsilon: 0.24328132378095624
Episodes : 284/1000 Score : 9 Epsilon: 0.24206491716205145
Episodes : 285/1000 Score : 9 Epsilon: 0.2408545925762412
Episodes : 286/1000 Score : 8 Epsilon: 0.23965031961336
Episodes : 287/1000 Score : 9 Epsilon: 0.2384520680152932
Episodes : 288/1000 Score : 8 Epsilon: 0.23725980767521673
Episodes : 289/1000 Score : 14 Epsilon: 0.23607350863684065
Episodes : 290/1000 Score : 10 Epsilon: 0.23489314109365644
Episodes : 291/1000 Score : 9 Epsilon: 0.23371867538818816
Episodes : 292/1000 Score : 12 Epsilon: 0.23255008201124722
Episodes : 293/1000 Score : 12 Epsilon: 0.231387331601191
Episodes : 294/1000 Score : 10 Epsilon: 0.23023039494318503
Episodes : 295/1000 Score : 9 Epsilon: 0.2290792429684691
Episodes : 296/1000 Score : 10 Epsilon: 0.22793384675362674
Episodes : 297/1000 Score : 8 Epsilon: 0.22679417751985861
Episodes : 298/1000 Score : 11 Epsilon: 0.225660206632259

Episodes : 421/1000 Score : 8 Epsilon: 0.12181307688414106
Episodes : 422/1000 Score : 10 Epsilon: 0.12120401149972035
Episodes : 423/1000 Score : 7 Epsilon: 0.12059799144222175
Episodes : 424/1000 Score : 8 Epsilon: 0.11999500148501063
