In [1]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [3]:
!pip install kaggle_environments

Collecting kaggle_environments
[?25l  Downloading https://files.pythonhosted.org/packages/72/c5/aca51b5dc0b1d1c601f0971333aee2946aa6eaa108c869671a9fe5583e24/kaggle_environments-1.2.2-py2.py3-none-any.whl (92kB)
[K     |████████████████████████████████| 92kB 4.1MB/s 
[?25hCollecting jsonschema>=3.0.1
[?25l  Downloading https://files.pythonhosted.org/packages/c5/8f/51e89ce52a085483359217bc72cdbf6e75ee595d5b1d4b5ade40c7e018b8/jsonschema-3.2.0-py2.py3-none-any.whl (56kB)
[K     |████████████████████████████████| 61kB 4.3MB/s 
Installing collected packages: jsonschema, kaggle-environments
  Found existing installation: jsonschema 2.6.0
    Uninstalling jsonschema-2.6.0:
      Successfully uninstalled jsonschema-2.6.0
Successfully installed jsonschema-3.2.0 kaggle-environments-1.2.2


In [21]:
from kaggle_environments import evaluate, make, utils
import numpy as np
from collections import deque
from tensorflow import keras
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model, load_model
import random
from IPython.display import clear_output

In [5]:
class DQNAgent:
    
    def __init__(self, state_size, action_size):
        self.state_size = state_size             # Input size from emulater
        self.action_size = action_size           # Number of actions available
        self.memory = deque(maxlen=2000)         # Max. size of our memory. Older observations are overwritten once memory if full
        self.gamma = 0.95                        # discount rate
        self.epsilon = 1.0                       # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_max = 1.0
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001               # Learning rate for our model
        self.model = self._build_model()

    # This the policy that our agent will use to take actions
    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        ip = keras.Input(shape=(self.state_size))
        x = Dense(64, activation='relu')(ip)
        x = Dense(32, activation='relu')(x)
        output = Dense(self.action_size, activation='linear')(x)

        model = Model(ip, output)
        model.compile(loss='mse',
                      optimizer=Adam(lr=self.learning_rate))
        return model
    
    # Saving our data into a replay memory
    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    # Given a state, this function returns the action with maximum q-value
    def get_action(self, state, config):
        
        if np.random.rand() <= self.epsilon:
            return random.choice([col for col in range(config.columns) if state.board[int(col)] == 0])
        act_values = self.model.predict(np.array(state['board']).reshape([1, state_size]))
        action = np.argmax(act_values[0])
        is_valid = (state['board'][int(action)] == 0)
        if is_valid: # Play the move
            return int(action)  # returns action
        else: 
            return random.choice([col for col in range(config.columns) if state.board[int(col)] == 0])

    # Training our model with experience replay
    def replay(self, batch_size, episode):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            #print(state, reward, next_state)
            state = np.reshape(state, [1, self.state_size])
            next_state = np.reshape(next_state, [1, self.state_size])
            #print(state.shape, reward, next_state.shape)
            target = reward
            if not done:
                target = reward + self.gamma * np.max(self.model.predict(next_state))
            target_f = self.model.predict(state)
            target_f[0][action] = target
            #print(target_f)
            self.model.fit(state, target_f, epochs=1, verbose=0)
        
        # Adjusting the exploration rate with experince
        if self.epsilon > self.epsilon_min:
            #self.epsilon = self.epsilon_min + (self.epsilon_max - self.epsilon_min)*np.exp(-self.epsilon_decay*episode)
            self.epsilon *= self.epsilon_decay

    # To load the saved model weights
    def load(self, name):
        self.model.load_weights(name)

    # To save the trained agent so that we can play with him later
    def save(self, name):
        self.model.save_weights(name)

In [6]:
env = make("connectx", debug=True)
env.render(mode='ipython')

In [7]:
# Get state size
state_size = env.configuration['columns']*env.configuration['rows']  
print('state_size:', state_size)

# Get number of available Actions
action_size = env.configuration['columns']
print('action_size:', action_size)

state_size: 42
action_size: 7


In [None]:
agent = DQNAgent(state_size, action_size)
done = False
batch_size = 128
EPISODES = 1000
render = False

# Debug/Train
# Play as first position against random agent.
config = env.configuration

agent0 = load_model('drive/My Drive/ConnectX/connectx_2000.h5')

opponent = ['random', agent0]

trainer = env.train([None, "random"])

for e in range(EPISODES+1):

    episode_reward = 0
    state = trainer.reset()

    while not env.done:
        #observation = np.array(state['board']).reshape([1, state_size])
        if render:
            #clear_output()
            env.render(mode="ipython", width=100, height=90, header=False, controls=False)
            
        action = agent.get_action(state, config)
        #print("My Action", my_action)
        
        next_state, reward, done, info = trainer.step(action)
        #print(action, next_state, reward, done, info)
        
        if done:
            if reward == 1:   # agent wins the agme
                reward = 20
            elif reward == 0: # The opponent won the game
                reward = -20
            else: # tie
                reward = 10
        else: 
            reward = -1

        # Save the current states into our memory
        agent.remember(np.array(state['board']), action, reward, np.array(next_state['board']), done)
        state = next_state
        
        episode_reward += reward
        
        # At end of episode show stats
        if done or env.done:
            print("episode: {}/{}, score: {}, e: {:.2}"
                    .format(e, EPISODES, episode_reward, agent.epsilon))
            
            # Update our model by sampling the states from our memory
            if len(agent.memory) > batch_size:
                agent.replay(batch_size, e)

            break
            
    # Saving model after every 100 episodes of training   
    if e % 100 == 0:
        agent.save('drive/My Drive/ConnectX/connectx_{}.h5'.format(e))


episode: 0/1000, score: 15, e: 1.0
episode: 1/1000, score: 8, e: 1.0
episode: 2/1000, score: 9, e: 1.0
episode: 3/1000, score: 16, e: 1.0
episode: 4/1000, score: 12, e: 1.0
episode: 5/1000, score: 5, e: 1.0
episode: 6/1000, score: 2, e: 1.0
episode: 7/1000, score: 2, e: 1.0
episode: 8/1000, score: 12, e: 1.0
episode: 9/1000, score: -1, e: 1.0
episode: 10/1000, score: 7, e: 1.0
episode: 11/1000, score: 14, e: 1.0
episode: 12/1000, score: 1, e: 1.0
episode: 13/1000, score: 7, e: 0.99
episode: 14/1000, score: 6, e: 0.99
episode: 15/1000, score: 6, e: 0.99
episode: 16/1000, score: -5, e: 0.98
episode: 17/1000, score: 11, e: 0.98
episode: 18/1000, score: 14, e: 0.97
episode: 19/1000, score: 12, e: 0.97
episode: 20/1000, score: 4, e: 0.96
episode: 21/1000, score: 14, e: 0.96
episode: 22/1000, score: 6, e: 0.95
episode: 23/1000, score: 16, e: 0.95
episode: 24/1000, score: 17, e: 0.94
episode: 25/1000, score: 13, e: 0.94
episode: 26/1000, score: -2, e: 0.93
episode: 27/1000, score: -5, e: 0.93

In [10]:
# This agent random chooses a non-empty column.
def my_agent(observation, configuration):
    #from random import choice
    action = agent.get_action(observation, config)
    #return choice([c for c in range(configuration.columns) if observation.board[c] == 0])
    return action

In [36]:
# Testing Agent
# Play as the first agent against default "random" agent.
env.run([my_agent, my_agent])
env.render(mode="ipython", width=500, height=450)

In [29]:
def mean_reward(rewards):
    return sum(r[0] for r in rewards) / float(len(rewards))

# Run multiple episodes to estimate its performance.
print("My Agent vs Random Agent:", mean_reward(evaluate("connectx", [my_agent, "random"], num_episodes=10)))
print("Random Agent vs My Agent:", mean_reward(evaluate("connectx", ["random", my_agent], num_episodes=10)))
print("My Agent vs Negamax Agent:", mean_reward(evaluate("connectx", [my_agent, "negamax"], num_episodes=10)))
print("Negamax vs My Agent:", mean_reward(evaluate("connectx", ["negamax", my_agent], num_episodes=10)))

My Agent vs Random Agent: 0.4
Random Agent vs My Agent: -0.2
My Agent vs Negamax Agent: -1.0
Negamax vs My Agent: 1.0


In [12]:
# Submission File
import inspect
import os

def write_agent_to_file(function, file):
    with open(file, "a" if os.path.exists(file) else "w") as f:
        f.write(inspect.getsource(function))
        print(function, "written to", file)

write_agent_to_file(my_agent, "drive/My Drive/ConnectX/submission.py")

<function my_agent at 0x7f9386977d90> written to drive/My Drive/ConnectX/submission.py


In [22]:
# Note: Stdout replacement is a temporary workaround.
import sys
out = sys.stdout
submission = utils.read_file("drive/My Drive/ConnectX/submission.py")
agent = utils.get_last_callable(submission)
sys.stdout = out

env = make("connectx", debug=True)
env.run([agent, agent])
print("Success!" if env.state[0].status == env.state[1].status == "DONE" else "Failed...")

AttributeError: ignored