# 2048 Deep Q-Learning implementation

This Google Colab Jupyter notebook contains the source code of the 2048 game and a fully implemented deep Q-learning neural network designed to win it.

### Imports

In [None]:
import tensorflow as tf
import numpy as np

### Initial game conditions

In [19]:
# Starting state: a 4x4 grid with every cell set to 0, and a score of zero.
# Each row is built separately so the rows are independent list objects.
game_board = [[0] * 4 for _ in range(4)]
score = 0

### Test gameplay

In [20]:
import logic_2048
import time

# Smoke test of the game logic: seed the board, then play up to ten scripted
# moves, printing the board and the running score after each one.
print("Initial board: " + str(game_board))

# Put the first tile on the empty board.
game_matrix = logic_2048.place_new(game_board)

for each in range(10):
    print("Seeded board: " + str(game_matrix))

    # Alternate the two scripted moves: "s" on even iterations, "a" on odd
    # ones (in the recorded output these slide the tiles down and left).
    move = "a" if each % 2 else "s"
    game_matrix, success, top_value, total_score = logic_2048.transform_matrix(
        game_matrix, move, score
    )
    score = total_score

    if logic_2048.win_check(game_matrix):
        print("CONGRATS! YOU WON!")
        print(str(game_matrix) + "\nScore: " + str(score))
        # The game is decided; do not keep moving tiles on a finished board.
        break

    if not success:
        print("Game over")
        print(str(game_matrix) + "\nScore: " + str(score))
        # Same as above: a lost game ends the scripted run.
        break

    print(str(game_matrix) + "\nScore: " + str(score))

    # Pause so the printed boards can be followed move by move.
    time.sleep(2)

Initial board: [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
Seeded board: [[0, 0, 0, 0], [0, 2, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]
[[0, 0, 0, 0], [0, 0, 0, 2], [0, 0, 0, 0], [0, 2, 0, 0]]
Score: 0
Seeded board: [[0, 0, 0, 0], [0, 0, 0, 2], [0, 0, 0, 0], [0, 2, 0, 0]]
[[0, 0, 0, 0], [2, 0, 0, 0], [0, 0, 2, 0], [2, 0, 0, 0]]
Score: 0
Seeded board: [[0, 0, 0, 0], [2, 0, 0, 0], [0, 0, 2, 0], [2, 0, 0, 0]]
[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 2], [4, 0, 2, 0]]
Score: 4
Seeded board: [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 2], [4, 0, 2, 0]]
[[0, 0, 0, 0], [0, 0, 0, 0], [2, 0, 0, 2], [4, 2, 0, 0]]
Score: 4
Seeded board: [[0, 0, 0, 0], [0, 0, 0, 0], [2, 0, 0, 2], [4, 2, 0, 0]]
[[0, 0, 0, 2], [0, 0, 0, 0], [2, 0, 0, 0], [4, 2, 0, 2]]
Score: 4
Seeded board: [[0, 0, 0, 2], [0, 0, 0, 0], [2, 0, 0, 0], [4, 2, 0, 2]]
[[2, 0, 0, 0], [0, 0, 0, 0], [2, 0, 2, 0], [4, 4, 0, 0]]
Score: 8
Seeded board: [[2, 0, 0, 0], [0, 0, 0, 0], [2, 0, 2, 0], [4, 4, 0, 0]]
[[0, 0, 0, 0], [0, 0, 0, 0], [4, 0,

### ChatGPT Version

### Imports

In [None]:
import tensorflow

### Define Q-Neural Network

In [None]:
class DQN:
    """Q-value network with one hidden layer, built as a TensorFlow graph.

    The constructor creates two float32 placeholders (``inputs`` of width
    ``input_size`` and ``targets`` of width ``output_size``), a 128-unit ReLU
    hidden layer, a linear output layer with one value per output, a
    mean-squared-error loss between targets and predictions, and an Adam
    training op that minimizes that loss.

    Attributes:
        inputs: Placeholder fed with a batch of input rows.
        targets: Placeholder fed with the target values for each output.
        predictions: Output tensor of the network.
        loss: Mean squared error between ``targets`` and ``predictions``.
        optimizer: Training op returned by ``AdamOptimizer.minimize``.
    """

    def __init__(self, input_size, output_size, learning_rate=0.001):
        self.input_size, self.output_size = input_size, output_size
        self.learning_rate = learning_rate

        # Graph inputs; the leading dimension is left open for the batch.
        self.inputs = tf.placeholder(tf.float32, shape=[None, input_size])
        self.targets = tf.placeholder(tf.float32, shape=[None, output_size])

        # Network body: inputs -> 128 ReLU units -> linear output layer.
        features = tf.layers.dense(
            inputs=self.inputs, units=128, activation=tf.nn.relu
        )
        self.predictions = tf.layers.dense(inputs=features, units=output_size)

        # Mean squared error over every element of the batch.
        squared_error = tf.square(self.targets - self.predictions)
        self.loss = tf.reduce_mean(squared_error)

        adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.optimizer = adam.minimize(self.loss)

### Implement the Deep Q-Learning Algorithm

In [None]:
class DQNAgent:
    """Epsilon-greedy deep Q-learning agent with an experience-replay buffer.

    The agent owns a ``DQN`` model, stores transitions in ``memory`` (a list
    capped at ``memory_size`` entries, oldest dropped first) and trains the
    model on random mini-batches drawn from that memory.

    NOTE(review): the model is queried with ``.eval()`` / ``.run()`` without
    an explicit session, so a default TensorFlow session with initialized
    variables must be active when ``get_action`` (greedy branch) or
    ``replay`` is called -- confirm the training loop provides one.

    Args:
        input_size: Width of a state vector fed to the network.
        output_size: Number of actions (one Q-value per action).
        learning_rate: Learning rate passed to the ``DQN`` model.
        discount_factor: Multiplier applied to the best next-state Q-value.
        exploration_rate: Initial probability of picking a random action.
        exploration_decay_rate: Factor applied to ``exploration_rate`` by
            ``update_exploration_rate``.
        memory_size: Maximum number of stored transitions.
        batch_size: Number of transitions sampled per ``replay`` call.
    """

    def __init__(
        self,
        input_size,
        output_size,
        learning_rate=0.001,
        discount_factor=0.99,
        exploration_rate=1.0,
        exploration_decay_rate=0.9995,
        memory_size=50000,
        batch_size=32,
    ):
        self.input_size = input_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_decay_rate = exploration_decay_rate
        self.memory_size = memory_size
        self.batch_size = batch_size
        self.memory = []  # (state, action, reward, next_state, done) tuples
        self.step = 0  # number of completed training steps in replay()
        self.model = DQN(input_size, output_size, learning_rate)

    def get_action(self, state):
        """Return an action index for ``state`` using an epsilon-greedy policy.

        With probability ``exploration_rate`` a uniformly random action in
        ``[0, output_size)`` is returned; otherwise the action with the
        highest predicted Q-value.
        """
        if np.random.rand() <= self.exploration_rate:
            return np.random.choice(self.output_size)

        # The network expects a batch, so wrap the single state in a list
        # and take the first (only) row of the result.
        q_values = self.model.predictions.eval(
            feed_dict={self.model.inputs: [state]}
        )[0]
        return np.argmax(q_values)

    def add_to_memory(self, state, action, reward, next_state, done):
        """Append one transition, dropping the oldest when over capacity."""
        self.memory.append((state, action, reward, next_state, done))
        if len(self.memory) > self.memory_size:
            self.memory.pop(0)

    def update_exploration_rate(self):
        """Multiply ``exploration_rate`` by ``exploration_decay_rate``."""
        self.exploration_rate *= self.exploration_decay_rate

    def replay(self):
        """Run one training step on a random mini-batch from memory.

        Does nothing until at least ``batch_size`` transitions are stored.
        For each sampled transition the target of the taken action is
        ``reward + discount_factor * max(Q(next_state)) * (1 - done)``; the
        targets of all other actions are the current predictions, so they
        contribute no error. Every 100th training step the exploration rate
        is decayed.
        """
        # Imported here because nothing else in the notebook imports
        # ``random``; without it the sampling below raises NameError.
        import random

        if len(self.memory) < self.batch_size:
            return

        batch = random.sample(self.memory, self.batch_size)
        states = np.array([item[0] for item in batch])
        actions = np.array([item[1] for item in batch])
        rewards = np.array([item[2] for item in batch])
        next_states = np.array([item[3] for item in batch])
        # Explicit float mask: 1.0 for terminal transitions, 0.0 otherwise.
        dones = np.array([item[4] for item in batch], dtype=np.float32)

        next_q_values = self.model.predictions.eval(
            feed_dict={self.model.inputs: next_states}
        )
        # Terminal transitions get no bootstrapped future value.
        targets = rewards + self.discount_factor * np.max(
            next_q_values, axis=1
        ) * (1.0 - dones)

        # Start from the current predictions and overwrite only the entry of
        # the action that was actually taken in each sampled transition.
        target_q_values = self.model.predictions.eval(
            feed_dict={self.model.inputs: states}
        )
        target_q_values[np.arange(len(actions)), actions] = targets
        self.model.optimizer.run(
            feed_dict={
                self.model.inputs: states,
                self.model.targets: target_q_values,
            }
        )

        self.step += 1
        if self.step % 100 == 0:
            self.update_exploration_rate()

### Train the neural network