In [41]:
from twentyFourtyEightNonVisual import GamePlayer
import numpy as np
import random
import tensorflow as tf
from tensorflow import keras

In [42]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten, BatchNormalization
import keras.backend as K

In [6]:
def custom_reward(prev_state, current_state):
    # Calculate the difference in scores between previous and current states
    score_diff = current_state.sum() - prev_state.sum()

    # Encourage positive score changes
    if score_diff > 0:
        score_reward = score_diff
    else:
        score_reward = 0  # No reward for score reduction

    # Encourage merging tiles
    merge_reward = calculate_merge_reward(prev_state, current_state)

    # Encourage keeping empty cells
    empty_cells_reward = calculate_empty_cells_reward(prev_state, current_state)

    # Encourage reaching larger tiles
    max_tile = current_state.max()
    if max_tile >= 256:
        max_tile_reward = max_tile  # Reward for reaching or exceeding 256
    else:
        max_tile_reward = 0

    # Encourage reaching the maximum tile (2048)
    if max_tile >= 2048:
        max_score_reward = 2048  # Reward for reaching or exceeding 2048
    else:
        max_score_reward = 0

    # Combine the rewards with different weights
    total_reward = (
        10 * score_reward
        + 1* merge_reward
        + 1* empty_cells_reward
        + 5 * max_tile_reward
        + 2 * max_score_reward
    )

    return total_reward

def calculate_merge_reward(prev_state, current_state):
    merge_reward = 0
    for row in range(4):
        for col in range(4):
            prev_tile = prev_state[row, col]
            current_tile = current_state[row, col]
            if current_tile > prev_tile:
                merge_reward += current_tile  # Reward for tile merge
    return merge_reward

def calculate_empty_cells_reward(prev_state, current_state):
    empty_cells_reward = 0
    empty_cells_prev = np.count_nonzero(prev_state == 0)
    empty_cells_current = np.count_nonzero(current_state == 0)
    if empty_cells_current > empty_cells_prev:
        empty_cells_reward = 1  # Reward for preserving or creating empty cells
    return empty_cells_reward

In [66]:
import multiprocessing

def train_model(gamer_model, action_space, custom_reward, max_iterations = 500):
    all_states = []
    all_actions = []
    all_rewards = []
    game_player = GamePlayer()
    k = 0
    while not game_player.is_game_over() and k < max_iterations:
        state = np.array(game_player.get_board())  # Adjust state shape for Keras model
        modeled_state = state.reshape(1, 4, 4, 1)  # Reshape to match the expected input shape
        
        q_values = gamer_model.predict(np.array([state]), verbose=False)
        
        # Choose the action with the highest Q-value (argmax)
        action = np.argmax(q_values)
        chosen_action = action_space[action]
        print(chosen_action)
        all_actions.append(chosen_action)
        prev_state = state
        state = game_player.move(chosen_action)
        reward = custom_reward(prev_state,state, state.max())
        all_rewards.append(reward)
        all_states.append(prev_state)
        k += 1
    
    # Convert lists to NumPy arrays
    all_states = np.array(all_states)
    all_actions = np.array(all_actions)
    all_rewards = np.array(all_rewards)
    gamer_model.fit(all_states, all_actions, sample_weight = all_rewards, batch_size = 1, epochs = 10, verbose = 0)
    return all_states[-1].sum()





loss_fn = keras.losses.SparseCategoricalCrossentropy()
gamer_model = Sequential()
# Layer 1: Convolutional layer with 32 filters, a kernel size of (2, 2), and 'valid' padding
gamer_model.add(Conv2D(4, kernel_size=(2, 2), padding='valid', activation='relu', input_shape=(4, 4, 1)))

# Layer 2: Convolutional layer with 64 filters, a kernel size of (2, 2), and 'valid' padding
gamer_model.add(Conv2D(16, kernel_size=(2, 2), padding='valid', activation='relu'))

# Layer 3: Convolutional layer with 128 filters, a kernel size of (2, 2), and 'valid' padding
gamer_model.add(Conv2D(4, kernel_size=(2, 2), padding='valid', activation='relu'))
# Layer 4: Flatten the output from the convolutional layers
gamer_model.add(Flatten())
gamer_model.add(Dense(len(action_space), activation='softmax'))  # Linear activation for Q-values
gamer_model.compile(optimizer='adam', loss=loss_fn)  # Use 'mse' for Q-learning

gamer_model.summary()

In [67]:
def new_custom_reward(prev_state, current_state, max_tile):
    # Calculate the reward based on the difference in the maximum tile and the number of empty cells
    empty_cells = sum(1 for row in current_state for cell in row if cell == 0)
    reward_empty_cells = empty_cells * 0.1  # Reward empty cells
    
    # Check if a merge occurred and reward it
    reward_merges = 0
    for i in range(4):
        for j in range(4):
            if prev_state[i][j] == current_state[i][j]:
                continue
            if current_state[i][j] > prev_state[i][j]:
                reward_merges += current_state[i][j] - prev_state[i][j]
    
    # Reward for achieving the maximum tile (winning the game)
    reward_max_tile = 1000 if max_tile >= 2048 else 0
    
    # Total reward
    total_reward = reward_empty_cells + reward_merges + reward_max_tile
    
    return total_reward

In [68]:

# Define the model
model = Sequential()

# Convolutional layers
model.add(Conv2D(32, (2, 2), padding='same', activation='relu', input_shape=(4, 4, 1)))
model.add(Conv2D(64, (2, 2), padding='same', activation='relu'))
model.add(Conv2D(128, (2, 2), padding='same', activation='relu'))

# Flatten the output from the convolutional layers
model.add(Flatten())

# Dense layers for action selection
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(4, activation='linear'))  # Use 'linear' activation for Q-values

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()


Model: "sequential_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_48 (Conv2D)          (None, 4, 4, 32)          160       
                                                                 
 conv2d_49 (Conv2D)          (None, 4, 4, 64)          8256      
                                                                 
 conv2d_50 (Conv2D)          (None, 4, 4, 128)         32896     
                                                                 
 flatten_16 (Flatten)        (None, 2048)              0         
                                                                 
 dense_24 (Dense)            (None, 128)               262272    
                                                                 
 dense_25 (Dense)            (None, 64)                8256      
                                                                 
 dense_26 (Dense)            (None, 4)               

Total params: 312100 (1.19 MB)
Trainable params: 312100 (1.19 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [69]:
# Train the agent
import matplotlib.pyplot as plt
import numpy as np

score_array = []
episode_number = 200
for _ in range(episode_number):  # Train for a certain number of episodes
    action_space = ['w','a','s','d']
    score = train_model(model, action_space, new_custom_reward, 100)
    score_array.append(score)
    if(len(score_array) % 50 == 0):
        iterations = list(range(1, len(score_array) + 1))  # Create a list of episode numbers

        # Create the plot
        plt.figure(figsize=(10, 6))
        plt.scatter(iterations, score_array, marker='o', s=80, c='b', label='Scores')
        plt.plot(iterations, score_array, linestyle='--', c='b')

        # Calculate the line of best fit (linear regression)
        coefficients = np.polyfit(iterations, score_array, 1)
        line_of_best_fit = np.poly1d(coefficients)

        # Plot the line of best fit
        plt.plot(iterations, line_of_best_fit(iterations), c='r', label='Line of Best Fit')

        plt.title('Scores vs. Iterations')
        plt.xlabel('Iterations')
        plt.ylabel('Scores')
        plt.grid(True)

        # Show the plot
        plt.legend(loc='upper left')
        plt.show()


s
s
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a


UnboundLocalError: cannot access local variable 'all_guesses' where it is not associated with a value