In [35]:
import os, sys, time, datetime, json, random
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, PReLU, Activation, Conv2D, Flatten
from tensorflow.keras.optimizers import SGD , Adam, RMSprop
import matplotlib.pyplot as plt
%matplotlib inline

In [42]:
def build_dqn_model(lr=0.001, input_shape=(16, 16, 1), num_actions=4):
    """Build and compile the convolutional Q-network.

    The network is a single 3x3 convolution (32 filters, ReLU), a flatten,
    a 128-unit ReLU dense layer and a linear output layer with one unit per
    action. It is compiled with Adam and mean-squared-error loss.

    Parameters
    ----------
    lr : float
        Learning rate handed to the Adam optimizer.
    input_shape : tuple of int
        Shape of a single observation, without the batch axis. Defaults to
        the 16x16 single-channel grid the function was originally fixed to.
    num_actions : int
        Number of output units (one Q-value per action). Defaults to 4
        (up, down, left, right).

    Returns
    -------
    A compiled Keras ``Sequential`` model.
    """
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=tuple(input_shape)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    # Linear activation: Q-values are unbounded regression targets.
    model.add(Dense(num_actions, activation='linear'))
    model.compile(optimizer=Adam(learning_rate=lr), loss='mse')
    return model

In [44]:
# Build a Q-network with the default settings and print its layer/parameter
# table as a sanity check of the architecture.
modelA = build_dqn_model()
modelA.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 14, 14, 32)        320       
_________________________________________________________________
flatten_2 (Flatten)          (None, 6272)              0         
_________________________________________________________________
dense_18 (Dense)             (None, 128)               802944    
_________________________________________________________________
dense_19 (Dense)             (None, 4)                 516       
Total params: 803,780
Trainable params: 803,780
Non-trainable params: 0
_________________________________________________________________


In [33]:
# Load every ``*.json`` file in the maze folder into ``mazes``.
mazes = []
folder = 'mazes'  # Adjust the path based on the relative location

# os.listdir() returns entries in arbitrary order; sorting makes the order of
# ``mazes`` (and therefore any split by position) reproducible across runs
# and machines.
for filename in sorted(os.listdir(folder)):
    if filename.endswith(".json"):
        file_path = os.path.join(folder, filename)
        # JSON text is UTF-8; do not rely on the platform default encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            maze = json.load(f)
        mazes.append(maze)

In [25]:
# Positional 80/20 split: the first 80% of the loaded mazes are used for
# training, the remainder is held out for validation.
split_index = int(len(mazes) * 0.8)
train_mazes, validation_mazes = mazes[:split_index], mazes[split_index:]

In [45]:
import numpy as np
import random
from collections import deque

class DQNAgent:
    """Deep Q-learning agent with an online network, a target network and a
    bounded replay buffer.

    Both networks are created by ``build_dqn_model(lr=...)``; only the
    learning rate is forwarded, so ``state_shape`` and ``action_size`` are
    stored on the agent but do not influence the network architecture here.

    Parameters
    ----------
    state_shape : tuple
        Shape of a single observation (stored as ``self.state_shape``).
    action_size : int
        Number of discrete actions; used for random exploration in ``act``.
    memory_size : int
        Maximum number of transitions kept in the replay buffer.
    gamma : float
        Discount rate applied to the bootstrapped next-state value.
    epsilon : float
        Initial exploration rate for the epsilon-greedy policy.
    epsilon_min : float
        Exploration rate at or below which decay stops.
    epsilon_decay : float
        Multiplicative decay applied to ``epsilon`` after each training step.
    learning_rate : float
        Learning rate passed to ``build_dqn_model``.
    """

    def __init__(self, state_shape, action_size, memory_size=2000, gamma=0.95,
                 epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995,
                 learning_rate=0.001):
        self.state_shape = state_shape
        self.action_size = action_size
        self.memory = deque(maxlen=memory_size)
        self.gamma = gamma  # discount rate
        self.epsilon = epsilon  # exploration rate
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.learning_rate = learning_rate
        self.model = build_dqn_model(lr=self.learning_rate)
        self.target_model = build_dqn_model(lr=self.learning_rate)
        self.update_target_model()

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer.

        Once the buffer is full the oldest transition is discarded (deque
        with ``maxlen``).
        """
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action with an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action index is
        returned; otherwise the index of the largest Q-value predicted by
        the online network for ``state``. ``state`` must already carry the
        batch axis, since it is passed to ``predict`` unchanged.

        Returns
        -------
        int
            The chosen action index.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        # Plain int so both branches return the same type.
        return int(np.argmax(act_values[0]))

    def replay(self, batch_size):
        """Train the online network on a random minibatch of stored transitions.

        Does nothing when the buffer holds fewer than ``batch_size``
        transitions (sampling would otherwise raise ``ValueError``).

        The whole minibatch is evaluated and fitted in one batched call
        instead of one predict/fit round-trip per sample. Stored states are
        concatenated along axis 0, so each must carry a leading batch axis
        of length 1, as ``act`` already requires.

        After a training step ``epsilon`` is decayed while it is above
        ``epsilon_min``.
        """
        if batch_size <= 0 or len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        states = np.concatenate([item[0] for item in minibatch], axis=0)
        actions = np.array([item[1] for item in minibatch], dtype=int)
        rewards = np.array([item[2] for item in minibatch], dtype=float)
        next_states = np.concatenate([item[3] for item in minibatch], axis=0)
        dones = np.array([item[4] for item in minibatch], dtype=bool)

        # Start from the current predictions so only the taken action's
        # Q-value contributes to the loss.
        targets = self.model.predict(states, verbose=0)
        next_q = self.target_model.predict(next_states, verbose=0)

        # Terminal transitions use the raw reward; others bootstrap from the
        # target network's best next-state value.
        bootstrapped = rewards + self.gamma * np.amax(next_q, axis=1)
        targets[np.arange(batch_size), actions] = np.where(dones, rewards, bootstrapped)

        # batch_size is passed explicitly so the minibatch is one gradient step.
        self.model.fit(states, targets, batch_size=batch_size, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load online-network weights from ``name`` and sync the target network."""
        self.model.load_weights(name)
        # Without this the target network would keep its pre-load weights.
        self.update_target_model()

    def save(self, name):
        """Save the online network's weights to ``name``."""
        self.model.save_weights(name)


In [55]:
def preprocess_state(state):
    """Flatten every value of a state dictionary into one 1-D array.

    Values are visited in the dictionary's iteration order. List, tuple and
    ndarray values are fully flattened (nested levels included) in row-major
    order; any other value is appended as a single element.

    Parameters
    ----------
    state : dict
        Mapping whose values are scalars or (possibly nested) sequences.
        Nested sequences must be rectangular so NumPy can convert them.

    Returns
    -------
    numpy.ndarray
        1-D array holding all collected elements; empty for an empty dict.
    """
    state_values = []
    for value in state.values():
        if isinstance(value, (list, tuple, np.ndarray)):
            # ravel() flattens every nesting level; list.extend() alone only
            # removes one level and leaves rows mixed in with scalars.
            state_values.extend(np.asarray(value).ravel().tolist())
        else:
            state_values.append(value)
    return np.array(state_values)

In [56]:
agent = DQNAgent(state_shape=(16, 16, 1), action_size=4)

# Shape fed to the network: a batch axis of length 1 followed by the agent's
# state shape, so the reshape below cannot drift from the agent's setting.
batched_shape = (1, *agent.state_shape)

# NOTE(review): `take_action` and `batch_size` are not defined in any cell
# shown in this notebook; they must exist in the kernel before this cell runs.

# Training loop
for e in range(50):
    # Train on the training split only, so the validation mazes stay unseen.
    for maze in train_mazes:
        state = preprocess_state(maze)  # convert maze to state
        # NOTE(review): the recorded run of this cell failed at this reshape
        # because the preprocessed maze held 1024 values, while a 16x16x1
        # input needs 256. Confirm the maze schema and the intended input size.
        state = np.reshape(state, batched_shape)
        # `step`, not `time`: avoids shadowing the imported `time` module.
        for step in range(500):
            action = agent.act(state)
            next_state, reward, done = take_action(maze, action)
            next_state = np.reshape(next_state, batched_shape)
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                # Refresh the target network at the end of each finished episode.
                agent.update_target_model()
                break
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)


ValueError: cannot reshape array of size 1024 into shape (1,16,16,1)