## Dependencies needed

In [2]:
from DQN_Agent import DQN_Agent
from Tetris8x6 import Tetris
import numpy as np
import time
from collections import deque
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()
import pickle
from PIL import Image

## Train the DQN 

MAKE SURE YOU HAVE AN EMPTY FOLDER NAMED `models` IN YOUR CURRENT DIRECTORY BEFORE TRAINING

In [None]:
def _save_checkpoint(agent, episode, prefix=''):
    """Persist the agent's model and replay buffer and log it to checkpoint.txt.

    Args:
        agent: The DQN agent; must provide ``save_model``, ``save_buffer``,
            ``frames`` and ``epsilon``.
        episode: Current episode number, forwarded to the agent's save methods
            and written to the log line.
        prefix: Text placed in front of the log line (e.g. ``'FINISHED! '``).
    """
    agent.save_model(episode)
    agent.save_buffer(episode)
    current_time = time.strftime('%H:%M:%S', time.localtime())
    with open('checkpoint.txt', 'a') as checkpoint:
        checkpoint.write(
            f'{prefix}Checkpoint :: Episode {episode},Frames={agent.frames},Epsilon={agent.epsilon},time : {current_time} \n')


def train(target_avg_score=50, checkpoint_every=5000, log_every=100, score_window=100):
    """Train a DQN agent on Tetris in 'glimpse' mode until it is good enough.

    Episodes are played back to back. After every action the transition is
    pushed to the agent's replay buffer and ``agent.learn()`` is called.
    Training stops once the mean score over the last ``score_window``
    episodes reaches ``target_avg_score``; a final checkpoint is written
    before returning.

    Side effects: appends to ``checkpoint.txt`` and ``log.txt`` in the current
    directory and calls the agent's ``save_model`` / ``save_buffer``.

    Args:
        target_avg_score: Running-average score at which training stops.
        checkpoint_every: Save a checkpoint every this many actions (counted
            across all episodes).
        log_every: Write a progress line to ``log.txt`` every this many episodes.
        score_window: Number of most recent episodes in the running average.

    Returns:
        None.
    """
    running_scores = deque(maxlen=score_window)
    agent = DQN_Agent()
    env = Tetris(mode='glimpse')
    total_actions = 0
    episode = 0

    while True:
        episode += 1
        score = 0
        state = env.reset()
        done = False

        while not done:
            total_actions += 1
            action = agent.choose_action(state)
            new_state, reward, done = env.play(action)
            agent.update_replay_buffer(action, state, new_state, done, reward)
            agent.learn()
            state = new_state
            # The episode score is the sum of env.lines_cleared read after each
            # action, not the reward; the reward only feeds the replay buffer.
            score += env.lines_cleared

            if total_actions % checkpoint_every == 0:
                _save_checkpoint(agent, episode)

        running_scores.append(score)
        avg_score = np.mean(running_scores)

        if episode % log_every == 0:
            current_time = time.strftime('%H:%M:%S', time.localtime())
            with open('log.txt', 'a') as logger:
                logger.write(f'Episode: {episode} at time {current_time} with avg score {avg_score}\n')

        # Stopping criterion: the running average is checked after every
        # episode, including while the window is still filling up.
        if avg_score >= target_avg_score:
            _save_checkpoint(agent, episode, prefix='FINISHED! ')
            return

In [None]:
# Start training. train() only returns once its running-average score target
# is reached, so this cell can run for a long time.
train()

## Evaluate the DQN

In [None]:
#Functions used in evaluating the model

from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
# Builds a freshly initialised (untrained) network
def create_model():
    """Return a new Keras model mapping an (8, 11) input to 24 linear outputs.

    Layers, in order: Conv1D(32 filters, kernel 3, stride 3, relu),
    Conv1D(64 filters, kernel 2, stride 2, relu), Flatten,
    Dense(512, relu), Dense(24, linear). Every trainable layer uses a
    VarianceScaling(scale=2) kernel initializer.

    The original author notes the layout follows the DeepMind paper.
    """
    def scaled_init():
        # A separate initializer object per layer, as in the original layout;
        # one shared unseeded instance could yield identical draws.
        return tf.keras.initializers.VarianceScaling(scale=2)

    net_input = layers.Input(shape=(8, 11,))

    # Two strided 1-D convolutions over the input
    conv_a = layers.Conv1D(
        32, 3, strides=3, activation="relu", kernel_initializer=scaled_init()
    )(net_input)
    conv_b = layers.Conv1D(
        64, 2, strides=2, activation="relu", kernel_initializer=scaled_init()
    )(conv_a)

    flat = layers.Flatten()(conv_b)

    # Fully connected head ending in 24 linear outputs
    hidden = layers.Dense(
        512, activation="relu", kernel_initializer=scaled_init()
    )(flat)
    q_out = layers.Dense(
        24, activation="linear", kernel_initializer=scaled_init()
    )(hidden)

    return keras.Model(inputs=net_input, outputs=q_out)

# Picks the greedy (highest-valued) action for a state according to the model
def choose_action_test(model,state):
    """Return the index of the largest model output for ``state``.

    Args:
        model: Callable network; invoked as ``model(batch, training=False)``.
        state: A single observation, convertible with ``tf.convert_to_tensor``.

    Returns:
        The argmax over the first (only) row of the model output, as a NumPy
        integer.
    """
    # The model expects a batch, so wrap the single state in a leading axis.
    batch = tf.expand_dims(tf.convert_to_tensor(state), 0)
    q_values = model(batch, training=False)
    # No exploration here: always take the best-scoring action.
    return tf.argmax(q_values[0]).numpy()


In [None]:
# Visualize the agent: load the final saved weights and watch one rendered game
model = create_model()
model.load_weights('models/model_frames_1000000.h5')
test_env = Tetris(mode='glimpse', render_mode='extra')

score = 0
done = False
state = test_env.reset()
while not done:
    # Always act greedily with respect to the loaded network.
    action = choose_action_test(model, state)
    # Rendering is requested through play() itself (render=True), so no
    # separate test_env.render() call is made.
    new_state, reward, done = test_env.play(action, render=True, render_delay=0.5)
    score += reward
    state = new_state


In [None]:
# Record the performance of the agent: for every saved checkpoint
# (5000, 10000, ..., 1000000 frames) play 10 greedy games and store the mean of
# [score, total_lines, singles, doubles, triples] in results_dqn.
results_dqn = []

# Build the network once; load_weights() overwrites every layer's weights, so
# there is no need to construct 200 separate Keras models in one session.
model = create_model()

for frames in range(5000, 1005000, 5000):
    model.load_weights(f'models/model_frames_{frames}.h5')
    # NOTE(review): training and the visualisation cell use mode='glimpse';
    # this relies on Tetris()'s default mode - confirm it matches.
    test_env = Tetris()
    model_results = []
    # Separate loop variable: the original reused `i` here, shadowing the
    # checkpoint index of the outer loop.
    for _ in range(10):
        state = test_env.reset()
        done = False
        score = 0
        total_lines = 0
        singles = 0
        doubles = 0
        triples = 0

        while not done:
            action = choose_action_test(model, state)
            new_state, reward, done = test_env.play(action)
            # lines_cleared is read after every action and tallied by size.
            cleared = test_env.lines_cleared
            total_lines += cleared
            score += reward
            if cleared == 1:
                singles += 1
            elif cleared == 2:
                doubles += 1
            elif cleared == 3:
                triples += 1
            state = new_state

        model_results.append([score, total_lines, singles, doubles, triples])
    # Column-wise mean over the 10 games for this checkpoint.
    results_dqn.append(np.mean(model_results, axis=0))

# Use a context manager so the file is flushed and closed deterministically.
with open('results_dqn.p', 'wb') as f:
    pickle.dump(results_dqn, f)

In [None]:
# Plot the performance recorded in results_dqn.p.
# Each entry of `results` is indexed as
# [reward, lines cleared, singles, doubles, triples].
with open('results_dqn.p', 'rb') as f:
    results = pickle.load(f)

# Checkpoint positions on the x-axis: 5000, 10000, ..., 1000000
x = list(range(5000, 1005000, 5000))
# Constant 0.16 reference line (labelled 'Random agent' in the second figure)
baseline = [0.16] * len(x)

# --- Figure 1: average reward with a cubic trend line ---
rewards = [row[0] for row in results]
plt.figure(figsize=(14, 7))
z = np.polyfit(x, rewards, 3)
p = np.poly1d(z)
plt.plot(x, rewards)
plt.plot(x, p(x), "r")
plt.plot(x, baseline, 'k--')
plt.ticklabel_format(style='plain')
plt.xlabel('Num. Actions', size=14)
plt.ylabel('Average Reward', size=14)
plt.title('Glimpse reward', size=14)
plt.show()

# --- Figure 2: average cleared lines with a cubic trend line (saved to disk) ---
lines = [row[1] for row in results]
plt.figure(figsize=(14, 7))
z = np.polyfit(x, lines, 3)
p = np.poly1d(z)
plt.plot(x, lines, label='Lines Cleared')
plt.plot(x, p(x), "r", label='Mean agent Perfomance')
plt.plot(x, baseline, 'k--', label='Random agent')
plt.legend()
plt.ticklabel_format(style='plain')
plt.xlabel('Num. Actions', size=14)
plt.ylabel('Average cleared lines', size=14)
plt.title('DQN Glimpse', size=14)
plt.savefig('lines cleared', bbox_inches='tight')
plt.show()

# --- Figure 3: singles / doubles / triples (saved to disk) ---
plt.figure(figsize=(14, 7))
plt.plot(x, [row[2] for row in results], label='singles')
plt.plot(x, [row[3] for row in results], 'y', label='doubles')
plt.plot(x, [row[4] for row in results], 'r', label='triples')
plt.ticklabel_format(style='plain')
plt.legend()
plt.xlabel('Num. Actions', size=14)
plt.ylabel('Lines Cleared', size=14)
plt.savefig('sev', bbox_inches='tight')
plt.show()