# NES Tetris

This notebook serves as a quick way to run the NES Tetris project.

Short markdown notes are provided to explain what each section does.

However, this notebook is really only meant to be run in Colab (on the side).

Most of the code here is copy-pasted from the project's regular source files.

# Constants

Contains some constants that we want to use.

Config settings are put in here too.

In [None]:
# Contains relevant constants to be used
import os

# --- Run configuration ---
STARTING_LEVEL = 18
IS_TRUE_RANDOM_PIECES = False
WEIGHTS_FILENAME = 'weights_sample_8f_rrw'

# --- Episode / epsilon settings ---
EPISODES = 2000
EPISODE_INTERVAL = 50
EPSILON_LIMIT = 1500
SAVE_SEPARATE_TRAINING_INTERVALS = True

# --- Movement options ---
IS_DAS_ON = False
IS_ONE_DIRECTION_MOVEMENT = False

# Board dimensions, in cells
BOARD_HEIGHT, BOARD_WIDTH = 20, 10

# Frames required for a piece to move down 1 cell vertically.
# Wrapped in an outer list, so it is read as FRAMES_PER_GRIDCELL_Y[0][level].
FRAMES_PER_GRIDCELL_Y = [[
    48, 43, 38, 33, 28, 23, 18, 13, 8, 6,  # levels 0-9
    5, 5, 5, 4, 4, 4, 3, 3, 3, 2,          # levels 10-19
    2, 2, 2, 2, 2, 2, 2, 2, 2, 1,          # levels 20-29
]]

# Delayed auto-shift, forces first move to be delayed to 16 frames
DAS_DELAY = 16

# How many frames it takes per move
FRAME_DELAY = 6

# Initial lines needed for first level increase.
# Wrapped in an outer list, so it is read as LINES_FIRST_LEVEL_JUMP[0][level].
LINES_FIRST_LEVEL_JUMP = [[
    10, 20, 30, 40, 50, 60, 70, 80, 90, 100,            # levels 0-9
    100, 100, 100, 100, 100, 100, 110, 120, 130, 140,   # levels 10-19
    150, 160, 170, 180, 190, 200, 200, 200, 200,        # levels 20-28
]]

# How many lines per subsequent level increase
LINES_PER_LEVEL = 10

# Reinforcement learning
ACTION_SIZE = 13
STATE_SIZE = 8

# Paths (all derived from WEIGHTS_FILENAME)
WEIGHT_PATH = f'{WEIGHTS_FILENAME}.h5'
WEIGHT_DIR = ''
IMAGE_PATH = f'{WEIGHTS_FILENAME}.png'
LOG_DIR = 'logs'

# Cell values used to identify pieces on the board: 0 is an empty cell, 1-7 are
# the seven shapes and 8 marks the piece that is currently falling.
(
    PIECE_ID_EMPTY,
    PIECE_ID_I,
    PIECE_ID_O,
    PIECE_ID_T,
    PIECE_ID_S,
    PIECE_ID_Z,
    PIECE_ID_J,
    PIECE_ID_L,
    PIECE_ID_CURRENT,
) = range(9)

# y offset needed for initial placement, indexed by piece ID
PIECE_Y_OFFSET_INITIAL = [0, 2, 0, 1, 1, 1, 1, 1]

# Logger

The logger to be used.

In [None]:
# Contains the Logger to be used
import logging, sys, os

def setup_custom_logger():
    """Configures the root logger to be used globally and returns it.

    Log records are appended to 'output.log' in the current working directory and
    are also echoed to stdout.

    NOTE: Every call adds another stdout handler to the root logger, so this should
    only be called once (get_logger() guards against repeated setup).

    Returns:
        The root logger, with its level set to INFO.
    """
    # os.path.join picks the correct separator for the platform. The previous
    # "'win' in sys.platform" test also matched 'darwin', which produced a
    # backslash-separated filename on macOS.
    filename = os.path.join(os.getcwd(), 'output.log')
    logging.basicConfig(filename=filename,
                            filemode='a',
                            format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
                            datefmt='%H:%M:%S',
                            level=logging.INFO)
    log = logging.getLogger()
    log.setLevel(logging.INFO)

    stdout_handler = logging.StreamHandler(sys.stdout)
    log.addHandler(stdout_handler)
    return log

def get_logger():
    """Fetches the shared logger, configuring it on first use.

    If the 'root' logger already has a handler attached (directly or through a
    parent), it is returned untouched; otherwise setup_custom_logger() is run and
    its result is returned.

    Returns:
        The logger to be used in the script.
    """
    root_logger = logging.getLogger('root')
    if root_logger.hasHandlers():
        return root_logger

    return setup_custom_logger()

# Shared logger instance used by the code cells below (e.g. the Board class)
log = get_logger()

# Shape

The Class corresponding to the Shape that a Piece represents.

In [None]:
# Contains the Shape class which is a template for the Piece class to refer to

class Shape:
  """Template describing one Tetris shape in all of its orientations.

  A Shape keeps the raw orientation grids and the pre-computed (y, x) offsets of
  the filled cells for every rotation, so that Piece objects can look them up.
  """
  def __init__(self, id, orientations):
      """
      Args:
        id (int): The ID of the Shape.
        orientations (List): One grid per rotation state. Each grid is a List of
        strings where ' ' is an empty cell and any other character is a filled cell.
      """
      self.id = id
      self.orientations = orientations
      self.max_rotations = len(orientations)

      # height and width of the grid, taken from the first orientation
      self.height = len(orientations[0])
      self.width = len(orientations[0][0])

      # shape_coord[rotation] holds the (y, x) offsets filled in that rotation
      self.shape_coord = []
      self.generate_all_shape_coord()

  def generate_all_shape_coord(self):
      """Appends the filled-cell offsets of every rotation to shape_coord.
      """
      self.shape_coord.extend(
          list(self.generate_shape_coord(rotation))
          for rotation in range(self.max_rotations)
      )

  def generate_shape_coord(self, rotation):
      """Yields the (y, x) offset of each filled cell of the given rotation.

      Cells are visited column by column (left to right), top to bottom within
      each column.
      """
      grid = self.get_orientation(rotation)
      for col in range(self.width):
          filled_in_column = (
              (row, col) for row in range(self.height) if grid[row][col] != ' '
          )
          yield from filled_in_column

  def get_orientation(self, rotation):
      """Returns the grid for a rotation value (wrapped to the valid range).
      """
      index = rotation % self.max_rotations
      return self.orientations[index]

  def get_coord_occupied(self, rotation):
      """Returns the pre-computed filled-cell offsets for a rotation value
      (wrapped to the valid range).
      """
      index = rotation % self.max_rotations
      return self.shape_coord[index]

  def get_id(self):
    """Returns the ID of the Shape.
    """
    return self.id

# The different possible Shapes of Pieces and their orientations.
# id - 0 / empty, 1 / I, 2 / O, 3 / T, 4 / S, 5 / Z, 6 / J, 7 / L
#
# Each *_COORDS value is a list of orientations, indexed by rotation state. An
# orientation is a list of row strings (top row first) in which ' ' is an empty
# cell and '#' is a filled cell.
# The first shape is empty.

SHAPE_NULL_COORDS = [[' ']]

# I piece: 2 orientations on a 4x4 grid
SHAPE_I_COORDS = [[
    '    ',
    '    ',
    '####',
    '    ',
], [
    '  # ',
    '  # ',
    '  # ',
    '  # ',
]]

# O piece: a single orientation on a 2x2 grid
SHAPE_O_COORDS = [[
    '##',
    '##',
]]

# T piece: 4 orientations on a 3x3 grid
SHAPE_T_COORDS = [[
    '   ',
    '###',
    ' # ',
], [
    ' # ',
    '## ',
    ' # ',
], [
    ' # ',
    '###',
    '   ',
], [
    ' # ',
    ' ##',
    ' # ',
]]

# S piece: 2 orientations on a 3x3 grid
SHAPE_S_COORDS = [[
    '   ',
    ' ##',
    '## ',
], [
    ' # ',
    ' ##',
    '  #',
]]

# Z piece: 2 orientations on a 3x3 grid
SHAPE_Z_COORDS = [[
    '   ',
    '## ',
    ' ##',
], [
    '  #',
    ' ##',
    ' # ',
]]

# J piece: 4 orientations on a 3x3 grid
SHAPE_J_COORDS = [[
    '   ',
    '###',
    '  #',
], [
    ' # ',
    ' # ',
    '## ',
], [
    '#  ',
    '###',
    '   ',
], [
    ' ##',
    ' # ',
    ' # ',
]]

# L piece: 4 orientations on a 3x3 grid
SHAPE_L_COORDS = [[
    '   ',
    '###',
    '#  ',
], [
    '## ',
    ' # ',
    ' # ',
], [
    '  #',
    '###',
    '   ',
], [
    ' # ',
    ' # ',
    ' ##',
]]

# Declare all the shapes; the first argument is the shape ID listed above.

SHAPE_NULL = Shape(0, SHAPE_NULL_COORDS)

SHAPE_I = Shape(1, SHAPE_I_COORDS)

SHAPE_O = Shape(2, SHAPE_O_COORDS)

SHAPE_T = Shape(3, SHAPE_T_COORDS)

SHAPE_S = Shape(4, SHAPE_S_COORDS)

SHAPE_Z = Shape(5, SHAPE_Z_COORDS)

SHAPE_J = Shape(6, SHAPE_J_COORDS)

SHAPE_L = Shape(7, SHAPE_L_COORDS)

# The list of templated shapes.
# SHAPES holds only the 7 playable shapes (no empty shape), so its indices are NOT
# shape IDs. SHAPES_ID, SHAPES_COORDS and SHAPES_NAMES all start with the empty
# shape, so they can be indexed directly by shape ID.
SHAPES = [SHAPE_I, SHAPE_O, SHAPE_T, SHAPE_S, SHAPE_Z, SHAPE_J, SHAPE_L]
SHAPES_ID = [SHAPE_NULL, SHAPE_I, SHAPE_O, SHAPE_T, SHAPE_S, SHAPE_Z, SHAPE_J, SHAPE_L]
SHAPES_COORDS = [SHAPE_NULL_COORDS, SHAPE_I_COORDS, SHAPE_O_COORDS, SHAPE_T_COORDS, 
                 SHAPE_S_COORDS, SHAPE_Z_COORDS, SHAPE_J_COORDS, SHAPE_L_COORDS]
SHAPES_NAMES = ["Empty", "Long bar", "O", "T", "S", "Z", "J", "L"]


# Piece

The Class corresponding to the Tetris Pieces.

In [None]:
# Contains the Piece class representing a Tetramino / Tetris piece

class Piece:
  """The class representing a Tetramino (or piece).

  Contains information about the coordinates and orientation of the piece.
  All cell coordinates are stored as [y, x] pairs (row first, then column).
  """
  def __init__(self, id, x=0, y=0, rotation=0):
    """Inits the Piece.

    Args:
      id (int): The ID corresponding to the shape of the Piece, which (as documented with the
      Shape definitions) are 0 / empty, 1 / I, 2 / O, 3 / T, 4 / S, 5 / Z, 6 / J, 7 / L.
      x (int): The starting X coordinate of the Piece (From left to right).
      y (int): The starting Y coordinate of the Piece (From top to down).
      rotation (int): The rotation state of the Piece (positive is clockwise, negative is
      anti-clockwise).
    """
    self.x = x
    self.y = y
    self.id = id
    self.rotation = rotation

    self.shape_coords = None # the generic (y, x) offsets occupied by the shape of the Piece
    self.coords = [] # stays empty until the first move() / update_coords()
    self.prev_coords = []

  def rotate(self, value):
    """Rotates the Piece based on the given value.

    The resulting rotation is wrapped to the number of orientations of the shape.

    Args:
      value (int): The rotation to apply to the Piece (positive is clockwise, negative is 
      anti-clockwise).
    """
    self.rotation = (self.rotation + value) % len(SHAPES_COORDS[self.id])

  def move(self, x, y, rotation):
    """Moves the Piece based on a given move.

    Args:
      x (int): The number of cells to move the Piece in the x direction (From left to right).
      y (int): The number of cells to move the Piece in the y direction (From top to down).
      rotation (int): The amount to rotate the Piece (positive is clockwise, negative is 
      anti-clockwise).
    Returns:
      A tuple of (previous coordinates, new coordinates), each a List of [y, x] pairs.
    """
    # update_coords() builds a fresh list, so prev_coords keeps the old cells
    self.prev_coords = self.coords
    self.x += x
    self.y += y

    self.rotate(rotation)
    self.coords = self.update_coords()

    return self.prev_coords, self.coords

  def update_coords(self):
    """Updates the coordinates of a Piece based on its own information.

    Returns:
      The new coordinates as a List of [y, x] pairs.
    """
    self.shape_coords = SHAPES_ID[self.id].get_coord_occupied(self.rotation)
    self.coords = [[self.y + coord[0], self.x + coord[1]] for coord in self.shape_coords]

    return self.coords

  def simulate_move(self, x, y, rotation):
    """Simulates a move and returns the coordinates of the Piece without altering
    its actual information.

    Args:
      x (int): The number of cells to move the Piece in the x direction (From left to right).
      y (int): The number of cells to move the Piece in the y direction (From top to down).
      rotation (int): The amount to rotate the Piece (positive is clockwise, negative is 
      anti-clockwise).
    Returns:
      The coordinates of the Piece as a List of [y, x] pairs, one for each cell that it
      would occupy.
    """
    curr_x = self.x + x
    curr_y = self.y + y
    curr_rotation = (self.rotation + rotation) % len(SHAPES_COORDS[self.id])

    shape_coords = SHAPES_ID[self.id].get_coord_occupied(curr_rotation)
    return [[curr_y + coord[0], curr_x + coord[1]] for coord in shape_coords]
    
  def is_valid_state(self, board):
    """Returns True if the current state of the Piece is legal in a given board.

    A state is legal when every occupied cell lies inside the board and only covers
    cells that are empty or marked as the current piece.

    Args:
      board (2D Array): The 2D array corresponding to the board state that is being
      validated, indexed as board[y][x].
    """
    # The board constants are module-level names in this notebook; there is no
    # 'constants' module object to read them from.
    for coord in self.coords:
      x = coord[1]
      y = coord[0]

      if x < 0 or x >= BOARD_WIDTH or y < 0 or y >= BOARD_HEIGHT:
        return False
      if board[y][x] != PIECE_ID_EMPTY and board[y][x] != PIECE_ID_CURRENT:
        return False

    return True

  def get_y_coord_shift(self):
    """Calculates the amount that the Piece will fall after a move.
    """
    # TODO: Implement based on a move generated by the board.
    #return FRAME_DELAY / FRAMES_PER_GRIDCELL_Y[STARTING_LEVEL]

    # return a single drop for the time being for simplicity
    return 1

  def get_id(self):
    """Returns the shape ID of the Piece.
    """
    return self.id

  def print_info(self):
    """Prints the data of the Piece.
    """
    print("current id is:", self.get_id(), "/", SHAPES_NAMES[self.id],
          "Rotation:", self.rotation)
    print("coords:", self.get_coords())

    print("current piece arrangement")
    for line in SHAPES_COORDS[self.get_id()][self.rotation]:
      print("|", line, "|")

  def print_simulated_move(self, x, y, rotation_val):
    """Prints the data of the Piece, given a simulated move.
    
    Args:
      x (int): The number of cells to move the Piece in the x direction (From left to right).
      y (int): The number of cells to move the Piece in the y direction (From top to down).
      rotation_val (int): The amount to rotate the Piece (positive is clockwise, negative is 
      anti-clockwise).
    """
    rotation = (self.rotation + rotation_val) % len(SHAPES_COORDS[self.id])

    print("current id is:", self.get_id(), "/", SHAPES_NAMES[self.id],
          "Rotation:", rotation)
    print("coords:", self.simulate_move(x, y, rotation_val))

    print("current piece arrangement")
    for line in SHAPES_COORDS[self.get_id()][rotation]:
      print("|", line, "|")

  def get_coords(self):
    """Returns the coordinates of cells occupied by the Piece.
    """
    return self.coords

  def get_prev_coords(self):
    """Returns the previous coordinates of cells occupied by the Piece.
    """
    return self.prev_coords

  def get_rotation(self):
    """Returns the current rotation value of the Piece.
    """
    return self.rotation


# Board

The Class representing the Tetris board that an agent plays with.

In [None]:
# Contains the Board class representing the Tetris board
import random, copy

class Board:
  """The Class representing a NES Tetris board.
  """
  def __init__(self, height, width):
    """Creates an empty board and draws the current and next pieces.

    Args:
      height (int): The height of the board.
      width (int): The width of the board.
    """
    self.height, self.width = height, width

    # grid of piece IDs, indexed as pieces_table[row][column]
    self.pieces_table = [[0] * width for _ in range(height)]

    self.piece = None
    self.piece_next = None

    # progress counters
    self.ticks = 0 # the current frame count
    self.level = STARTING_LEVEL
    self.move_count = 0
    self.piece_count = 0
    self.line_clears = 0
    self.line_clears_for_next_level = LINES_FIRST_LEVEL_JUMP[0][self.level]

    # how often 1 / 2 / 3 / 4 lines were cleared at once
    self.line_clear_single = 0
    self.line_clear_double = 0
    self.line_clear_triple = 0
    self.line_clear_tetris = 0

    # both tables are built once here, from the starting level
    self.scoring_system = self.generate_scoring_system()
    self.reward_system = self.generate_reward_system()
    self.score = 0
    self.game_over = False

    # NOTE: unused as holding pieces are not allowed in NES tetris, but can be configured later on
    self.piece_holding = None
    self.piece_last = None
    self.can_hold = False

    # fill the bag first, then draw the current and the next piece from it
    self.bag = self.generate_bag()
    self.piece = self.create_piece()
    self.piece_next = self.create_piece()

  def generate_bag(self):
    """Returns a List of Piece objects.

    NOTE: If IS_TRUE_RANDOM_PIECES is set to True, then the bag is randomly generated, 
    otherwise it is guaranteed to have all 7 unique pieces returned.
    """
    random_shapes = list(SHAPES)
    random.shuffle(random_shapes)

    bag = []
    # generate bag based on whether true random or fixed
    if IS_TRUE_RANDOM_PIECES:
      for i in range(7):
        id = random_shapes[random.randint(0, 6)].get_id()
        bag.append(Piece(id))
    else:
      for shapes in random_shapes:
        bag.append(Piece(shapes.get_id()))

    return bag

  def create_piece(self):
    """Returns the first piece in the List representing the bag.
    If the bag is non-existent, then a new one is created.
    """
    if not self.bag:
      self.bag = self.generate_bag()
    return self.bag.pop()

  def place_new_piece(self):
    """Promotes the next piece to the current piece, draws a new next piece and
    puts the current piece on the board.

    If the board cannot be updated with the new piece, the game is flagged as over.
    """
    self.piece = self.piece_next
    self.piece_next = self.create_piece()

    # spawn in the middle of the board, shifted up by the piece's initial y offset
    spawn_x = int(self.width / 2)
    spawn_y = 0 - PIECE_Y_OFFSET_INITIAL[self.piece.get_id()]
    self.piece.move(spawn_x, spawn_y, 0)

    if self.update_board():
      self.piece_count += 1
      self.ticks = 0
    else:
      self.game_over = True

  def move_piece(self, move, verbose = False):
    """Applies a move to the current piece and locks the piece if it has landed.

    The move is assumed to be legal and checked beforehand. When the piece is set,
    line clears and score are updated and a new piece is placed.

    Args:
      move (tuple): A tuple of (x, y, rotation) representing the movement in x / y dimension
      and the rotation of the piece.
      verbose (Boolean): Prints the current piece state and render the board if True.
    """
    shift_x, shift_y, rotation = move[0], move[1], move[2]
    self.get_current_piece().move(shift_x, shift_y, rotation)
    self.update_board()
    self.lines_cleared_recently = 0

    if not self.is_piece_set():
      # piece is still falling: optionally show it and stop here
      if verbose:
        self.print_current_piece_state()
        self.render_board()
      return

    # piece has landed: lock it, resolve line clears and bring in the next piece
    self.set_piece()
    self.lines_cleared_recently = self.update_line_clear()
    self.update_score(self.lines_cleared_recently)
    self.place_new_piece()

    if verbose:
      log.info('========================================')
      self.render_board()

  def set_piece(self):
    """Sets the piece on the board.
    It is assumed that piece can be set and is checked beforehand.
    """
    for coord in self.piece.get_coords():
      self.pieces_table[coord[0]][coord[1]] = self.piece.get_id()

  def force_set_next_piece(self):
    """Manually sets the next piece, only done when the next piece has no 
    Available moves, and cannot be set because of this
    """
    if self.is_piece_set():
      self.set_piece()

  def update_board(self):
    """Updates the current piece's coords on the board, and returns True if successful.
    """
    try:
      # remove the old piece from the board
      for coord in self.piece.get_prev_coords():
        if self.pieces_table[coord[0]][coord[1]] == PIECE_ID_CURRENT:
          self.pieces_table[coord[0]][coord[1]] = PIECE_ID_EMPTY
        else:
          #self.reset_move(self.piece.get_coords(), self.piece.get_prev_coords())
          return False

      # add the new position of the piece
      for coord in self.piece.get_coords():
        if self.pieces_table[coord[0]][coord[1]] > PIECE_ID_EMPTY and \
          self.pieces_table[coord[0]][coord[1]] < PIECE_ID_CURRENT:
          self.reset_move(self.piece.get_coords(), self.piece.get_prev_coords())
          return False
        else:
          self.pieces_table[coord[0]][coord[1]] = PIECE_ID_CURRENT

      self.move_count += 1
      return True
    except IndexError:
      log.warn('Out of bounds when updating board')
      log.warn(self.piece.get_coords())

  def update_line_clear(self):
    """Runs through the board, clears any filled lines and returns how many that were cleared
    """
    lines_to_clear = [] # keep track of previous lines cleared to prevent double-count
    lines_cleared = []

    # run through all coords of the current piece to get the lines to clear
    for coord in self.piece.get_coords():
      if self.is_line_clear(coord[0]) and coord[0] not in lines_to_clear:
        lines_to_clear.append(coord[0])

    # clear the lines
    lines_to_clear.reverse()
    for line in lines_to_clear:
      lines_cleared.append(self.clear_line(line))

    no_of_lines_cleared = len(lines_to_clear)

    # add back lines
    self.add_empty_lines(no_of_lines_cleared)

    self.line_clears += no_of_lines_cleared

    if no_of_lines_cleared == 1:
      self.line_clear_single += 1
    elif no_of_lines_cleared == 2:
      self.line_clear_double += 1
    elif no_of_lines_cleared == 3:
      self.line_clear_triple += 1
    elif no_of_lines_cleared == 4:
      self.line_clear_tetris += 1

    return len(lines_to_clear)

  def clear_line(self, line_number):
    """Clears the specified line.
    """
    return self.pieces_table.pop(line_number)

  def add_empty_lines(self, lines_to_add):
    """Adds an empty line at the top of the board.
    """
    for i in range(lines_to_add):
      self.pieces_table.insert(0, [0 for i in range(self.width)])

  def update_level(self):
    """Checks and updates level according to line clears.
    """
    if self.lines >= self.line_clears_for_next_level:
      self.level += 1
      self.line_clears_for_next_level += 10

  def reset_move(self, coords, prev_coords):
    """Resets the previous move made, because it was illegal and executed halfway
    This does not revert any overwritten cells that were previously occupied by a piece
    """
    # revert new state
    for coord in coords:
      if self.pieces_table[coord[0]][coord[1]] == PIECE_ID_CURRENT:
        self.pieces_table[coord[0]][coord[1]] = PIECE_ID_EMPTY

    # restore previous state
    for coords in prev_coords:
      self.pieces_table[coord[0]][coord[1]] = PIECE_ID_CURRENT

  def update_frame_count(self):
    """Update frame tick count and move the piece down.

    If 'IS_DAS_ON' is True in the config, then the first move will receive DAS delay.
    This assumes that the move only gets the delay at the first instance.
    """
    if IS_DAS_ON and self.ticks == 0:
      self.ticks += DAS_DELAY
    self.ticks += FRAME_DELAY

  def update_score(self, lines_cleared):
    """pdates the score based on the level
    """
    self.score += self.scoring_system[lines_cleared]

  def generate_scoring_system(self):
    """Returns a list of points given for line clears for the current level
    """
    return [0, (40 * self.level), (100 * self.level), (300 * self.level), (1200 * self.level)]

  def generate_reward_system(self):
    """Returns a list of points given for line clears for the current level
    """
    #return [0, (1 * self.level), (10 * self.level), (100 * self.level), (1000 * self.level)]
    return [0, 10, 100, 1000, 10000]

  def generate_state_info_board(self, new_coords, pieces_table_copy):
    """Performs a hard drop (places the piece on the board as if it would just fall 
    vertically only), then returns several things related to the new board.

    Returns:
      landing_height, the lowest point at which a piece will end up on the board.
      hard_drop_value, the number of cells that a piece will end up dropping.
      pieces_table_copy, the resulting board as a result of the piece being hard dropped.
    """
    # remove the piece
    for coord in self.piece.get_coords():
      pieces_table_copy[coord[0]][coord[1]] = PIECE_ID_EMPTY

    # try to do a hard drop
    hard_drop_value = 0
    for i in range(self.height):
      is_valid = True
      for coord in new_coords:
        if coord[0] + hard_drop_value + 1 >= self.height or pieces_table_copy[coord[0] + \
          hard_drop_value + 1][coord[1]] != PIECE_ID_EMPTY:
          is_valid = False
          break

      if is_valid:
        hard_drop_value += 1
      else:
        break
    
    # update the coords
    landing_height = self.height - 1
    for coord in new_coords:
      pieces_table_copy[coord[0] + hard_drop_value][coord[1]] = PIECE_ID_CURRENT
      landing_height = min(landing_height, coord[0] + hard_drop_value)

    #self.render_given_board(pieces_table_copy)
    return landing_height / 20, hard_drop_value, pieces_table_copy

  def render_board(self):
    """Prints out the entire board
    """
    for i in range(self.height):
      line_to_print = "|"
      for j in range(self.width):
        id = self.pieces_table[i][j]
        if id == PIECE_ID_EMPTY:
          line_to_print += " |"
        elif id == PIECE_ID_CURRENT:
          line_to_print += "=|"
        else:
          line_to_print += "X|"
      
      print(line_to_print)

  def render_given_board(self, board):
    """Prints out the entire board that is given as args.
    """
    for i in range(len(board)):
      line_to_print = "|"
      for j in range(len(board[0])):
        id = board[i][j]
        if id == PIECE_ID_EMPTY:
          line_to_print += " |"
        elif id == PIECE_ID_CURRENT:
          line_to_print += "=|"
        else:
          line_to_print += "X|"
      
      log.info(line_to_print)


  def print_current_piece_state(self):
    """Prints the data of the current piece.
    """
    self.piece.print_info()

  def print_next_piece_state(self):
    """Prints the data of the next piece.
    """
    self.piece_next.print_info()

  def is_valid_move(self, piece, move, board, verbose=False):
    """Returns True if the move is legal, False otherwise.
    """
    # ignore the elusive O spin
    if piece.get_id == 2 and move[2] != 0:
      return False

    coords = piece.simulate_move(move[0], move[1], move[2])
    if verbose:
      piece.print_simulated_move(move[0], move[1], move[2])
    for coord in coords:
      x = coord[1]
      y = coord[0]

      if x < 0 or x >= self.width or y < 0 or y >= self.height:
        return False
      if board[y][x] != 0 and board[y][x] != 8:
        return False

    return True

  def is_piece_set(self):
    """Returns if the piece should be set into the board
    Piece is considered set if it touches another piece on the board
    """
    for coord in self.piece.get_coords():
      if coord[0] == (self.height - 1):
        return True
      elif self.pieces_table[coord[0] + 1][coord[1]] > PIECE_ID_EMPTY and \
        self.pieces_table[coord[0] + 1][coord[1]] < PIECE_ID_CURRENT:
        return True

    return False

  def is_game_over(self):
    """Returns if the game is over.

    The flag is set by place_new_piece() when a new piece cannot be put on the board.
    """
    return self.game_over

  def is_natural_drop(self):
    """Returns whether the piece should drop on its own and reset tick count if true.
    """
    if self.ticks > FRAMES_PER_GRIDCELL_Y[0][self.level]:
      self.ticks = self.ticks % FRAMES_PER_GRIDCELL_Y[0][self.level]
      return True

    return False

  # returns if a given line in the board is to be cleared
  def is_line_clear(self, line_number):
    for i in range(len(self.pieces_table[line_number])):
      if self.pieces_table[line_number][i] == PIECE_ID_EMPTY:
        return False 
    return True

  def get_available_moves_state_info(self, verbose = False):
    """Returns a list of [move, state info] pairs, one for each possible move.

    Each move is a list of 3 values (x offset, y offset, rotation).
    State info is the list of values returned by get_board_info() for the board that
    results from the move.

    NOTE: get_board_info() adds the hard drop distance to the y offset of the move
    that it is given, so the y offsets of the returned moves are not the ones
    listed in the table below.

    Args:
      verbose (Boolean): Passed on to is_valid_move(), get_board_info() and
      generate_tucks_and_spins().
    """
    # TODO: frame delay system
    self.update_frame_count() # advances the tick count by one move

    # candidate moves as [x offset, y offset, rotation]: shifts to the left first,
    # then shifts to the right, then moves without a horizontal shift
    moves = [[-1, 1, 0],  [-2, 2, 0], [-3, 3, 0], [-4, 4, 0], [-5, 5, 0], [-6, 6, 0], \
      [-1, 1, -1], [-1, 1, 1], [-2, 2, -1], [-2, 2, 1], [-3, 3, -1], [-3, 3, 1], \
        [-4, 4, -1], [-4, 4, 1], [-5, 5, -1], [-5, 5, 1], [-6, 6, -1], [-6, 6, 1], \
          [-1, 2, -2], [-1, 2, 2], [-2, 2, -2], [-2, 2, 2], [-3, 3, -2], [-3, 3, 2], \
            [-4, 4, -2], [-4, 4, 2], [-5, 5, -2], [-5, 5, 2], [-6, 6, -2], [-6, 6, 2], \
              [-1, 2, -3], [-1, 2, 3], [-2, 2, -3], [-2, 2, 3], [-3, 3, -3], [-3, 3, 3], \
                [-4, 4, -3], [-4, 4, 3], [-5, 5, -3], [-5, 5, 3], [-6, 6, -3], [-6, 6, 3], \
                  [1, 1, 0], [2, 2, 0], [3, 3, 0],  [4, 4, 0], [5, 5, 0], [6, 6, 0], \
                    [1, 1, -1], [1, 1, 1], [2, 2, -1], [2, 2, 1], [3, 3, -1], [3, 3, 1], \
                      [4, 4, -1], [4, 4, 1], [5, 5, -1], [5, 5, 1], [6, 6, -1], [6, 6, 1], \
                        [1, 2, -2], [1, 2, 2], [2, 2, -2], [2, 2, 2], [3, 3, -2], [3, 3, 2], \
                          [4, 4, -2], [4, 4, 2], [5, 5, -2], [5, 5, 2], [6, 6, -2], [6, 6, 2], \
                            [1, 2, -3], [1, 2, 3], [2, 2, -3], [2, 2, 3], [3, 3, -3], [3, 3, 3], \
                              [4, 4, -3], [4, 4, 3], [5, 5, -3], [5, 5, 3], [6, 6, -3], [6, 6, 3], \
                                [0, 1, 0], [0, 1, 1], [0, 1, -1], [0, 2, 2], [0, 2, -2], [0, 3, 3], [0, 3, -3]]
    
    legal_moves = []
    tucks_and_spins = [] # NOTE(review): never used below

    # scratch copy of the board; it is replaced by a fresh copy for every legal move
    # because get_board_info() modifies the board that it is given
    test_pieces_table = copy.deepcopy(self.pieces_table)
    for move in moves:
      if self.is_valid_move(self.piece, move, test_pieces_table, verbose=verbose):
        test_pieces_table = copy.deepcopy(self.pieces_table)
        legal_moves.append([move, self.get_board_info(move, test_pieces_table, verbose=verbose)])

        # tuck / spin
        # NOTE(review): generate_tucks_and_spins is defined outside this section; it
        # receives the move after its y offset was changed by get_board_info() and the
        # board after the simulated hard drop - confirm that this is intended
        for tuck_or_spin in self.generate_tucks_and_spins(move, test_pieces_table, verbose=verbose):
          legal_moves.append([tuck_or_spin, self.get_board_info(tuck_or_spin, copy.deepcopy(self.pieces_table), verbose=verbose)])

    return legal_moves

  def get_board_info(self, move, pieces_table_copy, verbose=False):
    """Returns the state info of the board, to be passed into the agent
    More state info can be given to the agent if needed.

    NOTE: The piece is moved and then hard dropped to evaluate how good the move may be.

    NOTE: This modifies its arguments: the hard drop distance is added to move[1],
    and pieces_table_copy is changed by the simulated hard drop.

    Args:
      move (List): The move as (x offset, y offset, rotation).
      pieces_table_copy (2D Array): A copy of the board to simulate the move on.
      verbose (Boolean): Logs the landing height if True.
    Returns:
      A list of 8 values: [landing_height, board_height, bumpiness, holes,
      lines_cleared, row_transitions, lines_ready_to_clear, right_well].
    """
    # get the new board state
    coords = self.piece.simulate_move(move[0], move[1], move[2]) # the new coords
    landing_height, hard_drop_value, simulated_pieces_table = \
      self.generate_state_info_board(coords, pieces_table_copy)

    # the average height of columns with a piece
    board_height = self.get_state_board_height(simulated_pieces_table)

    # the sum of differences between heights of a column and its adjacent columns
    bumpiness = self.get_state_bumpiness(simulated_pieces_table)

    # how many cells have an empty cell below
    holes = self.get_state_holes(simulated_pieces_table)

    # how many columns are empty
    #wells = self.get_state_wells(simulated_pieces_table)

    # how many lines will be cleared by the move (coords are the cells before the
    # hard drop, so the drop distance is passed along)
    lines_cleared = self.get_state_line_features(simulated_pieces_table, coords, hard_drop_value)

    # how many rows are filled except for the last column
    lines_ready_to_clear = self.get_lines_ready_to_clear(simulated_pieces_table)

    # how many empty / filled cells are adjacent to a filled / empty cell on the same row
    # (col_transitions is computed but not part of the returned state)
    row_transitions, col_transitions = self.get_state_transitions(simulated_pieces_table)

    # proportion of pieces on the left of the board
    #proportion_left = self.get_proportion_left_side(simulated_pieces_table)

    # if a right well is present
    right_well = self.get_right_well(simulated_pieces_table)

    if verbose:
      log.info(' * landing_height:             ' + str(landing_height))
      #log.info(' * board_height:               ' + str(board_height))
      #log.info(' * bumpiness:                  ' + str(bumpiness))
      #log.info(' * holes:                      ' + str(holes))
      #log.info(' * lines_cleared:              ' + str(lines_cleared))
      #log.info(' * lines_ready_to_clear:       ' + str(lines_ready_to_clear))
      #log.info(' * row_transitions:            ' + str(row_transitions))
      #log.info(' * col_transitions:            ' + str(col_transitions))
      #log.info(' * proportion_left:            ' + str(proportion_left))
      #log.info(' * right_well:                 ' + str(right_well))

    # the caller's move list is changed in place so that it includes the hard drop
    move[1] += hard_drop_value

    return [landing_height, board_height, bumpiness, holes, lines_cleared, \
      row_transitions, lines_ready_to_clear, right_well]
    #return [landing_height, board_height, bumpiness, holes, lines_cleared, \
    #  lines_ready_to_clear, row_transitions, col_transitions, proportion_left, right_well]

  def get_state_line_features(self, board, coords, hard_drop_value):
    """Returns the number of lines cleared when a piece is hard dropped.

    The agent should want to clear lines with a move.

    Args:
      board (Array): The 2D Array of the board.
      coords (List of Tuples): The List of Tuples of (y, x) coordinates.
      hard_drop_value (int): The number of spaces that a piece hard drops.
    """
    lines_to_clear = set() # keep track of previous lines to not double-count

    # run through all coords of the current piece to get the lines to clear
    for coord in coords:
      if coord[0] not in lines_to_clear:
        is_cleared = True

        for j in range(self.width): # left to right
          if board[coord[0] + hard_drop_value][j] == PIECE_ID_EMPTY:
            is_cleared = False
            break

        if is_cleared:
          lines_to_clear.add(coord[0])

    return len(lines_to_clear) / 4

  def get_lines_ready_to_clear(self, board):
    """Returns the normalized count of rows that only miss their last cell.

    A row qualifies when every column except the right-most one is filled and
    the right-most column is still empty.

    The agent should make sure that the end board state is ready to score.

    Args:
      board (Array): The 2D Array of the board.

    Returns:
      The number of qualifying rows divided by 20.
    """
    last_col = self.width - 1
    ready_rows = 0

    for row_index in range(self.height):
      row = board[row_index]
      body_filled = all(row[col] != PIECE_ID_EMPTY for col in range(last_col))

      if body_filled and row[last_col] == PIECE_ID_EMPTY:
        ready_rows += 1

    return ready_rows / 20

  def get_state_wells(self, board):
    """Returns how many columns of the board contain no filled cell at all.

    Args:
      board (Array): The 2D Array of the board.

    Returns:
      The count of completely empty columns, as an int.
    """
    column_count = len(board[0])

    return sum(
      1
      for col in range(column_count)
      if all(row[col] == PIECE_ID_EMPTY for row in board)
    )

  def get_state_board_height(self, board):
    """Returns the normalized average height of all non-empty columns.

    Rows are indexed from the top, so the height of a column is self.height
    minus the row index of its topmost filled cell.

    Args:
      board (Array): The 2D Array of the board.

    Returns:
      The average height of the non-empty columns divided by 20, or 0 when
      every column is empty.
    """
    column_heights = []

    for col in range(self.width):
      # None (rather than 0) marks an empty column: row 0 is a legitimate
      # top row for a column that is stacked all the way up
      top = next(
        (row for row in range(self.height) if board[row][col] != PIECE_ID_EMPTY),
        None)

      if top is not None:
        column_heights.append(self.height - top)

    if not column_heights:
      return 0

    return sum(column_heights) / len(column_heights) / 20

  def get_state_board_height_features(self, board):
    """Returns several height related features from a board.

    Rows are indexed from the top, so the height of a column is the number of
    rows minus the row index of its topmost filled cell.

    Args:
      board (Array): The 2D Array of the board.

    Returns:
      Average board height of filled columns.
      Max height of all filled columns.
      Both are measured in cells; (0, 0) is returned for an empty board.
    """
    row_count = len(board)
    column_heights = []

    for col in range(len(board[0])):
      # scan from the top (row 0 included) so the first hit is the column's peak
      for row in range(row_count):
        if board[row][col] != PIECE_ID_EMPTY:
          column_heights.append(row_count - row)
          break

    if not column_heights:
      return 0, 0

    return sum(column_heights) / len(column_heights), max(column_heights)

  # returns the sum total of differences between height of each column and its adjacent one
  def get_state_bumpiness(self, board):
    """Returns the normalized bumpiness of the board surface.

    Bumpiness is the sum of the absolute height differences between every pair
    of neighbouring columns.

    Args:
      board (Array): The 2D Array of the board.

    Returns:
      The summed differences divided by 100.
    """
    # Row index of the topmost filled cell of each column. An empty column
    # gets self.height (one past the bottom row), i.e. a height of zero, so
    # differences of these indices equal differences of the column heights.
    tops = []
    for col in range(self.width):
      top = self.height
      for row in range(self.height):
        if board[row][col] != PIECE_ID_EMPTY: # get the first non-empty cell
          top = row
          break
      tops.append(top)

    # compare each column with its right-hand neighbour
    bumpiness = sum(abs(left - right) for left, right in zip(tops, tops[1:]))

    return bumpiness / 100

  # returns 1 - (holes / 100), where a hole is an empty cell with a filled cell above it in the same column
  def get_state_holes(self, board):
    """Returns the hole feature of the board.

    A hole is an empty cell that has at least one filled cell above it in the
    same column.

    Args:
      board (Array): The 2D Array of the board.

    Returns:
      1 - (number of holes / 100), so fewer holes give a larger value.
    """
    hole_count = 0

    for col in range(self.width):
      covered = False # becomes True below the first filled cell of the column

      for row in range(self.height):
        if board[row][col] == PIECE_ID_EMPTY:
          if covered:
            hole_count += 1
        else:
          covered = True

    return 1 - (hole_count / 100)

  def get_state_transitions(self, board):
    """Returns the row and column transitions.

    Transitions are the sum of different cells adjacent to one another in row / col.
    """
    row_transitions = 0
    col_transitions = 0

    for i in range(self.height - 1):
      for j in range(self.width):
        # row transition
        if j != self.width - 1:
          if board[i][j] != board[i][j + 1]:
            row_transitions += 1
        if board[i][j] != board[i + 1][j]:
          col_transitions += 1

    return 1 - (row_transitions / 100), 1 - (col_transitions / 100)

  def get_proportion_left_side(self, board):
    """Returns the share of filled cells that lie in the left half of the board.

    The board is split at column int(self.width / 2); that column and
    everything right of it belong to the right half.

    Agent should try to stack on the left based on general player behavior.

    Args:
      board (Array): The 2D Array of the board.

    Returns:
      Filled cells on the left divided by all filled cells, or 0 when the
      board has no filled cell.
    """
    mid = int(self.width / 2)

    def filled_cells(first_col, end_col):
      # counts the non-empty cells in columns [first_col, end_col)
      return sum(
        1
        for col in range(first_col, end_col)
        for row in range(self.height)
        if board[row][col] != PIECE_ID_EMPTY
      )

    left = filled_cells(0, mid)
    right = filled_cells(mid, self.width)
    total = left + right

    if total == 0:
      return 0

    return left / total

  def get_right_well(self, board):
    """Returns whether the right-most column is completely empty.

    Agent should try to maintain a right well to make Tetrises.

    Args:
      board (Array): The 2D Array of the board.

    Returns:
      1 when no cell of the last column is filled, otherwise 0.
    """
    last_col = self.width - 1
    column_is_empty = all(
      board[row][last_col] == PIECE_ID_EMPTY for row in range(self.height))

    return 1 if column_is_empty else 0

  def generate_tucks_and_spins(self, move, pieces_table_copy, verbose=False):
    """Returns a List of moves corresponding to tucks and spins.

    Four candidates are derived from copies of move: index 0 shifted by -1 and
    +1 (tuck left / right) and index 2 shifted by -1 and +1 (spin left /
    right). Only candidates accepted by is_valid_move for the current piece
    are returned, in that order.

    Args:
      move (List): The move to derive the candidates from; it is not modified.
      pieces_table_copy (Array): The board passed on to is_valid_move.
      verbose (bool): If True, logs every accepted candidate.
    """
    # (index into the move, offset) in the order tuck left, tuck right,
    # spin left, spin right
    adjustments = ((0, -1), (0, 1), (2, -1), (2, 1))

    candidates = []
    for index, offset in adjustments:
      candidate = copy.deepcopy(move)
      candidate[index] += offset
      candidates.append(candidate)

    legal_moves = []
    for candidate in candidates:
      if not self.is_valid_move(self.piece, candidate, pieces_table_copy, verbose=verbose):
        continue

      legal_moves.append(candidate)
      if verbose:
        log.info(candidate)

    return legal_moves

  # gets the current piece
  def get_current_piece(self):
    """Returns self.piece, the piece currently in play."""
    return self.piece

  def get_piece_count(self):
    """Returns self.piece_count.

    NOTE(review): presumably the number of pieces placed so far - confirm
    where the counter is updated.
    """
    return self.piece_count

  def get_move_count(self):
    """Returns self.move_count.

    NOTE(review): presumably the number of moves made so far - confirm where
    the counter is updated.
    """
    return self.move_count

  def get_lines_cleared(self):
    """Returns self.line_clears, the line-clear total kept by the board."""
    return self.line_clears

  def get_line_clear_single(self):
    """Returns self.line_clear_single, the counter kept for single line clears."""
    return self.line_clear_single

  def get_line_clear_double(self):
    """Returns self.line_clear_double, the counter kept for double line clears."""
    return self.line_clear_double

  def get_line_clear_triple(self):
    """Returns self.line_clear_triple, the counter kept for triple line clears."""
    return self.line_clear_triple

  def get_line_clear_tetris(self):
    """Returns self.line_clear_tetris, the counter kept for Tetris (four-line) clears."""
    return self.line_clear_tetris

  def get_level(self):
    """Returns self.level, the level stored on the board."""
    return self.level

  def get_score(self):
    """Returns self.score, the score stored on the board."""
    return self.score

  # return the number of lines that have been cleared in the most recent move
  def get_lines_cleared_recently(self):
    """Returns self.lines_cleared_recently, the lines cleared by the most recent move."""
    return self.lines_cleared_recently

  # returns increase in score from lines cleared
  def get_score_increase(self, lines_cleared):
    """Returns the score increase for a number of cleared lines.

    Args:
      lines_cleared: The key / index looked up in self.scoring_system.

    Raises:
      Whatever self.scoring_system raises for an unknown lines_cleared
      (its container type is not visible here - TODO confirm).
    """
    return self.scoring_system[lines_cleared]

  def get_reward_increase(self, lines_cleared):
    """Returns the reward increase for a number of cleared lines.

    Args:
      lines_cleared: The key / index looked up in self.reward_system.

    Raises:
      Whatever self.reward_system raises for an unknown lines_cleared
      (its container type is not visible here - TODO confirm).
    """
    return self.reward_system[lines_cleared]

  def get_reward_increase_well(self):
    """Returns get_right_well() evaluated on self.pieces_table.

    That is 1 while the right-most column of the live board is completely
    empty, otherwise 0.
    """
    return self.get_right_well(self.pieces_table)

  def get_deep_copy_pieces_table(self):
    """Returns a deep copy of self.pieces_table.

    The copy can be modified by the caller without touching the live board.
    """
    return copy.deepcopy(self.pieces_table)


# AI

The Classes representing the agent that will play NES Tetris (badly).

In [None]:
# The Classes representing the agent that plays NES Tetris

import random, os
import tensorflow as tf
import numpy as np

class ExperienceBuffer:
  '''Fixed-capacity, first-in-first-out store of past experiences.
  '''
  def __init__(self, buffer_size=20000):
    '''The storage buffer of past experiences for the agent to learn from.

    Args:
      buffer_size (int): The maximum number of experiences kept at once.
    '''
    # local import keeps this class self-contained within its notebook cell
    from collections import deque

    self.buffer_size = buffer_size
    # a bounded deque drops the oldest entry by itself, in O(1), once full,
    # so the buffer never holds more than buffer_size experiences
    self.buffer = deque(maxlen=buffer_size)

  def add(self, experience):
    '''Adds a given experience to the buffer, dropping the oldest one when full.
    '''
    self.buffer.append(experience)

  def sample(self, size):
    '''Returns a random sample (a List, without replacement) from the buffer.

    Raises:
      ValueError: If size is larger than the number of stored experiences.
    '''
    return random.sample(self.buffer, size)

class Network:
  """The value network of the agent together with its training loop.

  The model maps one state feature vector to a single rating. act() picks,
  among the available moves, the one whose state gets the highest rating.
  """

  def __init__(self, state_size=STATE_SIZE, discount=1, epsilon=1, epsilon_min=0.0001, epsilon_episode_limit=500):
    """The network representing the agent.

    Args:
      state_size (int): Number of features in one state vector (model input width).
      discount (float): Weight of the predicted next-state rating in a training target.
      epsilon (float): Initial probability that act() returns a random move.
      epsilon_min (float): Lower bound that epsilon decays towards.
      epsilon_episode_limit (int): Number of fitting learn() calls after which
        epsilon has decayed linearly from epsilon to epsilon_min.
    """
    self.state_size = state_size
    self.model = self.create_model()
    self.discount = discount
    self.epsilon = epsilon
    self.epsilon_min = epsilon_min
    self.epsilon_episode_limit = epsilon_episode_limit
    self.epsilon_decay = (epsilon - epsilon_min) / epsilon_episode_limit
    self.experiences = ExperienceBuffer()
    self.tensorboard = tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR,
                                                      histogram_freq=1000,
                                                      write_graph=True,
                                                      write_images=True)

  def create_model(self, verbose=False):
    """Creates and returns a model to be used.

    The model is two hidden Dense layers of 32 relu units followed by a single
    linear output, compiled with adam / mse. A diagram of it is written to
    IMAGE_PATH.

    If verbose, prints out a summary of the model created.
    """
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(32, input_dim=self.state_size, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1, activation='linear'),
    ])

    model.compile(optimizer='adam', loss='mse', metrics=['mean_squared_error'])

    if verbose:
      model.summary()

    tf.keras.utils.plot_model(model, IMAGE_PATH, show_shapes=True)

    return model

  def act(self, obs):
    """Returns the best move based on a List of given states, unless agent decides to explore,
    which it returns a random move instead.

    Args:
      obs: Sequence of [move, state] pairs, one per available move.

    Returns:
      Tuple of (move, state) taken from the chosen pair, or (None, None) when
      obs is empty.
    """
    # no moves
    if len(obs) == 0:
      return None, None

    if random.uniform(0, 1) < self.epsilon:
      # explore
      chosen = random.randint(0, len(obs) - 1)
    else:
      # exploit: the first pair with the highest rating wins
      ratings = self.predict_ratings(obs)
      chosen = max(range(len(ratings)), key=lambda index: ratings[index])

    # both branches return the same (move, state) shape so that callers can
    # always unpack the result
    return obs[chosen][0], obs[chosen][1]

  def _feature_matrix(self, states):
    """Returns states as a float array of shape (n, state_size).

    states is either a sequence of plain feature vectors or a sequence of
    [move, state] pairs; for pairs only the state part is kept.
    """
    try:
      features = np.asarray(states, dtype=np.float32)
    except (TypeError, ValueError):
      # [move, state] pairs are ragged and cannot become a numeric array
      features = None

    if features is None or features.ndim != 2 or features.shape[1] != self.state_size:
      features = np.asarray([state[1] for state in states], dtype=np.float32)

    return features

  def predict_ratings(self, states):
    """Returns the predictions from the agent as a List.

    Args:
      states: Sequence of feature vectors, or of [move, state] pairs.

    Returns:
      One rating per entry of states, in the same order.
    """
    features = self._feature_matrix(states)
    if len(features) == 0:
      return []

    predictions = self.model.predict(features, verbose=0)

    return [predict[0] for predict in predictions]

  def train(self, env, episodes=1):
    """Trains for a given number of episodes and returns a List of Lists for the
    steps, rewards, scores and line clears for the training episodes.

    Every step is stored in the experience buffer and learn() is called once
    at the end of every episode.

    Args:
      env: The environment to play in; reset() / step() return the available
        [move, state] pairs as the observation.
      episodes (int): How many games to play.

    Returns:
      [steps, rewards, scores, lines_cleared, lines_cleared_single,
      lines_cleared_double, lines_cleared_triple, lines_cleared_tetris] where
      steps is the total step count and every other entry is a List with one
      value per episode.
    """
    rewards = []
    scores = []

    lines_cleared = []
    lines_cleared_single = []
    lines_cleared_double = []
    lines_cleared_triple = []
    lines_cleared_tetris = []
    steps = 0

    for _ in range(episodes):
      obs = env.reset()
      current_state = env.board.get_board_info([0, 0, 0], env.board.get_deep_copy_pieces_table())

      done = False
      total_reward = 0

      while not done:
        action, next_state = self.act(obs)
        if action is None:
          # no move left: count the step, apply the penalty and end the game
          steps += 1
          total_reward -= 5
          break

        obs, reward, done, info = env.step(action)
        self.experiences.add((current_state, reward, next_state, done))
        current_state = next_state
        steps += 1
        total_reward += reward

      rewards.append(total_reward)
      scores.append(env.board.get_score())

      lines_cleared.append(env.board.get_lines_cleared())
      lines_cleared_single.append(env.board.get_line_clear_single())
      lines_cleared_double.append(env.board.get_line_clear_double())
      lines_cleared_triple.append(env.board.get_line_clear_triple())
      lines_cleared_tetris.append(env.board.get_line_clear_tetris())

      self.learn()

    return [steps, rewards, scores, lines_cleared, lines_cleared_single, lines_cleared_double, \
      lines_cleared_triple, lines_cleared_tetris]

  def load(self, path=WEIGHT_PATH):
    """Loads weights from local .h5 file (default is WEIGHT_PATH).

    Logs a warning and leaves the model untouched when path is not a file.
    """
    if os.path.isfile(path):
      self.model.load_weights(path)
      log.info('Loaded weights from ' + str(path))
    else:
      log.warning('Unable to load.')

  def save(self, path=WEIGHT_PATH):
    """Saves weights to local .h5 file (default is WEIGHT_PATH).

    Saving is best effort: a failure is logged together with its cause
    instead of being raised, so a failed save does not abort training.
    """
    try:
      self.model.save_weights(path)
    except Exception as error:
      log.warning('Failed to save: ' + str(error))
    else:
      log.info('Saved weights to ' + str(path))

  def learn(self, batch_size=2048, epochs=1):
    """Model learns about recent experiences.

    Batch_size corresponds to the random experiences from experience buffer.
    Nothing happens until the buffer holds at least batch_size experiences.
    Each fit also lowers epsilon by one decay step, down to epsilon_min.

    Args:
      batch_size (int): Number of experiences sampled for one fit.
      epochs (int): Number of epochs of the fit.
    """
    if len(self.experiences.buffer) < batch_size: # buffer too small, return
      return

    batch = self.experiences.sample(batch_size)

    # rate all next states in a single prediction call
    next_ratings = self.predict_ratings([next_state for _, _, next_state, _ in batch])

    train_x = []
    train_y = []

    for (previous_state, reward, _, done), rating in zip(batch, next_ratings):
      # a finished game has no future value to add on top of the reward
      q = reward if done else reward + self.discount * rating
      train_x.append(previous_state)
      train_y.append(q)

    self.model.fit(np.array(train_x), np.array(train_y), batch_size=len(train_x), verbose=0,
                  epochs=epochs, callbacks=[self.tensorboard])
    self.epsilon = max(self.epsilon_min, self.epsilon - self.epsilon_decay)


# Environment

The OpenAI Gym environment to run the game in.

In [None]:
# The Class representing the gym environment
import gym
import numpy as np

class NesTetrisEnv(gym.Env):
  '''Gym environment around a Board.

  An observation is the array of [move, state] pairs of every move that is
  currently available; an action is one of those moves.
  '''
  metadata = {'render.modes': ['console']}

  def __init__(self):
    super(NesTetrisEnv, self).__init__()
    self.action_space = gym.spaces.Discrete(ACTION_SIZE)
    self.observation_space = gym.spaces.Discrete(STATE_SIZE)
    self.board = None # created by reset()

  @staticmethod
  def _as_observation(moves):
    '''Packs the [move, state] pairs into an array.

    A move and a state have different lengths, so the pairs are ragged.
    dtype=object has to be requested explicitly because newer NumPy versions
    refuse to build ragged arrays implicitly.
    '''
    return np.array(moves, dtype=object)

  def step(self, action, verbose=False):
    '''Executes one time step within the environment.

    Args:
      action (tuple): Tuple of (x, y, rotation).
      verbose (bool): Passed on to Board.move_piece.

    Returns:
      Tuple of (observation, reward, done, info). The reward is 1 plus the
      line clear reward of the move, minus 5 when the game is over; info is
      always an empty dict.
    '''
    self.board.move_piece(action, verbose=verbose)
    moves = self.board.get_available_moves_state_info()
    done = self.board.is_game_over()

    reward = 1
    #reward += self.board.get_reward_increase_well()
    reward += self.board.get_reward_increase(self.board.get_lines_cleared_recently())
    if done:
      reward -= 5

    return self._as_observation(moves), reward, done, {}

  def reset(self, verbose=False):
    '''Reset the state of the environment to an initial state and return initial observation

    Args:
      verbose (bool): If True, renders the fresh board and prints the piece state.
    '''
    self.board = Board(BOARD_HEIGHT, BOARD_WIDTH)
    self.board.place_new_piece()

    if verbose:
      self.board.render_board()
      self.board.print_current_piece_state()

    return self._as_observation(self.board.get_available_moves_state_info())

  def render(self, mode='human', close=False):
    '''Render the environment to the screen
    '''
    self.board.render_board()
	

# Train

Time to train the agent.

In [None]:
# Trains and runs the agent

import statistics, time, os
import tensorflow as tf

env = NesTetrisEnv()
network = Network(epsilon=0.95, epsilon_episode_limit=EPSILON_LIMIT)

done = False
episodes_ran = 0
total_steps = 0
episodes = EPISODE_INTERVAL
total_episodes = EPISODES

_RULE = '======================================================================================'


def _log_stats(*series):
  """Logs the median, mean, min and max of every given series, one line per statistic."""
  statistic_functions = (
    ('Median', statistics.median),
    ('Mean', statistics.mean),
    ('Min', min),
    ('Max', max),
  )
  for label, func in statistic_functions:
    log.info(' * ' + label + ': ' + ' / '.join(str(func(values)) for values in series))


log.info(_RULE)
log.info('training for ' + str(total_episodes) + ' episodes')
log.info(_RULE)

with tf.device('/GPU:0'):
  while not done:
    # train one interval; network.train() resets the environment itself
    time_start = time.time()
    steps, rewards, scores, lines_cleared, lines_cleared_single, lines_cleared_double, \
      lines_cleared_triple, lines_cleared_tetris = network.train(env, episodes=episodes)
    episodes_ran += episodes
    total_steps += steps

    if SAVE_SEPARATE_TRAINING_INTERVALS:
      path_name = WEIGHTS_FILENAME + '_' + str(episodes_ran) + '_episodes.h5'
      network.save(path_name)
    else:
      network.save()

    # measured once so that the total and the per-game value are consistent
    elapsed = time.time() - time_start

    log.info(_RULE)

    log.info(' * Total Games: ' + str(episodes_ran))
    log.info(' * Took total / per game (seconds): ' + str(elapsed) + ' / ' + str(elapsed / episodes))
    log.info(' * Total Steps: ' + str(total_steps))
    log.info(' * Epsilon: ' + str(network.epsilon))
    log.info(' * (Reward / Score / Lines Cleared) ')
    _log_stats(rewards, scores, lines_cleared)

    log.info(_RULE)

    log.info(' * Lines Cleared Statistics (Single / Double / Triple / Tetris):')
    _log_stats(lines_cleared_single, lines_cleared_double, lines_cleared_triple, lines_cleared_tetris)
    log.info(_RULE)

    # play a sample game; stop on game over or when no move is left
    obs = env.reset()
    sample_done = False

    while not sample_done and len(obs) != 0:
      action, state = network.act(obs)
      obs, reward, sample_done, info = env.step(action)
    env.render()

    done = episodes_ran >= total_episodes

  network.save()

  log.info('training done')


# Run

Let the agent play the game.

In [None]:
# loads the saved weights and lets the agent play one game
 
import yaml, statistics, time, os
import tensorflow as tf

env = NesTetrisEnv()
network = Network(epsilon=0, epsilon_episode_limit=1) # epsilon 0: never explore
network.load()

done = False

log.info('======================================================================================')
log.info('Running game')
log.info('======================================================================================')

with tf.device('/GPU:0'):
  obs = env.reset()

  while not done:
    # get the next action
    if len(obs) != 0:
      action, state = network.act(obs)
      log.info('===============================================================')
      log.info('Move / States')
      log.info(action)
      # indices follow the order of the feature List returned by get_board_info:
      # landing_height, board_height, bumpiness, holes, lines_cleared,
      # row_transitions, lines_ready_to_clear, right_well
      log.info(' * landing_height:             ' + str(state[0]))
      log.info(' * board_height:               ' + str(state[1]))
      log.info(' * bumpiness:                  ' + str(state[2]))
      log.info(' * holes:                      ' + str(state[3]))
      log.info(' * lines_cleared:              ' + str(state[4]))
      log.info(' * row_transitions:            ' + str(state[5]))
      log.info(' * lines_ready_to_clear:       ' + str(state[6]))
      log.info(' * right_well:                 ' + str(state[7]))
      log.info('===============================================================')
    else:
      log.info('===============================================================')
      log.info('Game Over')
      log.info('===============================================================')
      break

    obs, reward, done, info = env.step(action, verbose=False)
    env.render()
  log.info('===============================================================')

  log.info('======FINISHED=======')
  log.info('Score: ' + str(env.board.get_score()))
  log.info('Lines cleared: ' + str(env.board.get_lines_cleared()))

  log.info('===============================================================')
  log.info('Lines cleared: ')
  log.info('Single: ' + str(env.board.get_line_clear_single()))
  log.info('Double: ' + str(env.board.get_line_clear_double()))
  log.info('Triple: ' + str(env.board.get_line_clear_triple()))
  log.info('Tetris: ' + str(env.board.get_line_clear_tetris()))

  log.info('===============================================================')
