## Define Tetris Game

In [None]:
### version 1

class Tetris:
    """Simplified Tetris (version 1): only the tile catalogue is defined.

    ``TILES[t][o]`` describes tile ``t`` in orientation ``o`` as a list with
    one ``[low, high]`` pair per column of the shape.  The later versions of
    this class in the notebook read each pair as the half-open range of rows
    that the tile occupies in that column.
    """

    TILES = [
        # Tile 0: orientations 0 and 1.
        [
            [[0, 2]],
            [[0, 1], [0, 1]],
        ],
        # Tile 1: orientations 0 and 1.
        [
            [[0, 1], [1, 2]],
            [[1, 2], [0, 1]],
        ],
        # Tile 2: orientations 0 to 3.
        [
            [[0, 2], [1, 2]],
            [[0, 2], [0, 1]],
            [[0, 1], [0, 2]],
            [[1, 2], [0, 2]],
        ],
        # Tile 3: a single orientation.
        [
            [[0, 2], [0, 2]],
        ],
    ]

In [None]:
### version 2

import numpy as np

class Tetris:
    """Simplified Tetris (version 2): tile catalogue plus an empty board.

    ``TILES[t][o]`` describes tile ``t`` in orientation ``o`` as a list with
    one ``[low, high]`` pair per column of the shape.  The board is a
    ``rows x cols`` array whose cells all start out as ``UNDEFINED``.
    """

    TILES = [
        # Tile 0: orientations 0 and 1.
        [
            [[0, 2]],
            [[0, 1], [0, 1]],
        ],
        # Tile 1: orientations 0 and 1.
        [
            [[0, 1], [1, 2]],
            [[1, 2], [0, 1]],
        ],
        # Tile 2: orientations 0 to 3.
        [
            [[0, 2], [1, 2]],
            [[0, 2], [0, 1]],
            [[0, 1], [0, 2]],
            [[1, 2], [0, 2]],
        ],
        # Tile 3: a single orientation.
        [
            [[0, 2], [0, 2]],
        ],
    ]

    # Marker for "no value yet"; also the content of an empty board cell.
    UNDEFINED = -1

    def __init__(self, rows, cols):
        """Store the board dimensions and set up a fresh game."""

        self.rows = rows
        self.cols = cols
        self.restart()

    def restart(self):
        """Replace the board with a new one in which every cell is empty."""

        shape = (self.rows, self.cols)
        self.board = np.full(shape, Tetris.UNDEFINED)

In [None]:
### version 3

import numpy as np
import random

class Tetris:
    """Simplified Tetris (version 3): tiles can be moved, rotated and dropped.

    The board is a ``rows x cols`` integer array; ``UNDEFINED`` marks an empty
    cell and ``1`` an occupied one.  Row 0 is the floor: ``drop`` rests a tile
    on the highest occupied cell beneath it.

    ``TILES[t][o]`` describes tile ``t`` in orientation ``o`` as a list with
    one ``[low, high]`` pair per column; the tile occupies rows
    ``tile_y + low`` up to (not including) ``tile_y + high`` of that column.
    """

    TILES = [
        [
            [[0, 2]],  # Tile 0, orientation 0.
            [[0, 1], [0, 1]],  # Tile 0, orientation 1.
        ],
        [
            [[0, 1], [1, 2]],  # Tile 1, orientation 0.
            [[1, 2], [0, 1]],  # Tile 1, orientation 1.
        ],
        [
            [[0, 2], [1, 2]],  # Tile 2, orientation 0.
            [[0, 2], [0, 1]],  # Tile 2, orientation 1.
            [[0, 1], [0, 2]],  # Tile 2, orientation 2.
            [[1, 2], [0, 2]],  # Tile 2, orientation 3.
        ],
        [
            [[0, 2], [0, 2]],  # Tile 3, orientation 0.
        ],
    ]

    # Marker for "no value yet"; also the content of an empty board cell.
    UNDEFINED = -1

    def __init__(self, rows, cols):
        """Initialize Tetris with a board of ``rows`` x ``cols`` cells."""

        self.rows, self.cols = rows, cols
        self.restart()

    def restart(self):
        """Restart the game: empty the board and fetch the first tile."""

        self.board = np.full((self.rows, self.cols), Tetris.UNDEFINED)

        self.current_tile = Tetris.UNDEFINED
        self.tile_x = Tetris.UNDEFINED
        self.tile_y = Tetris.UNDEFINED
        self.tile_orientation = Tetris.UNDEFINED

        self.next_tile()

    def next_tile(self):
        """Pick a random tile and put it at the spawn position.

        The tile starts in orientation 0 at column ``cols // 2`` and at
        height ``rows``, i.e. just above the board.
        """

        self.current_tile = random.randint(0, len(Tetris.TILES) - 1)

        self.tile_x = self.cols // 2
        self.tile_y = self.rows
        self.tile_orientation = 0

    def move_left(self):
        """Move current tile one column to the left.

        Returns True if the tile moved, False if it is already at column 0.
        """

        if self.tile_x - 1 >= 0:
            self.tile_x -= 1
            return True
        return False

    def move_right(self):
        """Move current tile one column to the right.

        Returns True if the tile moved, False if it would leave the board.
        """

        tile_width = len(Tetris.TILES[self.current_tile][self.tile_orientation])
        if self.tile_x + 1 <= self.cols - tile_width:
            self.tile_x += 1
            return True
        return False

    def rotate(self):
        """Switch current tile to its next orientation (cyclically).

        Returns True if the tile was rotated, False if the rotated shape
        would stick out of the board on the right.
        """

        new_orientation = ((self.tile_orientation + 1)
                           % len(Tetris.TILES[self.current_tile]))
        tile_width = len(Tetris.TILES[self.current_tile][new_orientation])
        if self.tile_x <= self.cols - tile_width:
            self.tile_orientation = new_orientation
            return True
        return False

    def drop(self):
        """Drop current tile, update the board and fetch the next tile.

        The resting height is determined from *all* columns of the tile
        before anything is written to the board.  Cells that would end up
        above the top row are silently discarded by the slice assignment;
        this version has no game-over check.
        """

        tile = Tetris.TILES[self.current_tile][self.tile_orientation]

        # Find first location where the piece collides with occupied locations.
        self.tile_y = 0
        for x, (low, _high) in enumerate(tile):
            cury = -1
            for y in range(self.rows - 1, -1, -1):
                if self.board[y, self.tile_x + x] > 0:
                    # Calculate the y position for this column
                    # if no other columns are taken into account.
                    cury = y + 1 - low
                    break
            if self.tile_y < cury:
                self.tile_y = cury

        # The following steps must run once, after every column has been
        # scanned; running them per column would place the tile too early.

        # Change board entries at the newly placed tile to occupied.
        for x, (low, high) in enumerate(tile):
            self.board[self.tile_y + low:self.tile_y + high,
                       x + self.tile_x] = 1

        # Remove full lines, scanning from the top row down so that rows
        # shifted into a cleared position have already been examined.
        for y in range(self.rows - 1, -1, -1):
            if np.sum(self.board[y, :]) == self.cols:
                for y1 in range(y, self.rows - 1):
                    self.board[y1, :] = self.board[y1 + 1, :]
                self.board[self.rows - 1, :] = Tetris.UNDEFINED

        self.next_tile()

In [None]:
### version 4

import numpy as np
import random

class Tetris:
    """Simplified Tetris (version 4): adds a tile budget and a game-over flag.

    The board is a ``rows x cols`` integer array; ``UNDEFINED`` marks an empty
    cell and ``1`` an occupied one, with row 0 acting as the floor.  A game
    ends when ``max_tiles`` tiles have been played or when a dropped tile
    does not fit below the top of the board.

    ``TILES[t][o]`` describes tile ``t`` in orientation ``o`` as a list with
    one ``[low, high]`` pair per column; the tile occupies rows
    ``tile_y + low`` up to (not including) ``tile_y + high`` of that column.
    """

    TILES = [
        # Tile 0: orientations 0 and 1.
        [
            [[0, 2]],
            [[0, 1], [0, 1]],
        ],
        # Tile 1: orientations 0 and 1.
        [
            [[0, 1], [1, 2]],
            [[1, 2], [0, 1]],
        ],
        # Tile 2: orientations 0 to 3.
        [
            [[0, 2], [1, 2]],
            [[0, 2], [0, 1]],
            [[0, 1], [0, 2]],
            [[1, 2], [0, 2]],
        ],
        # Tile 3: a single orientation.
        [
            [[0, 2], [0, 2]],
        ],
    ]

    # Marker for "no value yet"; also the content of an empty board cell.
    UNDEFINED = -1

    def __init__(self, rows, cols, max_tiles, random_seed):
        """Remember the game parameters and start the first game.

        ``random_seed`` seeds the pre-generated tile sequence; when it is
        None, tiles are drawn from the global random generator instead.
        """

        self.rows = rows
        self.cols = cols
        self.max_tiles = max_tiles
        self.random_seed = random_seed

        self.restart()

    def restart(self):
        """Reset board, counters and tile sequence, then fetch the first tile."""

        self.board = np.full((self.rows, self.cols), Tetris.UNDEFINED)

        self.current_tile = Tetris.UNDEFINED
        self.tile_x = Tetris.UNDEFINED
        self.tile_y = Tetris.UNDEFINED
        self.tile_orientation = Tetris.UNDEFINED

        self.gameover = False
        self.tile_count = 0

        # Generate the tile sequence from the configured seed, then put the
        # global generator back into the state it had before.
        saved_state = random.getstate()
        random.seed(self.random_seed)
        self.tile_sequence = [
            random.randint(0, len(Tetris.TILES) - 1)
            for _ in range(self.max_tiles)
        ]
        random.setstate(saved_state)

        self.next_tile()

    def next_tile(self):
        """Spawn the next tile, or end the game once the budget is used up."""

        if self.tile_count >= self.max_tiles:
            self.gameover = True
            return

        if self.random_seed is None:
            self.current_tile = random.randint(0, len(Tetris.TILES) - 1)
        else:
            self.current_tile = self.tile_sequence[self.tile_count]

        # Spawn position: middle column, just above the board, unrotated.
        self.tile_x = self.cols // 2
        self.tile_y = self.rows
        self.tile_orientation = 0

        self.tile_count += 1

    def move_left(self):
        """Shift the tile one column left; return whether it moved."""

        if self.tile_x - 1 < 0:
            return False
        self.tile_x -= 1
        return True

    def move_right(self):
        """Shift the tile one column right; return whether it moved."""

        shape = Tetris.TILES[self.current_tile][self.tile_orientation]
        rightmost_start = self.cols - len(shape)
        if self.tile_x + 1 > rightmost_start:
            return False
        self.tile_x += 1
        return True

    def rotate(self):
        """Advance to the next orientation; return whether it was possible."""

        orientations = Tetris.TILES[self.current_tile]
        candidate = (self.tile_orientation + 1) % len(orientations)
        rightmost_start = self.cols - len(orientations[candidate])
        if self.tile_x > rightmost_start:
            return False
        self.tile_orientation = candidate
        return True

    def drop(self):
        """Let the tile fall, then either place it or end the game.

        If the resting tile would reach above the top row, only ``gameover``
        is set.  Otherwise the tile is written to the board, full lines are
        removed and the next tile is spawned.
        """

        shape = Tetris.TILES[self.current_tile][self.tile_orientation]

        # Resting height: the largest height demanded by any single column.
        self.tile_y = 0
        for dx, (low, _high) in enumerate(shape):
            column = self.tile_x + dx
            top = next(
                (row for row in reversed(range(self.rows))
                 if self.board[row, column] > 0),
                None,
            )
            if top is not None:
                self.tile_y = max(self.tile_y, top + 1 - low)

        if self.tile_y + np.max(shape) > self.rows:
            self.gameover = True
            return

        # Mark the cells covered by the tile as occupied.
        for dx, (low, high) in enumerate(shape):
            self.board[self.tile_y + low:self.tile_y + high,
                       self.tile_x + dx] = 1

        # Clear full lines from the top down; everything above a cleared
        # line moves down by one and the top row becomes empty.
        for row in reversed(range(self.rows)):
            if self.board[row, :].sum() == self.cols:
                self.board[row:self.rows - 1] = (
                    self.board[row + 1:self.rows].copy()
                )
                self.board[self.rows - 1, :] = Tetris.UNDEFINED

        self.next_tile()

In [None]:
### version 5 - FINAL > for_tetris.py

import numpy as np
import random

class Tetris:
    """Simplified Tetris with a tile budget, game-over state and rewards.

    The board is a ``rows x cols`` integer array; ``UNDEFINED`` marks an empty
    cell and ``1`` an occupied one, with row 0 acting as the floor.  A game
    ends when ``max_tiles`` tiles have been played or when a dropped tile
    does not fit below the top of the board.

    ``TILES[t][o]`` describes tile ``t`` in orientation ``o`` as a list with
    one ``[low, high]`` pair per column; the tile occupies rows
    ``tile_y + low`` up to (not including) ``tile_y + high`` of that column.
    """

    TILES = [
        [
            [[0, 2]],  # Tile 0, orientation 0.
            [[0, 1], [0, 1]],  # Tile 0, orientation 1.
        ],
        [
            [[0, 1], [1, 2]],  # Tile 1, orientation 0.
            [[1, 2], [0, 1]],  # Tile 1, orientation 1.
        ],
        [
            [[0, 2], [1, 2]],  # Tile 2, orientation 0.
            [[0, 2], [0, 1]],  # Tile 2, orientation 1.
            [[0, 1], [0, 2]],  # Tile 2, orientation 2.
            [[1, 2], [0, 2]],  # Tile 2, orientation 3.
        ],
        [
            [[0, 2], [0, 2]],  # Tile 3, orientation 0.
        ],
    ]

    # Marker for "no value yet"; also the content of an empty board cell.
    UNDEFINED = -1

    def __init__(self, rows, cols, max_tiles, random_seed):
        """Initialize Tetris.

        ``rows`` and ``cols`` give the board size and ``max_tiles`` the
        number of tiles per game.  ``random_seed`` seeds the pre-generated
        tile sequence; when it is None, tiles are drawn from the global
        random generator instead.
        """

        self.rows, self.cols = rows, cols
        self.max_tiles = max_tiles
        self.random_seed = random_seed

        self.restart()

    def restart(self):
        """Restart the game: reset board, counters, reward and tile sequence."""

        self.board = np.full((self.rows, self.cols), Tetris.UNDEFINED)

        self.current_tile = Tetris.UNDEFINED
        self.tile_x = Tetris.UNDEFINED
        self.tile_y = Tetris.UNDEFINED
        self.tile_orientation = Tetris.UNDEFINED

        self.gameover = False
        self.tile_count = 0
        self.reward = 0

        # Create predefined tile sequence.  The global random state is saved
        # and restored so that seeding here does not affect other users of
        # the ``random`` module.
        rand_state = random.getstate()
        random.seed(self.random_seed)
        self.tile_sequence = [random.randint(0, len(Tetris.TILES) - 1)
                              for _ in range(self.max_tiles)]
        random.setstate(rand_state)

        self.next_tile()

    def next_tile(self):
        """Get the next tile, or end the game once ``max_tiles`` were played.

        The tile spawns unrotated, just above the board, at the middle
        column.  The column is clamped so that the whole tile lies inside
        the board; this only matters for boards narrower than three columns.
        """

        if self.tile_count < self.max_tiles:
            if self.random_seed is not None:
                self.current_tile = self.tile_sequence[self.tile_count]
            else:
                self.current_tile = random.randint(0, len(Tetris.TILES) - 1)

            self.tile_orientation = 0
            tile_width = len(Tetris.TILES[self.current_tile][0])
            # Without the clamp a 2-wide tile on a 2-column board would start
            # at column 1 and make drop() index past the board edge.
            self.tile_x = max(0, min(self.cols // 2, self.cols - tile_width))
            self.tile_y = self.rows

            self.tile_count += 1
        else:
            self.gameover = True

    def move_left(self):
        """Move current tile to the left; return True if it moved."""

        if self.tile_x - 1 >= 0:
            self.tile_x -= 1
            return True
        return False

    def move_right(self):
        """Move current tile to the right; return True if it moved."""

        tile_width = len(Tetris.TILES[self.current_tile][self.tile_orientation])
        if self.tile_x + 1 <= self.cols - tile_width:
            self.tile_x += 1
            return True
        return False

    def rotate(self):
        """Rotate current tile to its next orientation.

        Returns True if the tile was rotated, False if the rotated shape
        would stick out of the board on the right.
        """

        new_orientation = ((self.tile_orientation + 1)
                           % len(Tetris.TILES[self.current_tile]))
        tile_width = len(Tetris.TILES[self.current_tile][new_orientation])
        if self.tile_x <= self.cols - tile_width:
            self.tile_orientation = new_orientation
            return True
        return False

    def drop(self):
        """Drop current tile and update game board.

        Returns the reward of this move, which is also added to
        ``self.reward``: -100 if the tile does not fit (game over),
        ``10 ** (k - 1)`` if ``k > 0`` lines were removed, otherwise 0.
        """

        tile = Tetris.TILES[self.current_tile][self.tile_orientation]

        # Find first location where the piece collides with occupied locations.
        self.tile_y = 0
        for x, (low, _high) in enumerate(tile):
            cury = -1
            for y in range(self.rows - 1, -1, -1):
                if self.board[y, self.tile_x + x] > 0:
                    # Calculate the y position for this column
                    # if no other columns are taken into account.
                    cury = y + 1 - low
                    break
            if self.tile_y < cury:
                self.tile_y = cury

        if self.tile_y + np.max(tile) > self.rows:
            # The tile would reach above the top row.
            self.gameover = True
            dreward = -100
        else:
            # Change board entries at the newly placed tile to occupied.
            for x, (low, high) in enumerate(tile):
                self.board[self.tile_y + low:self.tile_y + high,
                           x + self.tile_x] = 1

            # Remove full lines, scanning from the top row down so that rows
            # shifted into a cleared position have already been examined.
            removed_lines = 0
            for y in range(self.rows - 1, -1, -1):
                if np.sum(self.board[y, :]) == self.cols:
                    removed_lines += 1
                    for y1 in range(y, self.rows - 1):
                        self.board[y1, :] = self.board[y1 + 1, :]
                    self.board[self.rows - 1, :] = Tetris.UNDEFINED
            dreward = 10 ** (removed_lines - 1) if removed_lines > 0 else 0

            self.next_tile()

        self.reward += dreward

        return dreward

In [None]:
#from for_tetris import Tetris

# Small 4x4 game with 50 tiles and a fixed seed; this instance is used by
# the command-line and GUI cells below.
tetris = Tetris(rows=4, cols=4, max_tiles=50, random_seed=123456)

### Playing Tetris with the Command Line

In [None]:
# Command letters and the game action each one triggers ("X" quits the loop).
handlers = {
    "L": tetris.move_left,
    "R": tetris.move_right,
    "O": tetris.rotate,
    "D": tetris.drop,
}

while not tetris.gameover:
    # Show the current game status before asking for the next command.
    print(f"Tile {tetris.tile_count}/{tetris.max_tiles}",
          f"Reward: {tetris.reward}",
          sep="\n")
    print(f"Current tile {tetris.current_tile} with "
          f"orientation {tetris.tile_orientation} at position {tetris.tile_x}")
    print(tetris.TILES[tetris.current_tile][tetris.tile_orientation])
    print(tetris.board)

    cmd = input("Please enter your command (L, R, O, D, X): ").upper()
    print(f"Your input: {cmd}")

    if cmd == "X":
        break
    # Unknown commands are ignored.
    if cmd in handlers:
        handlers[cmd]()


### Playing Tetris with GUI

In [None]:
### FINAL > for_tetris.py

import pygame

def play_tetris_with_gui(tetris):
    """Open a pygame window in which a human plays ``tetris``.

    Left/right arrows move the tile, the up arrow rotates it and the down
    arrow drops it; ESC restarts the game.  The function returns after the
    window has been closed.
    """

    # Layout and colours.
    cell = 20
    black = (0, 0, 0)
    grey = (128, 128, 128)
    white = (255, 255, 255)
    red = (255, 0, 0)

    def cell_rect(col, row, inset):
        """Return the screen rectangle of a board cell, shrunk by ``inset``."""
        return [
            100 + inset + cell * col,
            80 + inset + cell * (tetris.rows - row),  # Row 0 is drawn lowest.
            cell - 2 * inset,
            cell - 2 * inset,
        ]

    # Set up window, keyboard repeat and font.
    pygame.init()
    pygame.display.set_caption("TETRIS")
    screen = pygame.display.set_mode((200 + tetris.cols * cell,
                                      200 + tetris.rows * cell))
    pygame.key.set_repeat(300, 100)  # Keyboard delay and interval in ms.
    font = pygame.font.SysFont("Calibri", 25, True)

    # Keys that act on the falling tile (only while the game is running).
    key_actions = {
        pygame.K_LEFT: tetris.move_left,
        pygame.K_RIGHT: tetris.move_right,
        pygame.K_UP: tetris.rotate,
        pygame.K_DOWN: tetris.drop,
    }

    running = True
    while running:
        # Redraw everything while the window is visible.
        if pygame.display.get_active():
            screen.fill(white)

            # Grid lines, plus a filled square for every occupied cell.
            for row in range(tetris.rows):
                for col in range(tetris.cols):
                    pygame.draw.rect(screen, grey, cell_rect(col, row, 0), 1)
                    if tetris.board[row][col] > 0:
                        pygame.draw.rect(screen, black, cell_rect(col, row, 1))

            # The current tile at its current position.
            shape = tetris.TILES[tetris.current_tile][tetris.tile_orientation]
            for dx, (low, high) in enumerate(shape):
                for dy in range(low, high):
                    pygame.draw.rect(
                        screen,
                        red,
                        cell_rect(dx + tetris.tile_x, dy + tetris.tile_y, 1),
                    )

            # Status texts, followed by the game-over notice if applicable.
            labels = [
                (f"Reward: {tetris.reward}", black, [0, 0]),
                (f"Tile {tetris.tile_count}/{tetris.max_tiles}", black, [0, 30]),
            ]
            if tetris.gameover:
                below_board = 100 + tetris.rows * cell
                labels.append(("G A M E   O V E R", red, [40, below_board]))
                labels.append(
                    ("Press ESC to try again", red, [10, below_board + 30])
                )
            for text, colour, position in labels:
                screen.blit(font.render(text, True, colour), position)

        pygame.display.flip()

        # React to window and keyboard events.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                continue
            if event.type != pygame.KEYDOWN:
                continue
            if event.key == pygame.K_ESCAPE:
                tetris.restart()
            if tetris.gameover:
                continue
            handler = key_actions.get(event.key)
            if handler is not None:
                handler()

    pygame.quit()


In [None]:
# from for_tetris import play_tetris_with_gui

# Start from a fresh game, then open the interactive window.
tetris.restart()
play_tetris_with_gui(tetris)

## Q-Learning

### Adapt Tetris for Q-learning with teleportation

In [None]:
### FINAL > for_tetris.py
 
class QLTetris(Tetris):
    """Tetris variant for Q-learning: tiles can be placed in a single step."""

    def __init__(self, rows, cols, max_tiles, random_seed):
        """Set up the underlying Tetris game with the given parameters."""

        super().__init__(rows, cols, max_tiles, random_seed)

    def teleport(self, new_x, new_orientation):
        """Put the current tile at column ``new_x`` in ``new_orientation``.

        Returns True if the tile was moved.  Returns False, leaving the tile
        untouched, if the orientation does not exist for the current tile or
        the tile would not lie completely inside the board.
        """

        orientations = Tetris.TILES[self.current_tile]
        if not 0 <= new_orientation < len(orientations):
            return False

        rightmost_start = self.cols - len(orientations[new_orientation])
        if not 0 <= new_x <= rightmost_start:
            return False

        self.tile_x = new_x
        self.tile_orientation = new_orientation
        return True


In [None]:
# from for_tetris import QLTetris

# Game instance shared by the Q-learning agent cells below
# (same board size, tile budget and seed as the interactive game above).
qltetris = QLTetris(rows=4, cols=4, max_tiles=50, random_seed=123456)

### QLAgent

In [None]:
### version 1
# Agent that picks a random action, teleports the tile accordingly, and drops it.
# Nothing happens if the teleportation target is illegal.

class QLAgent:
    """Q-learning agent to play Tetris (version 1: random actions only).

    An action is an integer in ``[0, action_num)`` that encodes a target
    column and a target orientation for the current tile:
    ``action = new_x * orientation_num + new_orientation``.
    """

    def __init__(self, tetris, games):
        """Initialize the agent.

        ``tetris`` is the game to play (it must offer ``teleport``, ``drop``,
        ``restart`` and ``gameover``); ``games`` is the number of games after
        which ``next_turn`` reports that the agent is finished.
        """

        self.tetris = tetris

        self.games = games
        self.game = 0

        # One target position per board column.
        self.position_num = self.tetris.cols
        # Largest number of orientations any tile has.
        self.orientation_num = max(len(tile) for tile in self.tetris.TILES)
        self.action_num = self.position_num * self.orientation_num

    def next_turn(self):
        """Executes the next turn in the game.

        Returns False once the last game is over, True otherwise.
        """

        if self.tetris.gameover:
            self.game += 1
            if self.game < self.games:
                self.tetris.restart()
            else:
                return False  # Finish.
        else:
            # Select action.
            action = np.random.randint(self.action_num)

            # Extract movement and rotation from action parameter.
            new_x, new_orientation = divmod(action, self.orientation_num)

            # Execute action and drop tile.
            # Note that nothing happens if the teleportation is not valid.
            if self.tetris.teleport(new_x, new_orientation):
                self.tetris.drop()

        return True  # Continue.



In [None]:
### FINAL > for_tetris.py

def observe_tetris_with_gui(tetris, agent):
    """Observe with GUI Tetris played by a QL agent.

    The window shows ``tetris``; every key press makes ``agent`` execute one
    turn via ``agent.next_turn()``.  The function returns when the window is
    closed or when ``next_turn`` returns a false value.
    """

    # GUI parameters.
    TILE_SIZE = 20

    BLACK = (0, 0, 0)
    GREY = (128, 128, 128)
    WHITE = (255, 255, 255)
    RED = (255, 0, 0)

    # Initialize the game engine.
    pygame.init()
    pygame.display.set_caption("TETRIS")
    screen = pygame.display.set_mode((200 + tetris.cols * TILE_SIZE,
                                      200 + tetris.rows * TILE_SIZE))
    pygame.key.set_repeat(300, 100)  # Set keyboard delay and interval in ms.
    font = pygame.font.SysFont("Calibri", 25, True)

    # Loop until the window is closed or the agent is finished.
    running = True
    while running:
        # Paint game board.
        if pygame.display.get_active():
            screen.fill(WHITE)

            # Grid lines, plus a filled square for every occupied cell.
            # Board row 0 is drawn at the bottom.
            for i in range(tetris.rows):
                for j in range(tetris.cols):
                    pygame.draw.rect(
                        screen,
                        GREY,
                        [100 + TILE_SIZE * j,
                         80 + TILE_SIZE * (tetris.rows - i),
                         TILE_SIZE,
                         TILE_SIZE],
                        1
                    )
                    if tetris.board[i][j] > 0:
                        pygame.draw.rect(
                            screen,
                            BLACK,
                            [101 + TILE_SIZE * j,
                             81 + TILE_SIZE * (tetris.rows - i),
                             TILE_SIZE - 2,
                             TILE_SIZE - 2],
                        )

            # The current tile at its current position.
            tile = tetris.TILES[tetris.current_tile][tetris.tile_orientation]
            for x in range(len(tile)):
                for y in range(tile[x][0], tile[x][1]):
                    pygame.draw.rect(
                        screen,
                        RED,
                        [101 + TILE_SIZE * (x + tetris.tile_x),
                         81 + TILE_SIZE * (tetris.rows - (y + tetris.tile_y)),
                         TILE_SIZE - 2,
                         TILE_SIZE - 2]
                    )

            screen.blit(
                font.render(f"Reward: {tetris.reward}", True, BLACK),
                [0, 0]
            )
            screen.blit(
                font.render(f"Tile {tetris.tile_count}/{tetris.max_tiles}",
                            True, BLACK),
                [0, 30]
            )
            if tetris.gameover:
                screen.blit(font.render("G A M E   O V E R", True, RED),
                            [40, 100 + tetris.rows * TILE_SIZE])
                screen.blit(font.render("Press ESC to try again", True, RED),
                            [10, 100 + tetris.rows * TILE_SIZE + 30])

        pygame.display.flip()

        # Get user input.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
            elif event.type == pygame.KEYDOWN:
                running = agent.next_turn()
            # Stop at the first request to end: a later key press in the same
            # batch must neither undo a window close nor step the agent
            # after it has finished.
            if not running:
                break

    pygame.quit()


In [None]:
#from for_tetris import observe_tetris_with_gui

# Reset the shared game and watch an agent play up to 3 games;
# the agent executes one turn per key press in the window.
qltetris.restart()
agent = QLAgent(qltetris, games=3)
observe_tetris_with_gui(qltetris, agent)

In [None]:
### version 2
# Adds the states

class QLAgent:
    """Q-learning agent to play Tetris (version 2: tracks the game state).

    An action is an integer in ``[0, action_num)`` that encodes a target
    column and a target orientation for the current tile:
    ``action = new_x * orientation_num + new_orientation``.

    The state is an integer whose binary digits are the index of the current
    tile followed by one digit per board cell (1 for occupied, 0 for empty).
    """

    def __init__(self, tetris, games):
        """Initialize the agent.

        ``tetris`` is the game to play; ``games`` is the number of games
        after which ``next_turn`` reports that the agent is finished.
        """

        self.tetris = tetris

        self.games = games
        self.game = 0

        # One target position per board column.
        self.position_num = self.tetris.cols
        # Largest number of orientations any tile has.
        self.orientation_num = max(len(tile) for tile in self.tetris.TILES)
        self.action_num = self.position_num * self.orientation_num

        self.state_size = (
            self.tetris.cols * self.tetris.rows  # Cells in board.
            + 1 + np.floor(np.log2(len(self.tetris.TILES) - 1)).astype(int)  # Tiles
        )
        self.state_num = 2 ** self.state_size

        self.update_state()

    def update_state(self):
        """Update the state of the agent from the current tile and board."""

        # Convert tile to binary digits (without leading zeros).
        tile = bin(self.tetris.current_tile)[2:]

        # Convert board to binary list.
        board = np.copy(self.tetris.board.reshape((-1,))).astype(int)
        board[board == self.tetris.UNDEFINED] = 0

        # Build the digit array explicitly from strings instead of relying
        # on NumPy to promote the integer cells to strings.
        self.state_binary = np.array([tile] + [str(cell) for cell in board])
        self.state = int("".join(self.state_binary), 2)

    def next_turn(self):
        """Executes the next turn in the game.

        Returns False once the last game is over, True otherwise.
        """

        if self.tetris.gameover:
            self.game += 1
            if self.game < self.games:
                self.tetris.restart()
                # The new game has an empty board and a new tile.
                self.update_state()
            else:
                return False  # Finish.
        else:
            # Select action.
            action = np.random.randint(self.action_num)

            # Extract movement and rotation from action parameter.
            new_x, new_orientation = divmod(action, self.orientation_num)

            # Execute action and drop tile.
            # Note that nothing happens if the teleportation is not valid.
            if self.tetris.teleport(new_x, new_orientation):
                self.tetris.drop()

                # Update the state.
                self.update_state()

        return True  # Continue.


In [None]:
# Reset the shared game and watch the state-tracking agent play up to
# 3 games; the agent executes one turn per key press in the window.
qltetris.restart()
agent = QLAgent(qltetris, games=3)
observe_tetris_with_gui(qltetris, agent)

In [None]:
### version 3
# Choose the action from the Q-table (epsilon-greedy). While the Q-table is still all zeros,
# the greedy choice is always action 0, so every tile is teleported to position 0 with orientation 0.

class QLAgent:
    """Q-learning agent to play Tetris (version 3: epsilon-greedy policy).

    An action is an integer in ``[0, action_num)`` that encodes a target
    column and a target orientation for the current tile:
    ``action = new_x * orientation_num + new_orientation``.

    The state is an integer whose binary digits are the index of the current
    tile followed by one digit per board cell (1 for occupied, 0 for empty).
    ``Q_table`` has one row per state and one column per action; this
    version reads it but does not train it yet.
    """

    def __init__(self, tetris, games, epsilon):
        """Initialize the agent.

        ``tetris`` is the game to play, ``games`` the number of games after
        which ``next_turn`` reports that the agent is finished, and
        ``epsilon`` the probability of choosing a random action.
        """

        self.tetris = tetris

        self.games = games
        self.game = 0

        # One target position per board column.
        self.position_num = self.tetris.cols
        # Largest number of orientations any tile has.
        self.orientation_num = max(len(tile) for tile in self.tetris.TILES)
        self.action_num = self.position_num * self.orientation_num

        self.state_size = (
            self.tetris.cols * self.tetris.rows  # Cells in board.
            + 1 + np.floor(np.log2(len(self.tetris.TILES) - 1)).astype(int)  # Tiles
        )
        self.state_num = 2 ** self.state_size

        self.epsilon = epsilon  # Probability to choose a random action in the epsilon-greedy policy.
        self.Q_table = np.zeros((self.state_num, self.action_num))

        self.update_state()

    def update_state(self):
        """Update the state of the agent from the current tile and board."""

        # Convert tile to binary digits (without leading zeros).
        tile = bin(self.tetris.current_tile)[2:]

        # Convert board to binary list.
        board = np.copy(self.tetris.board.reshape((-1,))).astype(int)
        board[board == self.tetris.UNDEFINED] = 0

        # Build the digit array explicitly from strings instead of relying
        # on NumPy to promote the integer cells to strings.
        self.state_binary = np.array([tile] + [str(cell) for cell in board])
        self.state = int("".join(self.state_binary), 2)

    def next_turn(self):
        """Executes the next turn in the game.

        Returns False once the last game is over, True otherwise.
        """

        if self.tetris.gameover:
            self.game += 1
            if self.game < self.games:
                self.tetris.restart()
                # Refresh the state so that the first action of the new game
                # is looked up for the new board and tile.
                self.update_state()
            else:
                return False  # Finish.
        else:
            old_state = self.state

            # Select action: random with probability epsilon, else greedy.
            if np.random.rand() < self.epsilon:
                action = np.random.randint(self.action_num)
            else:
                action = int(np.argmax(self.Q_table[old_state, :]))

            # Extract movement and rotation from action parameter.
            new_x, new_orientation = divmod(action, self.orientation_num)

            # Execute action and drop tile.
            # Note that nothing happens if the teleportation is not valid.
            if self.tetris.teleport(new_x, new_orientation):
                self.tetris.drop()

                # Update the state.
                self.update_state()

        return True  # Continue.


In [None]:
# Reset the shared game and watch a purely greedy agent (epsilon=0) play up
# to 3 games; the agent executes one turn per key press in the window.
qltetris.restart()
agent = QLAgent(qltetris, games=3, epsilon=0)
observe_tetris_with_gui(qltetris, agent)

In [None]:
### version 4 - FINAL > for_tetris.py
# Complete version, also training the Q-matrix

class QLAgent:
    """Q-learning agent to play Tetris.

    The agent keeps a tabular Q-function with one row per encoded game state
    (current tile plus board occupancy) and one column per action (target
    position combined with orientation). Actions are chosen epsilon-greedily
    and the table is updated after every attempted move.
    """

    # Progress is printed every REPORT_INTERVAL games, averaged over (at
    # most) the same number of most recent games.
    REPORT_INTERVAL = 100

    def __init__(self, tetris, games, epsilon, alpha, gamma):
        """Initialize the agent.

        Args:
            tetris: Game object; the agent reads its ``rows``, ``cols``,
                ``board``, ``current_tile``, ``gameover`` and ``reward`` and
                calls its ``restart``, ``teleport`` and ``drop`` methods.
            games: Number of games to play.
            epsilon: Probability of choosing a random action.
            alpha: Learning rate.
            gamma: Discount factor.
        """

        self.tetris = tetris

        self.games = games
        self.game = 0

        # NOTE(review): the number of target positions is taken from the row
        # count; this equals the column count on the square boards used in
        # this notebook -- confirm before using non-square boards.
        self.position_num = self.tetris.rows
        self.orientation_num = np.max([len(tile) for tile in Tetris.TILES])
        self.action_num = self.position_num * self.orientation_num

        # Bits needed to write the largest tile index in binary (at least
        # one). bit_length() is exact, unlike a floating-point log2.
        tile_bits = max(1, (len(Tetris.TILES) - 1).bit_length())
        self.state_size = (
            self.tetris.cols * self.tetris.rows  # Cells in board.
            + tile_bits  # Tiles.
        )
        self.state_num = 2 ** self.state_size

        self.epsilon = epsilon  # Probability to choose a random action in the epsilon-greedy policy.
        self.Q_table = np.zeros((self.state_num, self.action_num))

        self.alpha = alpha  # Alpha is the learning rate.
        self.gamma = gamma  # Discount factor.
        self.rewards = np.zeros(games)  # Final reward of each game.

        self.update_state()

    def update_state(self):
        """Update the state of the agent.

        The tile index in binary followed by one entry per board cell
        (undefined cells count as 0) is stored in ``self.state_binary``; its
        base-2 value is stored in ``self.state`` and used as the Q-table row.
        """

        # Convert tile to binary list.
        tile = bin(self.tetris.current_tile)[2:]

        # Convert board to binary list.
        board = np.copy(self.tetris.board.reshape((-1,))).astype(int)
        board[board == Tetris.UNDEFINED] = 0

        self.state_binary = np.append(tile, board)
        self.state = int("".join(str(i) for i in self.state_binary), 2)

    def _recent_mean_reward(self):
        """Return the mean reward over the most recent finished games.

        The window ends at the game that has just finished and spans at most
        REPORT_INTERVAL games, so it is never empty (not even at game 0).
        """

        start = max(0, self.game - self.REPORT_INTERVAL + 1)
        return np.mean(self.rewards[start:self.game + 1])

    def _select_action(self, state):
        """Choose an action index for ``state`` with the epsilon-greedy policy."""

        if np.random.rand() < self.epsilon:
            return np.random.randint(self.action_num)
        return np.argmax(self.Q_table[state, :])

    def _finish_game(self):
        """Record the finished game and start the next one, if any.

        Returns:
            bool: True if a new game was started, False if all games are
            done (the Q-table is then written to 'Q_table.txt').
        """

        self.rewards[self.game] = self.tetris.reward
        if self.game % self.REPORT_INTERVAL == 0:
            av_reward = self._recent_mean_reward()
            print(f"game {self.game}/{self.games} reward {av_reward}")

        self.game += 1
        if self.game < self.games:
            self.tetris.restart()
            # Re-encode the state for the fresh board; otherwise the first
            # move of the new game would read and update the Q-table row of
            # the previous game's final state.
            self.update_state()
            return True  # Continue.

        np.savetxt('Q_table.txt', self.Q_table)
        return False  # Finish.

    def next_turn(self):
        """Executes the next turn in the game.

        Returns:
            bool: False once all games have been played, True otherwise.
        """

        if self.tetris.gameover:
            if self.game >= self.games:
                # All games were already recorded; do not index past the end
                # of the rewards array if the caller keeps stepping.
                return False  # Finish.
            return self._finish_game()

        old_state = self.state
        action = self._select_action(old_state)

        # Extract position and orientation from the action index. The index
        # ranges over position_num * orientation_num values, so dividing by
        # orientation_num yields a position in [0, position_num) and the
        # remainder an orientation in [0, orientation_num).
        new_x, new_orientation = divmod(action, self.orientation_num)

        # Execute action and drop tile. Nothing happens in the game if the
        # teleportation is not valid.
        if self.tetris.teleport(new_x, new_orientation):
            reward = self.tetris.drop()

            # Update the state.
            self.update_state()
            new_state = self.state

            # Q-learning update using the old state and the reward.
            dQ = self.alpha * (
                reward
                + self.gamma * np.max(self.Q_table[new_state, :])
                - self.Q_table[old_state, action]
            )
            self.Q_table[old_state, action] += dQ
        else:  # Penalty for illegal move.
            self.Q_table[old_state, action] += -50

        return True  # Continue.


In [None]:
# Train for 10,000 games with a purely greedy policy (epsilon=0),
# learning rate 0.2 and no discounting (gamma=1).
qltetris.restart()
agent = QLAgent(qltetris, games=10_000, epsilon=0, alpha=0.2, gamma=1)

# next_turn() returns False once the last game has finished, at which point
# the agent has also written its Q-table to 'Q_table.txt'.
while agent.next_turn():
    pass

In [None]:
import matplotlib.pyplot as plt

# Moving average of the per-game rewards over a 100-game window.
smoothed_rewards = np.convolve(agent.rewards, np.full(100, 1 / 100), mode='valid')

plt.plot(smoothed_rewards)
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.show()

In [None]:
# Replay 3 games greedily (epsilon=0) using the Q-table that the training
# run saved to "Q_table.txt".
qltetris.restart()
agent = QLAgent(qltetris, games=3, epsilon=0, alpha=0.2, gamma=1)
agent.Q_table = np.loadtxt("Q_table.txt")
# NOTE(review): assuming the GUI helper drives agent.next_turn(), the agent
# keeps learning (alpha=0.2) while being observed and overwrites
# "Q_table.txt" after its last game -- confirm this is intended.
observe_tetris_with_gui(qltetris, agent)

In [None]:
# Note how only a few rows (states) of the Q-table are non-zero.

Q_table = agent.Q_table
non_zero_rows_mask = (Q_table != 0).any(axis=1)  # True for rows with any non-zero entry.
num_non_zero_rows = non_zero_rows_mask.sum()
print(f"Non-zero rows = {num_non_zero_rows}")

### Try also with epsilon = 0.001 and games = 100_000

In [None]:
# Try also with epsilon = 0.001 and games = 100_000, i.e. a longer run in
# which a random action is taken with probability 0.001 per turn.

qltetris.restart()
agent = QLAgent(qltetris, games=100_000, epsilon=0.001, alpha=0.2, gamma=1)

# next_turn() returns False once the last game has finished, at which point
# the agent has also written its Q-table to 'Q_table.txt'.
while agent.next_turn():
    pass

In [None]:
import matplotlib.pyplot as plt

# Moving average of the per-game rewards over a 100-game window.
smoothed_rewards = np.convolve(agent.rewards, np.full(100, 1 / 100), mode='valid')

plt.plot(smoothed_rewards)
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.show()

In [None]:
# Replay 3 games greedily (epsilon=0) using the Q-table that the latest
# training run saved to "Q_table.txt".
qltetris.restart()
agent = QLAgent(qltetris, games=3, epsilon=0, alpha=0.2, gamma=1)
agent.Q_table = np.loadtxt("Q_table.txt")
# NOTE(review): assuming the GUI helper drives agent.next_turn(), the agent
# keeps learning (alpha=0.2) while being observed and overwrites
# "Q_table.txt" after its last game -- confirm this is intended.
observe_tetris_with_gui(qltetris, agent)

In [None]:
# Interesting to note that the number of non-zero rows increases in this case.

Q_table = agent.Q_table
non_zero_rows_mask = (Q_table != 0).any(axis=1)  # True for rows with any non-zero entry.
num_non_zero_rows = non_zero_rows_mask.sum()
print(f"Non-zero rows = {num_non_zero_rows}")

## Q-Learning with a Random Tile Sequence

In [None]:
# New 4x4 game for the random-tile-sequence experiment.
# NOTE(review): random_seed=None presumably makes the tile sequence differ
# from game to game (as the section title suggests) -- confirm in QLTetris.
qltetris = QLTetris(rows=4, cols=4, max_tiles=50, random_seed=None)
agent = QLAgent(qltetris, games=1_000_000, epsilon=0.001, alpha=0.2, gamma=1)

# next_turn() returns False once the last game has finished, at which point
# the agent has also written its Q-table to 'Q_table.txt'.
while agent.next_turn():
    pass

In [None]:
import matplotlib.pyplot as plt

# Moving average of the per-game rewards over a 100-game window.
smoothed_rewards = np.convolve(agent.rewards, np.ones(100) / 100, mode='valid')

plt.plot(smoothed_rewards)
plt.ylabel('Reward')
plt.xlabel('Episode')
# Render the figure explicitly, as the other reward plots in this notebook
# do, so the cell does not echo the return value of plt.xlabel().
plt.show()

In [None]:
# Replay 100 games greedily (epsilon=0) using the Q-table that the latest
# training run saved to "Q_table.txt".
qltetris.restart()
agent = QLAgent(qltetris, games=100, epsilon=0, alpha=0.2, gamma=1)
agent.Q_table = np.loadtxt("Q_table.txt")
# NOTE(review): assuming the GUI helper drives agent.next_turn(), the agent
# keeps learning (alpha=0.2) while being observed and overwrites
# "Q_table.txt" after its last game -- confirm this is intended.
observe_tetris_with_gui(qltetris, agent)

In [None]:
# Interesting to note that the number of non-zero rows increases in this case.

Q_table = agent.Q_table
non_zero_rows_mask = (Q_table != 0).any(axis=1)  # True for rows with any non-zero entry.
num_non_zero_rows = non_zero_rows_mask.sum()
print(f"Non-zero rows = {num_non_zero_rows}")