## Tetris played by a human

## Q-Learning with Fixed Sequence

In [1]:
import numpy as np
import random

class Tetris:
    """Minimal Tetris environment with a reproducible tile sequence.

    The board is a ``rows x cols`` integer array: free cells hold
    ``UNDEFINED`` and occupied cells hold 1.  Row 0 is the bottom row.
    Each tile orientation is a list of columns, and each column is a
    ``[bottom, top]`` pair of cell offsets (top exclusive) measured from
    the tile's lowest row.
    """

    UNDEFINED = -1

    TILES = [
        [
            [[0, 2]],  # Tile 0, orientation 0: one column, two cells high.
            [[0, 1], [0, 1]],  # Tile 0, orientation 1: two columns, one cell high.
        ],
        [
            [[0, 1], [1, 2]],  # Tile 1, orientation 0.
            [[1, 2], [0, 1]],  # Tile 1, orientation 1.
        ],
        [
            [[0, 2], [1, 2]],  # Tile 2, orientation 0.
            [[0, 2], [0, 1]],  # Tile 2, orientation 1.
            [[0, 1], [0, 2]],  # Tile 2, orientation 2.
            [[1, 2], [0, 2]],  # Tile 2, orientation 3.
        ],
        [
            [[0, 2], [0, 2]],  # Tile 3, orientation 0 (2x2 square).
        ],
    ]

    def __init__(self, rows, cols, max_tiles, random_seed):
        """Store the configuration and immediately start the first game.

        rows, cols: board dimensions.
        max_tiles: number of tiles dealt before the game ends.
        random_seed: seed used to generate the tile sequence.
        """
        self.rows = rows
        self.cols = cols
        self.max_tiles = max_tiles
        self.random_seed = random_seed

        self.start()

    def start(self):
        """Reset board, score and tile state, then deal the first tile."""
        self.gameover = False
        self.tile_count = 0
        self.reward = 0
        self.board = np.full((self.rows, self.cols), self.UNDEFINED)
        self.current_tile = self.UNDEFINED
        self.tile_x = self.UNDEFINED
        self.tile_y = self.UNDEFINED
        self.tile_orientation = self.UNDEFINED

        # A private generator seeded with random_seed yields the same tile
        # sequence for every game and leaves the global `random` state alone.
        dealer = random.Random(self.random_seed)
        last_tile = len(self.TILES) - 1
        self.tile_sequence = [dealer.randint(0, last_tile)
                              for _ in range(self.max_tiles)]

        self.next_tile()

    def next_tile(self):
        """Deal the next tile of the sequence, or end the game if none is left."""
        if self.tile_count >= self.max_tiles:
            self.gameover = True
            return

        self.current_tile = self.tile_sequence[self.tile_count]
        self.tile_count += 1
        # New tiles appear in the middle column, above the board, unrotated.
        self.tile_x = self.cols // 2
        self.tile_y = self.rows
        self.tile_orientation = 0

    def move_left(self):
        """Shift the tile one column left; return False if it is at the wall."""
        if self.tile_x - 1 < 0:
            return False
        self.tile_x -= 1
        return True

    def move_right(self):
        """Shift the tile one column right; return False if it would not fit."""
        width = len(self.TILES[self.current_tile][self.tile_orientation])
        if self.tile_x + 1 > self.cols - width:
            return False
        self.tile_x += 1
        return True

    def rotate(self):
        """Switch to the next orientation; return False if it would not fit."""
        orientations = self.TILES[self.current_tile]
        candidate = (self.tile_orientation + 1) % len(orientations)
        if self.tile_x > self.cols - len(orientations[candidate]):
            return False
        self.tile_orientation = candidate
        return True

    def drop(self):
        """Drop the tile in its current column and return the reward earned.

        Returns -100 and sets ``gameover`` when the tile does not fit below
        the top of the board.  Otherwise the tile is placed, full lines are
        removed, the next tile is dealt, and the reward is 10 ** (lines - 1)
        for one or more removed lines, or 0 for none.  The reward is also
        added to ``self.reward``.
        """
        shape = self.TILES[self.current_tile][self.tile_orientation]

        # The tile rests at the highest landing height required by any of its
        # columns; a column with no occupied cell below imposes no constraint.
        landing = 0
        for dx, (bottom, _top) in enumerate(shape):
            column = self.tile_x + dx
            highest = next(
                (y for y in reversed(range(self.rows))
                 if self.board[y, column] > 0),
                None,
            )
            if highest is not None:
                landing = max(landing, highest + 1 - bottom)
        self.tile_y = landing

        # The tile sticks out above the board: the game is lost.
        if self.tile_y + np.max(shape) > self.rows:
            self.gameover = True
            self.reward += -100
            return -100

        # Mark the cells covered by the tile as occupied.
        for dx, (bottom, top) in enumerate(shape):
            self.board[self.tile_y + bottom:self.tile_y + top,
                       self.tile_x + dx] = 1

        # Remove full lines, scanning from the top row downwards so that a
        # row shifted into place has already been examined.
        cleared = 0
        for y in reversed(range(self.rows)):
            if np.sum(self.board[y, :]) != self.cols:
                continue
            cleared += 1
            self.board[y:-1, :] = self.board[y + 1:, :].copy()
            self.board[-1, :] = self.UNDEFINED

        gained = 10 ** (cleared - 1) if cleared > 0 else 0

        # Choose the next tile.
        self.next_tile()

        self.reward += gained
        return gained

In [2]:
class QLAgent:
    """Tabular Q-learning agent that plays the Tetris environment.

    The observable state (board occupancy, current tile, orientation and
    column) is packed into one binary number that indexes the rows of
    ``Q_table``; the columns are the four actions Left, Right, Rotate, Drop.
    Actions are chosen epsilon-greedily and learned with an undiscounted
    one-step Q-learning update.
    """

    STEP_PENALTY = 0.05  # Small cost charged on every action.
    ILLEGAL_MOVE_REWARD = -50  # Reward for a move the environment rejects.

    def __init__(self, alpha, epsilon, games, **kwargs):
        """Create the environment and an all-zero Q-table.

        alpha: learning rate.
        epsilon: probability of a random action in the epsilon-greedy policy.
        games: number of games to play before training stops.
        **kwargs: forwarded unchanged to ``Tetris``.
        """
        self.alpha = alpha
        self.epsilon = epsilon
        self.games = games
        self.game = 0
        self.tetris = Tetris(**kwargs)

        self.rewards = np.zeros(games)

        # Width in bits of each non-board field of the state encoding.
        self.tile_bits = self._bits_for(len(Tetris.TILES))
        self.orientation_bits = self._bits_for(
            max(len(tile) for tile in Tetris.TILES))
        # tile_x is a column index, so its width depends on cols (not rows).
        self.position_bits = self._bits_for(self.tetris.cols)

        self.state_size = (
            self.tetris.cols * self.tetris.rows  # Cells in board.
            + self.tile_bits
            + self.orientation_bits
            + self.position_bits
        )
        self.state_num = 2 ** self.state_size

        self.action_num = 4  # Left, Right, Rotate, Drop.

        self.Q_table = np.zeros((self.state_num, self.action_num))
        # NOTE: this is an alias of Q_table, not a copy; nothing in this
        # class reads it.
        self.Q_target = self.Q_table

        self.update_state()

    @staticmethod
    def _bits_for(count):
        """Return the number of bits needed to encode values 0..count-1 (at least 1)."""
        return max(1, (int(count) - 1).bit_length())

    @staticmethod
    def _to_bits(value, width):
        """Return value as a zero-padded binary string of the given width.

        Negative values (the environment's UNDEFINED marker) are encoded as 0.
        """
        return format(max(int(value), 0), f"0{width}b")

    def update_state(self):
        """Recompute ``state_binary`` and ``state`` from the environment."""
        # Convert board to binary list: occupied cells are 1, free cells 0.
        board = np.copy(self.tetris.board.reshape((-1,))).astype(int)
        board[board == Tetris.UNDEFINED] = 0

        # Fixed-width fields keep the encoding consistent with state_size.
        tile = self._to_bits(self.tetris.current_tile, self.tile_bits)
        orientation = self._to_bits(self.tetris.tile_orientation,
                                    self.orientation_bits)
        position = self._to_bits(self.tetris.tile_x, self.position_bits)

        self.state_binary = np.append(board, [tile, orientation, position])
        self.state = int("".join(str(i) for i in self.state_binary), 2)

    def next_turn(self):
        """Advance training by one step.

        If the current game is over, record its reward and either start the
        next game or, after the last one, save the Q-table to 'Q_table.txt'.
        Otherwise pick an action, execute it and update the Q-table.

        Returns True while training should continue and False once all games
        have been played (including on any later call).
        """
        if self.game >= self.games:
            # Training already finished; rewards has no slot for another game.
            return False

        if self.tetris.gameover:
            self.rewards[self.game] = self.tetris.reward
            if self.game % 100 == 0:
                print(f"game {self.game}/{self.games} reward {self.rewards[self.game]}")

            self.game += 1
            if self.game >= self.games:
                np.savetxt('Q_table.txt', self.Q_table)
                return False  # Finish.

            self.tetris.start()
            # Refresh the cached state so the first move of the new game is
            # chosen for (and credited to) the new game's starting state.
            self.update_state()
            return True  # Continue.

        old_state = self.state

        # Select action (epsilon-greedy).
        if np.random.rand() < self.epsilon:
            action = np.random.randint(self.action_num)
        else:
            action = np.argmax(self.Q_table[old_state, :])

        # Execute action.
        if action == 3:
            reward = self.tetris.drop()
        else:
            moves = (self.tetris.move_left,
                     self.tetris.move_right,
                     self.tetris.rotate)
            reward = 0 if moves[action]() else self.ILLEGAL_MOVE_REWARD

        # Update the state.
        self.update_state()
        new_state = self.state

        # A move that ends the game has no future return, so the target must
        # not bootstrap from the Q-values of the state that follows it.
        if self.tetris.gameover:
            future = 0.0
        else:
            future = np.max(self.Q_table[new_state, :])

        # Update the Q-table using the old state and the reward.
        self.Q_table[old_state, action] += self.alpha * (
            reward - self.STEP_PENALTY
            + future
            - self.Q_table[old_state, action]
        )

        return True  # Continue.

In [3]:
# Build the agent together with its Tetris environment.
agent = QLAgent(
    # Environment settings, forwarded to Tetris as keyword arguments.
    rows=4,
    cols=4,
    max_tiles=50,
    random_seed=123456,
    # Learning settings.
    alpha=0.2,
    epsilon=0.1,
    games=10000,
)

In [4]:
import pygame

# Interactive viewer: every key press lets the agent take one turn, and the
# board, the current tile and the score are redrawn each frame.
tetris = agent.tetris

BLACK = (0, 0, 0)
GREY = (128, 128, 128)
WHITE = (255, 255, 255)
RED =  (255, 0, 0)

TILE_SIZE = 20

# Initialize the game engine.
pygame.init()
pygame.display.set_caption("Tetris")
screen = pygame.display.set_mode((200 + tetris.cols * TILE_SIZE, 
                                  200 + tetris.rows * TILE_SIZE))
pygame.key.set_repeat(300, 100)  # Set keyboard delay and interval in ms.
font = pygame.font.SysFont("Calibri", 25, True)
clock = pygame.time.Clock()  # Caps the frame rate so the loop does not busy-spin.

# Loop until the window is closed or the agent has played all its games.
running = True
try:
    while running:

        # Any key press advances the agent by one turn.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
            elif event.type == pygame.KEYDOWN:
                if not agent.next_turn():
                    # Training is complete; further turns are not possible.
                    running = False

        # Paint game board.
        if pygame.display.get_active():
            screen.fill(WHITE)

            # Board row 0 is drawn at the bottom, hence (tetris.rows - i).
            for i in range(tetris.rows):
                for j in range(tetris.cols):
                    pygame.draw.rect(
                        screen, 
                        GREY, 
                        [100 + TILE_SIZE * j, 80 + TILE_SIZE * (tetris.rows - i), TILE_SIZE, TILE_SIZE], 
                        1
                    )
                    if tetris.board[i][j] > 0:
                        pygame.draw.rect(
                            screen, 
                            BLACK,
                            [101 + TILE_SIZE * j, 81 + TILE_SIZE * (tetris.rows - i), TILE_SIZE - 2, TILE_SIZE - 2]
                        )

            # Draw the current tile column by column at its position.
            tile = tetris.TILES[tetris.current_tile][tetris.tile_orientation]
            for x, (bottom, top) in enumerate(tile):
                for y in range(bottom, top):
                    pygame.draw.rect(
                        screen,
                        RED,
                        [101 + TILE_SIZE * (x + tetris.tile_x), 81 + TILE_SIZE * (tetris.rows - (y + tetris.tile_y)), TILE_SIZE - 2, TILE_SIZE - 2]
                    )

            screen.blit(font.render(f"Reward: {tetris.reward}", True, BLACK), [0, 0])
            screen.blit(font.render(f"Tile {tetris.tile_count}/{tetris.max_tiles}", True, BLACK), [0, 30])
            if tetris.gameover:
                screen.blit(font.render("G A M E   O V E R", True, RED), [40, 100 + tetris.rows * TILE_SIZE])
                screen.blit(font.render("Press ESC to try again", True, RED), [10, 100 + tetris.rows * TILE_SIZE + 30])

        pygame.display.flip()
        clock.tick(60)
finally:
    # Always release the window, even if a turn or a draw call raised.
    pygame.quit()

pygame 2.5.2 (SDL 2.28.3, Python 3.10.13)
Hello from the pygame community. https://www.pygame.org/contribute.html
game 0/10000 reward -100.0


In [5]:
# Keep taking turns until the agent reports that training is finished.
while True:
    if not agent.next_turn():
        break

game 100/10000 reward -100.0
game 200/10000 reward -98.0
game 300/10000 reward -100.0
game 400/10000 reward -93.0
game 500/10000 reward -99.0
game 600/10000 reward -92.0
game 700/10000 reward -96.0
game 800/10000 reward -100.0
game 900/10000 reward -95.0
game 1000/10000 reward -98.0
game 1100/10000 reward -93.0
game 1200/10000 reward -93.0
game 1300/10000 reward -100.0
game 1400/10000 reward -93.0
game 1500/10000 reward -98.0
game 1600/10000 reward -94.0
game 1700/10000 reward -98.0
game 1800/10000 reward -97.0
game 1900/10000 reward -98.0
game 2000/10000 reward -99.0
game 2100/10000 reward -93.0
game 2200/10000 reward -97.0
game 2300/10000 reward -93.0
game 2400/10000 reward -93.0
game 2500/10000 reward -97.0
game 2600/10000 reward -98.0
game 2700/10000 reward -100.0
game 2800/10000 reward -93.0
game 2900/10000 reward -93.0
game 3000/10000 reward -93.0
game 3100/10000 reward -93.0
game 3200/10000 reward -96.0
game 3300/10000 reward -99.0
game 3400/10000 reward -97.0
game 3500/10000 re