In [11]:
import tetris_engine as tr
import numpy as np
np.set_printoptions(linewidth=300)
from IPython.display import clear_output
import random as rnd
import argparse
import os
import shutil
from random import random, randint, sample, seed

import numpy as np
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

import copy

from collections import deque

# import os, sys
# os.environ["SDL_VIDEODRIVER"] = "dummy"

In [12]:
# Create the game state object from the project's tetris_engine module (imported as `tr`).
# Later cells, including train(), read and step this object through the global name `env`.
env = tr.GameState()

In [3]:
"""
@author: Viet Nguyen <nhviet1009@gmail.com>
""" 
import torch.nn as nn

class DeepQNetwork(nn.Module):
    """Fully connected value network mapping 5 input features to one scalar.

    Three hidden stages of Linear -> BatchNorm1d -> LeakyReLU (128 units each)
    are followed by a single linear output stage. The stages keep their
    historical ``conv*`` attribute names so existing state dicts still load.
    """

    def __init__(self):
        super().__init__()
        width = 128

        self.conv1 = nn.Sequential(
            nn.Linear(5, width),
            nn.BatchNorm1d(width),
            nn.LeakyReLU(inplace=True),
        )
        self.conv2 = nn.Sequential(
            nn.Linear(width, width),
            nn.BatchNorm1d(width),
            nn.LeakyReLU(inplace=True),
        )
        self.conv3 = nn.Sequential(
            nn.Linear(width, width),
            nn.BatchNorm1d(width),
            nn.LeakyReLU(inplace=True),
        )
        self.conv4 = nn.Sequential(nn.Linear(width, 1))

    def forward(self, x):
        """Run ``x`` (last dimension 5) through the four stages in order."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = stage(x)
        return x

In [4]:
class LinearBlock(nn.Module):
    """
    Fully connected layer whose output goes through ReLU and then BatchNorm1d.

    input_prev: number of incoming features.
    embed_dim: number of features produced by the block.
    """
    def __init__(self, input_prev, embed_dim):
        super().__init__()
        self.fc = nn.Linear(input_prev, embed_dim)
        self.relu = nn.ReLU()
        self.bn = nn.BatchNorm1d(embed_dim)

    def forward(self, inputs):
        # Order matters: the activation is applied before normalisation.
        return self.bn(self.relu(self.fc(inputs)))


class ResidualBlock(nn.Module):
    """
    Two stacked LinearBlocks of equal width wrapped by a skip connection.

    embed_dim: feature width of both the input and the output.
    """
    def __init__(self, embed_dim):
        super().__init__()
        self.linearblock_1 = LinearBlock(embed_dim, embed_dim)
        self.linearblock_2 = LinearBlock(embed_dim, embed_dim)

    def forward(self, inputs):
        transformed = self.linearblock_2(self.linearblock_1(inputs))
        # Skip connection: add the untouched input back onto the transformed features.
        return transformed + inputs


class Model(nn.Module):
    """Residual MLP mapping 6 input features to a single scalar prediction.

    Three LinearBlocks widen the features 6 -> 32 -> 128 -> 500, four
    ResidualBlocks of width 500 follow, and a final linear layer produces
    one value per input row.
    """

    def __init__(self):
        super().__init__()
        # Kept as an attribute for the (currently disabled) one-hot input encoding.
        self.one_hot = nn.functional.one_hot

        layers = [
            LinearBlock(6, 32),
            LinearBlock(32, 128),
            LinearBlock(128, 500),
        ]
        layers.extend(ResidualBlock(500) for _ in range(4))
        self.Stack = nn.Sequential(*layers)

        self.Prediction = nn.Linear(500, 1)

    def forward(self, inputs):
        # Disabled alternative input encoding:
        # x = self.one_hot(x, num_classes=6).to(torch.float).reshape(-1, 324)
        features = self.Stack(inputs)
        return self.Prediction(features)

In [4]:
class opt:
    """Training hyper-parameters, read as plain class attributes."""

    # Optimisation
    batch_size = 512
    lr = 3e-3
    gamma = 0.92

    # Epsilon-greedy exploration schedule
    initial_epsilon = 1
    final_epsilon = 5e-3
    num_decay_epochs = 2000

    # Run length, checkpointing and replay buffer
    num_epochs = 40000
    save_interval = 5000
    replay_memory_size = 30000
    target_step = 1

    # Output locations
    log_path = "tensorboard/lr0.003"
    saved_path = "trained_models"
In [18]:
class LinearBlock(nn.Module):
    """
    Fully connected layer whose output goes through ReLU and then BatchNorm1d.

    input_prev: number of incoming features.
    embed_dim: number of features produced by the block.
    """
    def __init__(self, input_prev, embed_dim):
        super().__init__()
        self.fc = nn.Linear(input_prev, embed_dim)
        self.relu = nn.ReLU()
        self.bn = nn.BatchNorm1d(embed_dim)

    def forward(self, inputs):
        # Order matters: the activation is applied before normalisation.
        return self.bn(self.relu(self.fc(inputs)))


class ResidualBlock(nn.Module):
    """
    Two stacked LinearBlocks of equal width wrapped by a skip connection.

    embed_dim: feature width of both the input and the output.
    """
    def __init__(self, embed_dim):
        super().__init__()
        self.linearblock_1 = LinearBlock(embed_dim, embed_dim)
        self.linearblock_2 = LinearBlock(embed_dim, embed_dim)

    def forward(self, inputs):
        transformed = self.linearblock_2(self.linearblock_1(inputs))
        # Skip connection: add the untouched input back onto the transformed features.
        return transformed + inputs


class Model(nn.Module):
    """Residual MLP mapping 207 input features to a single scalar prediction.

    Two LinearBlocks widen the features 207 -> 350 -> 500, three
    ResidualBlocks of width 500 follow, and a final linear layer produces
    one value per input row.
    """

    def __init__(self):
        super().__init__()
        # Kept as an attribute for the (currently disabled) one-hot input encoding.
        self.one_hot = nn.functional.one_hot

        layers = [
            LinearBlock(207, 350),
            LinearBlock(350, 500),
        ]
        layers.extend(ResidualBlock(500) for _ in range(3))
        self.Stack = nn.Sequential(*layers)

        self.Prediction = nn.Linear(500, 1)

    def forward(self, inputs):
        # Disabled alternative input encoding:
        # x = self.one_hot(x, num_classes=6).to(torch.float).reshape(-1, 324)
        features = self.Stack(inputs)
        return self.Prediction(features)

In [19]:
class opt:
    """Training configuration consumed by train(); all values are class attributes."""

    # Network to train. It is built once, when this class body executes.
    model = Model()
    # Alternatives that were tried:
    # model = DeepQNetwork()
    # model = torch.load('trained_models/res_arq/tetris_res_arq_3000')

    # Optimisation
    batch_size = 512
    lr = 3e-4
    gamma = 0.97

    # Epsilon-greedy exploration schedule
    initial_epsilon = 1
    final_epsilon = 5e-3
    num_decay_epochs = 1000

    # Run length, checkpointing, replay buffer and target-network sync period
    num_epochs = 40000
    save_interval = 500
    replay_memory_size = 1000000
    target_step = 20

    # Rendering cadence, in epochs
    render_every = 1

    # Run name and the output directories derived from it
    name = "res_arq_allboard"
    log_path = f"tensorboard/{name}"
    saved_path = f"trained_models/{name}"

In [20]:
# Seed Python's `random` module (`seed` is imported from `random` in the first cell)
# so the exploration draws made with random()/randint()/sample() are repeatable.
seed(3)

# Compute device handle: CUDA when PyTorch reports it as available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Module-level flag, initially off.
# NOTE(review): nothing visible in this notebook reads this global value — confirm it is still needed.
switch = False

def train(opt):
    """Train ``opt.model`` on the global Tetris ``env`` with a DQN-style loop.

    Every environment step scores all candidate placements returned by
    ``env.get_next_states()``, picks one epsilon-greedily, applies it and
    stores ``[state, reward, next_state, done]`` in a replay buffer. One
    optimisation step ("epoch") runs each time a game ends, once the buffer
    holds at least 2000 transitions. A separate target network, synced every
    ``opt.target_step`` epochs, provides the bootstrap value.

    Args:
        opt: configuration object exposing ``model``, ``lr``, ``gamma``,
            ``batch_size``, ``initial_epsilon``, ``final_epsilon``,
            ``num_decay_epochs``, ``num_epochs``, ``save_interval``,
            ``replay_memory_size``, ``target_step``, ``render_every``,
            ``name``, ``log_path`` and ``saved_path``.

    Side effects:
        Deletes and recreates ``opt.log_path``, creates ``opt.saved_path``,
        writes TensorBoard scalars, prints one line per epoch and saves the
        whole model object with ``torch.save`` periodically and at the end.
    """
    # torch.manual_seed seeds the CPU generator and every CUDA device, so a
    # single call covers both branches of the old if/else.
    torch.manual_seed(123)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Start from an empty log directory so runs do not mix in TensorBoard.
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    # Create the directory the checkpoints are actually written to.
    os.makedirs(opt.saved_path, exist_ok=True)

    model = opt.model.to(device)
    target_model = copy.deepcopy(model)
    # The target network is only ever used for inference: keep it in eval mode
    # so BatchNorm uses running statistics instead of per-batch statistics.
    target_model.eval()

    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.MSELoss()

    state = env.reinit().to(device)

    replay_memory = deque(maxlen=opt.replay_memory_size)
    min_replay_size = 2000  # transitions collected before optimisation starts
    epoch = 0

    writer = SummaryWriter(opt.log_path)
    try:
        while epoch < opt.num_epochs:
            next_steps = env.get_next_states()

            # Linearly decay epsilon from initial to final over num_decay_epochs.
            decay_left = max(opt.num_decay_epochs - epoch, 0)
            epsilon = opt.final_epsilon + decay_left * (
                opt.initial_epsilon - opt.final_epsilon) / opt.num_decay_epochs
            random_action = random() <= epsilon

            next_actions, next_states = zip(*next_steps.items())
            next_states = torch.stack(next_states).to(device)

            if random_action:
                index = randint(0, len(next_steps) - 1)
            else:
                # Score the candidates with running BatchNorm statistics.
                model.eval()
                with torch.no_grad():
                    predictions = model(next_states)[:, 0]
                index = torch.argmax(predictions).item()

            # clone(): plain indexing returns a view, which would keep the whole
            # candidate batch alive for as long as the replay entry exists.
            next_state = next_states[index, :].clone()
            action = next_actions[index]

            # Move the falling piece to the chosen (x, rotation); roll back if
            # the engine reports the position as invalid.
            reverse_x = env.fallingPiece['x']
            reverse_rot = env.fallingPiece['rotation']
            env.fallingPiece['x'] = action[0]
            env.fallingPiece['rotation'] = action[1]
            if not env.isValidPosition():
                env.fallingPiece['x'] = reverse_x
                env.fallingPiece['rotation'] = reverse_rot

            # Read score/lines before stepping: after a finished game the
            # environment is re-initialised below.
            final_score = env.score
            final_cleared_lines = env.lines

            render = epoch % opt.render_every == 0
            reward, done = env.frame_step([0, 0, 0, 0, 1, 0], render=render)[1:3]

            replay_memory.append([state, reward, next_state, done])
            if not done:
                state = next_state
                continue
            state = env.reinit().to(device)

            if len(replay_memory) < min_replay_size:
                continue
            epoch += 1

            batch = sample(replay_memory, min(len(replay_memory), opt.batch_size))
            state_batch, reward_batch, next_state_batch, done_batch = zip(*batch)
            state_batch = torch.stack(state_batch).to(device)
            next_state_batch = torch.stack(next_state_batch).to(device)
            reward_batch = torch.from_numpy(
                np.array(reward_batch, dtype=np.float32)[:, None]).to(device)
            done_mask = torch.from_numpy(
                np.asarray(done_batch, dtype=np.bool_)[:, None]).to(device)

            # Switch to training mode BEFORE the forward pass that is
            # back-propagated, so BatchNorm updates its running statistics.
            model.train()
            q_values = model(state_batch)

            with torch.no_grad():
                next_prediction_batch = target_model(next_state_batch)

            # TD target: reward for terminal transitions, otherwise
            # reward + gamma * target-network value. Shape (batch, 1).
            y_batch = torch.where(
                done_mask,
                reward_batch,
                reward_batch + opt.gamma * next_prediction_batch)

            optimizer.zero_grad()
            loss = criterion(q_values, y_batch)
            loss.backward()
            optimizer.step()

            writer.add_scalar('Train/Loss', loss.item(), epoch - 1)
            writer.add_scalar('Train/Score', final_score, epoch - 1)
            writer.add_scalar('Train/Cleared lines', final_cleared_lines, epoch - 1)

            if epoch % opt.target_step == 0:
                # load_state_dict copies weights and buffers; the eval mode set
                # on the target network above is unaffected.
                target_model.load_state_dict(model.state_dict())

            print("Epoch: {}/{}, Action: {}, Score: {}, Cleared lines: {}".format(
                epoch,
                opt.num_epochs,
                action,
                final_score,
                final_cleared_lines))

            if epoch % opt.save_interval == 0:
                torch.save(model, "{}/tetris_{}_{}".format(opt.saved_path, opt.name, epoch))

        torch.save(model, "{}/tetris_{}".format(opt.saved_path, opt.name))
    finally:
        # Flush and close the event file even if the loop raises.
        writer.close()

In [21]:
# Launch training with the settings held on the `opt` class defined above.
train(opt)

RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [4]:
# Step the environment with the same action vector train() passes, and show
# elements 1 and 2 of the result (train() unpacks that slice as reward and done).
env.frame_step([0,0,0,0,1,0])[1:3]

(1, False)

In [8]:
# Scratch list used by the next cell to try out a tail-slice mean.
a = [1,2,3,4,5,6,7,8,9]

In [None]:
# Mean of the last three elements of `a`.
np.array(a[-3:]).mean()

In [10]:
# Length of a one-element list concatenated with the list form of
# env._board_to_num(env.board); the recorded output is 201.
len([1] + env._board_to_num(env.board).tolist())

201

In [7]:
# Length of the state stored under action key (-1, 0). The recorded output (207)
# matches the input width of the first LinearBlock in the 207-input Model.
len(env.get_next_states()[(-1,0)])

207

In [None]:
# Scratch check on a single board column: walking the reversed column, find the
# first empty cell ('.') whose successor is non-empty, then count every non-empty
# cell from that position to the end of the reversed column.
# NOTE(review): presumably reversing makes the walk go bottom-to-top — confirm
# against the engine's board layout.
col = 6
count = 0
# Reverse once instead of re-slicing the column on every access.
column = env.board[col][::-1]
# Stop one short of the end so column[ind + 1] always exists; the earlier version
# relied on a bare `except:` to skip that out-of-range lookup.
for ind in range(len(column) - 1):
    if column[ind] == '.' and column[ind + 1] != '.':
        count = sum(1 for cell in column[ind:] if cell != '.')
        break  # only the first such position is counted
print(count)

In [None]:
S
Z
J
L
I
O
T

In [36]:
# Ask the engine's piecewidth helper about the current falling piece.
# NOTE(review): the recorded output is -4; a negative value looks suspicious for
# a width — confirm what the helper expects as its argument.
env.piecewidth(env.fallingPiece)

-4

In [282]:
# Disabled manual setup: when uncommented, these lines reset the game and
# overwrite the falling piece's x, y, rotation, shape and color fields.
# env.reinit()
# env.fallingPiece['x']=1
# env.fallingPiece['y']=0
# env.fallingPiece['rotation']=3
# env.fallingPiece['shape']='L'
# env.fallingPiece['color']=0
# Step with an all-zero action vector and show elements 1 and 2 of the result
# (train() unpacks the same slice as reward and done).
env.frame_step([0,0,0,0,0,0])[1:3]

(4, False)

In [7]:
# Inspect the falling-piece record; the recorded output is a dict with the keys
# 'shape', 'rotation', 'x', 'y' and 'color'.
env.fallingPiece

{'shape': 'T', 'rotation': 1, 'x': 3, 'y': 4, 'color': 3}

In [11]:
# Reset the game and show the returned value (train() uses it as the initial
# state). The recorded output here is a 5-element zero tensor.
env.reinit()

tensor([0., 0., 0., 0., 0.])

In [None]:
# Call the engine's private _number_of_holes helper on the current board
# (no output was recorded for this cell).
env._number_of_holes(env.board)

In [43]:
# Piece rotation templates. Each *_SHAPE_TEMPLATE is a list of rotations; each
# rotation is a grid of five 5-character strings in which 'O' marks a filled
# cell and '.' an empty one.

# S piece: 2 rotations.
S_SHAPE_TEMPLATE = [['..OO.',
                     '.OO..',
                     '.....',
                     '.....',
                     '.....'],
                    ['..O..',
                     '..OO.',
                     '...O.',
                     '.....',
                     '.....']]

# Z piece: 2 rotations.
Z_SHAPE_TEMPLATE = [['.OO..',
                     '..OO.',
                     '.....',
                     '.....',
                     '.....'],
                    ['..O..',
                     '.OO..',
                     '.O...',
                     '.....',
                     '.....']]

# I piece: 2 rotations (vertical, then horizontal).
I_SHAPE_TEMPLATE = [['..O..',
                     '..O..',
                     '..O..',
                     '..O..',
                     '.....'],
                    ['.....',
                     'OOOO.',
                     '.....',
                     '.....',
                     '.....']]

# O piece: a single rotation.
O_SHAPE_TEMPLATE = [['.OO..',
                     '.OO..',
                     '.....',
                     '.....',
                     '.....']]

# J piece: 4 rotations.
J_SHAPE_TEMPLATE = [['.O...',
                     '.OOO.',
                     '.....',
                     '.....',
                     '.....'],
                    ['..OO.',
                     '..O..',
                     '..O..',
                     '.....',
                     '.....'],
                    ['.OOO.',
                     '...O.',
                     '.....',
                     '.....',
                     '.....'],
                    ['..O..',
                     '..O..',
                     '.OO..',
                     '.....',
                     '.....']]

# L piece: 4 rotations.
L_SHAPE_TEMPLATE = [['...O.',
                     '.OOO.',
                     '.....',
                     '.....',
                     '.....'],
                    ['..O..',
                     '..O..',
                     '..OO.',
                     '.....',
                     '.....'],
                    ['.OOO.',
                     '.O...',
                     '.....',
                     '.....',
                     '.....'],
                    ['.OO..',
                     '..O..',
                     '..O..',
                     '.....',
                     '.....']]

# T piece: 4 rotations.
T_SHAPE_TEMPLATE = [['..O..',
                     '.OOO.',
                     '.....',
                     '.....',
                     '.....'],
                    ['..O..',
                     '..OO.',
                     '..O..',
                     '.....',
                     '.....'],
                    ['.OOO.',
                     '..O..',
                     '.....',
                     '.....',
                     '.....'],
                    ['..O..',
                     '.OO..',
                     '..O..',
                     '.....',
                     '.....']]

In [67]:
# Width, in columns, spanned by the filled ('O') cells of one rotation template:
# rightmost filled column minus leftmost filled column, plus one.
piece = J_SHAPE_TEMPLATE[3]
minwidth = 5  # leftmost filled column so far; 5 is one past the last column of the 5-wide grid
maxwidth = 0  # rightmost filled column so far
for row in piece:
    if 'O' in row:
        # str.index / str.rindex give the first and last 'O' directly. Distinct
        # local names avoid overwriting the scratch list `a` from an earlier cell.
        minwidth = min(minwidth, row.index('O'))
        maxwidth = max(maxwidth, row.rindex('O'))
maxwidth - minwidth + 1

2

In [142]:
# Integers from -2 up to (but excluding) 10 - piecewidth - 1 for the second L
# rotation; the recorded output is -2..6.
# NOTE(review): presumably these are the candidate x offsets for the piece — confirm.
list(range(-2, 10 - env.piecewidth(L_SHAPE_TEMPLATE[1])-1))

[-2, -1, 0, 1, 2, 3, 4, 5, 6]

In [283]:
# Show the mapping returned by get_next_states(). train() treats each key as an
# action whose element 0 is written to the falling piece's x and element 1 to its
# rotation; each value is the state tensor for that placement.
env.get_next_states()

[['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '0' '0' '.' '.' '.' '.' '.' '.']
 ['1' '.' '.' '0' '.' '.' '.' '.' '.' '.']
 ['1' '.' '0' '0' '.' '.' '.' '.' '.' '.']
 ['1' '.' '0' '0' '.' '.' '.' '.' '.' '.']
 ['1' '.' '.' '0' '.' '.' '.' '.' '.' '.']
 ['0' '0' '0' '0' '.' '.' '0' '0' '0' '0']]
[['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' 

{(-2, 0): tensor([ 0.,  2., 16., 22.,  4.]),
 (-1, 0): tensor([ 0.,  2., 12., 22.,  4.]),
 (0, 0): tensor([ 0.,  2., 20., 22., 12.]),
 (1, 0): tensor([ 0.,  2., 20., 22.,  4.]),
 (2, 0): tensor([ 0.,  2., 12., 22.,  4.]),
 (3, 0): tensor([ 0.,  2., 18., 22.,  4.]),
 (4, 0): tensor([ 0.,  2., 20., 22.,  4.]),
 (5, 0): tensor([ 0.,  2., 20., 22.,  4.]),
 (6, 0): tensor([ 0.,  2., 20., 22.,  4.]),
 (7, 0): tensor([ 0.,  2., 16., 22.,  4.]),
 (0, 1): tensor([ 0., 12.,  8., 32.,  8.]),
 (1, 1): tensor([ 0., 13., 14., 33.,  8.]),
 (2, 1): tensor([ 0., 14., 12., 34.,  8.]),
 (3, 1): tensor([ 0., 19., 12., 39.,  7.]),
 (4, 1): tensor([ 0.,  4., 10., 24.,  6.]),
 (5, 1): tensor([ 0.,  3., 14., 23.,  5.]),
 (6, 1): tensor([ 0.,  2., 13., 22.,  4.])}