<a href="https://colab.research.google.com/github/MichalRyszardWojcik/hello-world/blob/master/2020_09_08_grawszachy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Section 0: Import and Install

In [None]:
import os
import numpy as np
! pip install -q -U trax
import trax

import chess
import chess.pgn
import chess.svg

from IPython.core.display import HTML
from IPython.display import Javascript

# Section 1: Python Chess vs Our Tokens and Chess GUI

In [28]:
def move2token(move):
  """Encode a move as one integer token: 64 * from_square + to_square.

  Only the two square attributes of `move` are read, so anything else the
  move carries is not part of the token.
  """
  origin, target = move.from_square, move.to_square
  return origin * 64 + target

def token2move(token):
  """Decode a token back into a chess.Move (inverse of move2token).

  The quotient by 64 is the origin square and the remainder is the
  destination square.
  """
  origin, target = divmod(token, 64)
  return chess.Move(origin, target)

def game2tokens(game):
  """Return the main line of a parsed PGN game as a list of move tokens.

  Args:
    game: a game object as returned by chess.pgn.read_game.

  Returns:
    A list with one move2token() value per main-line move, in game order.
  """
  # Newer python-chess releases call this accessor mainline_moves(); older
  # ones only have main_line(). Prefer the new name, fall back to the old.
  mainline = getattr(game, 'mainline_moves', None) or game.main_line
  return [move2token(move) for move in mainline()]

def tokens2board(tokens):
  """Replay `tokens` from the initial position and return the resulting board.

  Each token is decoded with token2move and pushed as-is; no legality check
  is made here.
  """
  position = chess.Board()
  for decoded in map(token2move, tokens):
    position.push(decoded)
  return position

def tokens2apronus(tokens):
  """Build an apronus.com PGN-viewer link for a sequence of move tokens.

  The moves are written in UCI notation and joined with underscores.
  """
  uci_moves = '_'.join(token2move(token).uci() for token in tokens)
  return 'https://www.apronus.com/chess/pgnviewer/?m=' + uci_moves

def inputmoves_to_tokens(moves):
  """Convert an underscore-separated string of UCI moves into move tokens.

  Args:
    moves: e.g. 'e2e4_e7e5'. An empty string means "no moves yet".

  Returns:
    A list with one token per move; [] for an empty string.

  Raises:
    ValueError: if a move is not valid UCI notation (from chess.Move.from_uci).
  """
  # ''.split('_') yields [''], which from_uci rejects, so handle it up front.
  if moves == '':
    return []
  return [move2token(chess.Move.from_uci(uci)) for uci in moves.split('_')]

def displaygame(tokens):
  """Print a viewer link for `tokens` and render one board diagram per token.

  Token 0 is shown as 'Game Over' on a cleared board. Every other token is
  decoded to a move, labelled legal or illegal (illegal ones highlighted),
  and pushed onto the board regardless of legality.
  """
  print(tokens2apronus(tokens))
  board = chess.Board()
  cell_style = 'display:inline-block; width:20em; text-align:center; margin-bottom: 1em;'
  cells = []
  for token in tokens:
    if token == 0:
      board.clear()
      header = 'Game Over'
    else:
      move = chess.Move(token // 64, token % 64)
      if move in board.legal_moves:
        verdict, header_style = 'legal', ''
      else:
        verdict, header_style = 'illegal', 'background:red; color:yellow; font-weight:bold;'
      header = f'<div style="{header_style}">after {verdict} {move.uci()}</div>'
      board.push(move)
    cells.append(f'<div style="{cell_style}">{header}{chess.svg.board(board)}</div>')
  # Let the Colab output frame grow so all diagrams are visible.
  display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 5000})'''))
  display(HTML(''.join(cells)))

def usermove(board):
  """Ask the user for a move until a legal one (or an empty line) is entered.

  Args:
    board: the current chess.Board; used only to check legality.

  Returns:
    The legal chess.Move typed by the user, or '' if the user entered an
    empty line (callers treat this as "stop playing").
  """
  while True:
    print('Type your move in UCI format:')
    typed = input()
    if typed == '':
      return ''
    try:
      move = chess.Move.from_uci(typed)
    except ValueError:
      # Malformed text is not a move at all; treat it like an illegal move
      # and ask again instead of aborting the game.
      move = None
    if move is not None and move in board.legal_moves:
      return move
    print('You have typed an illegal move.')

# Section 2: PGN Import and Tokenization

(1) We download the PGN file https://chessdiagram.online/games/lichess_db_standard_rated_2014-07.zip from my own server, originally taken from https://database.lichess.org/. It has almost a million games played by humans in July 2014 on Lichess. It is possible to get more games from there.

(2) Next we tokenize this PGN file and save as "/content/tokens1000k.txt".

(3) But tokenization takes a long time, longer than training a large model. Therefore, steps (1) and (2) are skipped by default (commented out) and instead we load the ready-made tokenized database from https://chessdiagram.online/games/tokens1000k.zip


In [None]:
# Upper bound on the number of games written by tokenize()
# ("ile" is Polish for "how many").
g_max_ile = 1100*1000
def tokenize(input_pgn_file,output_file):
  """Read games from a PGN file and write them as lines of move tokens.

  Each kept game becomes one line of comma-separated tokens. Games with fewer
  than 24 tokens are skipped. Parsing errors reported by the PGN reader are
  printed but do not stop the run.

  Args:
    input_pgn_file: open text file handle positioned at the PGN data.
    output_file: open writable text file handle. It is closed by this
      function only when the g_max_ile limit is reached; at end of input it
      is flushed and left open for the caller to close.

  Returns:
    The number of games written.
  """
  ile = 0
  while True:
    # Read from the handle that was passed in, not from a global.
    game = chess.pgn.read_game(input_pgn_file)
    if game is None:
      output_file.flush()
      return ile
    if game.errors: print(game.errors)
    tokens = game2tokens(game)
    if len(tokens) < 24:
      continue
    # str([1, 2, 3]) -> '[1, 2, 3]'; drop the brackets to get '1, 2, 3'.
    output_file.write(str(tokens).strip('[]'))
    ile += 1
    if ile % 1000 == 0: print(f'{ile//1000}K games tokenized')
    if ile == g_max_ile:
      output_file.close()
      return ile
    output_file.write('\n')

In [None]:
# !wget https://chessdiagram.online/games/lichess_db_standard_rated_2014-07.zip
# !unzip lichess_db_standard_rated_2014-07.zip

In [None]:
#pgn = open('lichess_db_standard_rated_2014-07.pgn','r') # 1,048,440 games
#tokens = open('/content/tokens1000k.txt','w')
#ile = tokenize(pgn,tokens)
#ile

In [None]:
!wget https://chessdiagram.online/games/tokens1000k.zip
!unzip tokens1000k.zip

In [15]:
def load_tokenized_games(tokens_file):
  """Parse a token file into a list of games.

  Args:
    tokens_file: iterable of text lines, each holding the comma-separated
      integer tokens of one game.

  Returns:
    A list of games, each a list of ints. Blank lines are ignored.

  Raises:
    ValueError: if a field on a non-blank line is not an integer.
  """
  tokenized_games = []
  for line in tokens_file:
    line = line.strip()
    if not line:
      # e.g. a stray empty line at the end of the file
      continue
    # int() tolerates surrounding spaces, so split on the bare comma.
    tokenized_games.append([int(field) for field in line.split(',')])
  return tokenized_games

In [None]:
# Alternative source: the file produced by tokenize() in this notebook.
#tokens = open('/content/tokens.txt','r')
# Load the tokenized games; the with-block closes the file once parsed.
with open('tokens1000k.txt','r') as tokens:
  tokenized_games = load_tokenized_games(tokens)
ile = len(tokenized_games)
print(ile) #979685 almost a million games

The global variable <code>tokenized_games</code> is a list of tokenized games from the PGN database. It is used in the training process in the next section.

# Section 3: Training the model

The training input is composed of batches of random games. The batch size is **128 games** and the maximum game length is **60 half-moves**, which is 30 White moves and 30 Black moves.

In [17]:
# Length of every training sequence, in half-moves.
game_length = 60
# One token for every ordered pair of the 64 squares.
g_vocab_size = 64 ** 2

This section relies on the global variable <code>tokenized_games</code> prepared in the previous section.

In [18]:
# Number of loaded games, captured once when this cell runs.
g_ile = len(tokenized_games)
def random_tokenized_game():
  """Return one game from tokenized_games, picked uniformly at random."""
  return tokenized_games[np.random.randint(0, g_ile)]

In [19]:
def training_input(batch_size, length):
  """Endlessly yield (inputs, targets, loss_weights) batches of random games.

  Every row holds at most length-1 leading tokens of a random game and is
  zero-padded to `length`, so each row ends with at least one 0. Inputs and
  targets are the same array; the loss weights are all ones.
  """
  shape = (batch_size, length)
  while True:
    batch = np.zeros(shape, np.int32)
    for row in batch:
      prefix = random_tokenized_game()[:length-1]  # leaves a zero at the end
      count = min(len(prefix), length)
      row[:count] = prefix[:count]
    yield (batch, batch, np.ones(shape, np.int32))

# Wrap the batch generator for the Trax trainer; the lambda's argument is ignored.
# NOTE(review): no separate eval stream is passed, so evaluation presumably
# draws from the same random-game generator — confirm against the trax Inputs API.
trax_inputs = trax.data.inputs.Inputs(lambda _: training_input(batch_size=128, length=game_length))

In [20]:
def perform_training(lm,n_epochs,train_steps,eval_steps):
  """Train a language model on the chess batches, starting from scratch.

  Args:
    lm: the model (constructor) handed to trax.supervised.Trainer.
    n_epochs: number of train_epoch() rounds to run.
    train_steps: training steps per round.
    eval_steps: evaluation steps per round.

  The checkpoint directory is ~/train_dir/; an existing model.pkl.gz there is
  deleted first.
  """
  output_dir = os.path.expanduser('~/train_dir/')
  # Remove old model (plain Python instead of a notebook shell escape, and
  # derived from output_dir so the two paths cannot drift apart).
  try:
    os.remove(os.path.join(output_dir, 'model.pkl.gz'))
  except FileNotFoundError:
    pass
  trainer = trax.supervised.Trainer(
      model=lm,
      loss_fn=trax.layers.CrossEntropyLoss(),
      optimizer=trax.optimizers.Adafactor,  # Change optimizer params here.
      lr_schedule=trax.lr.warmup_and_rsqrt_decay(400, 0.01),  # Change lr schedule here.
      inputs=trax_inputs,
      output_dir=output_dir)
  for _ in range(n_epochs):
    trainer.train_epoch(train_steps, eval_steps)

In [None]:
def Chess_transformer_lm_2020_09_04(mode='train'):
  """Build the small TransformerLM (d_model 128, d_ff 256, 2 heads, 2 layers).

  `mode` is passed straight through to trax.models.TransformerLM.
  """
  architecture = dict(d_model=128, d_ff=256, n_heads=2, n_layers=2)
  return trax.models.TransformerLM(
      vocab_size=g_vocab_size, mode=mode, **architecture)

def perform_training_2020_09_04():
  """Train the small 2020-09-04 model: 10 rounds of 500 steps, 2 eval steps each."""
  perform_training(Chess_transformer_lm_2020_09_04,
                   n_epochs=10, train_steps=500, eval_steps=2)

# Train the small model, then pair a predict-mode instance of the same
# architecture with the checkpoint file so the testing functions can use it.
# NOTE(review): ChessModel is defined in a later cell (Section 4); that cell
# has to be executed before this one or the next line raises NameError.
perform_training_2020_09_04()
model_2020_09_04 = ChessModel()
model_2020_09_04.lm = Chess_transformer_lm_2020_09_04('predict')
# Presumably the file written under ~/train_dir/ by perform_training when the
# home directory is /root — TODO confirm.
model_2020_09_04.path = '/root/train_dir/model.pkl.gz'

## Section 4.1: Training the stronger model (2020-09-07)

In [None]:
def Chess_transformer_lm_2020_09_07(mode='train'):
  """Build the larger TransformerLM (d_model 512, d_ff 2048, 4 heads, 8 layers).

  `mode` is passed straight through to trax.models.TransformerLM.
  """
  architecture = dict(d_model=512, d_ff=2048, n_heads=4, n_layers=8)
  return trax.models.TransformerLM(
      vocab_size=g_vocab_size, mode=mode, **architecture)

The file https://chessdiagram.online/games/model-512-2048-4-8.pkl.gz contains a model trained in the following way:
<pre><code>n_epochs = 10
train_steps = 500
eval_steps = 2

Step    500: Ran 500 train steps in 346.33 secs
Step    500: Evaluation
Step    500: train                   accuracy |  0.30423179
Step    500: train                       loss |  3.55055189
Step    500: train         neg_log_perplexity | -3.55055189
Step    500: train          sequence_accuracy |  0.00000000
Step    500: train weights_per_batch_per_core |  7680.00000000
Step    500: eval                    accuracy |  0.30260420
Step    500: eval                        loss |  3.60096693
Step    500: eval          neg_log_perplexity | -3.60096693
Step    500: eval           sequence_accuracy |  0.00000000
Step    500: eval  weights_per_batch_per_core |  7680.00000000
Step    500: Finished evaluation

Step   1000: Ran 500 train steps in 263.17 secs
Step   1000: Evaluation
Step   1000: train                   accuracy |  0.34700525
Step   1000: train                       loss |  2.92200994
Step   1000: train         neg_log_perplexity | -2.92200994
Step   1000: train          sequence_accuracy |  0.00000000
Step   1000: train weights_per_batch_per_core |  7680.00000000
Step   1000: eval                    accuracy |  0.33118492
Step   1000: eval                        loss |  2.99088240
Step   1000: eval          neg_log_perplexity | -2.99088240
Step   1000: eval           sequence_accuracy |  0.00000000
Step   1000: eval  weights_per_batch_per_core |  7680.00000000
Step   1000: Finished evaluation

Step   1500: Ran 500 train steps in 264.86 secs
Step   1500: Evaluation
Step   1500: train                   accuracy |  0.35657555
Step   1500: train                       loss |  2.74071789
Step   1500: train         neg_log_perplexity | -2.74071789
Step   1500: train          sequence_accuracy |  0.00000000
Step   1500: train weights_per_batch_per_core |  7680.00000000
Step   1500: eval                    accuracy |  0.37526044
Step   1500: eval                        loss |  2.66420746
Step   1500: eval          neg_log_perplexity | -2.66420746
Step   1500: eval           sequence_accuracy |  0.00000000
Step   1500: eval  weights_per_batch_per_core |  7680.00000000
Step   1500: Finished evaluation

Step   2000: Ran 500 train steps in 264.58 secs
Step   2000: Evaluation
Step   2000: train                   accuracy |  0.37539065
Step   2000: train                       loss |  2.59341145
Step   2000: train         neg_log_perplexity | -2.59341145
Step   2000: train          sequence_accuracy |  0.00000000
Step   2000: train weights_per_batch_per_core |  7680.00000000
Step   2000: eval                    accuracy |  0.37265629
Step   2000: eval                        loss |  2.59073925
Step   2000: eval          neg_log_perplexity | -2.59073925
Step   2000: eval           sequence_accuracy |  0.00000000
Step   2000: eval  weights_per_batch_per_core |  7680.00000000
Step   2000: Finished evaluation

Step   2500: Ran 500 train steps in 263.33 secs
Step   2500: Evaluation
Step   2500: train                   accuracy |  0.38242191
Step   2500: train                       loss |  2.50210452
Step   2500: train         neg_log_perplexity | -2.50210452
Step   2500: train          sequence_accuracy |  0.00000000
Step   2500: train weights_per_batch_per_core |  7680.00000000
Step   2500: eval                    accuracy |  0.38509119
Step   2500: eval                        loss |  2.47769928
Step   2500: eval          neg_log_perplexity | -2.47769928
Step   2500: eval           sequence_accuracy |  0.00000000
Step   2500: eval  weights_per_batch_per_core |  7680.00000000
Step   2500: Finished evaluation

Step   3000: Ran 500 train steps in 263.82 secs
Step   3000: Evaluation
Step   3000: train                   accuracy |  0.39967448
Step   3000: train                       loss |  2.38351250
Step   3000: train         neg_log_perplexity | -2.38351250
Step   3000: train          sequence_accuracy |  0.00000000
Step   3000: train weights_per_batch_per_core |  7680.00000000
Step   3000: eval                    accuracy |  0.40162763
Step   3000: eval                        loss |  2.38189268
Step   3000: eval          neg_log_perplexity | -2.38189268
Step   3000: eval           sequence_accuracy |  0.00000000
Step   3000: eval  weights_per_batch_per_core |  7680.00000000
Step   3000: Finished evaluation

Step   3500: Ran 500 train steps in 262.57 secs
Step   3500: Evaluation
Step   3500: train                   accuracy |  0.40065107
Step   3500: train                       loss |  2.35326505
Step   3500: train         neg_log_perplexity | -2.35326505
Step   3500: train          sequence_accuracy |  0.00000000
Step   3500: train weights_per_batch_per_core |  7680.00000000
Step   3500: eval                    accuracy |  0.39355472
Step   3500: eval                        loss |  2.36699247
Step   3500: eval          neg_log_perplexity | -2.36699247
Step   3500: eval           sequence_accuracy |  0.00000000
Step   3500: eval  weights_per_batch_per_core |  7680.00000000
Step   3500: Finished evaluation

Step   4000: Ran 500 train steps in 262.16 secs
Step   4000: Evaluation
Step   4000: train                   accuracy |  0.40716147
Step   4000: train                       loss |  2.31738281
Step   4000: train         neg_log_perplexity | -2.31738281
Step   4000: train          sequence_accuracy |  0.00000000
Step   4000: train weights_per_batch_per_core |  7680.00000000
Step   4000: eval                    accuracy |  0.38880211
Step   4000: eval                        loss |  2.41327047
Step   4000: eval          neg_log_perplexity | -2.41327047
Step   4000: eval           sequence_accuracy |  0.00000000
Step   4000: eval  weights_per_batch_per_core |  7680.00000000
Step   4000: Finished evaluation

Step   4500: Ran 500 train steps in 261.18 secs
Step   4500: Evaluation
Step   4500: train                   accuracy |  0.39837241
Step   4500: train                       loss |  2.30720091
Step   4500: train         neg_log_perplexity | -2.30720091
Step   4500: train          sequence_accuracy |  0.00000000
Step   4500: train weights_per_batch_per_core |  7680.00000000
Step   4500: eval                    accuracy |  0.40709639
Step   4500: eval                        loss |  2.29545569
Step   4500: eval          neg_log_perplexity | -2.29545569
Step   4500: eval           sequence_accuracy |  0.00000000
Step   4500: eval  weights_per_batch_per_core |  7680.00000000
Step   4500: Finished evaluation

Step   5000: Ran 500 train steps in 262.42 secs
Step   5000: Evaluation
Step   5000: train                   accuracy |  0.41028649
Step   5000: train                       loss |  2.24561310
Step   5000: train         neg_log_perplexity | -2.24561310
Step   5000: train          sequence_accuracy |  0.00000000
Step   5000: train weights_per_batch_per_core |  7680.00000000
Step   5000: eval                    accuracy |  0.42076826
Step   5000: eval                        loss |  2.22580862
Step   5000: eval          neg_log_perplexity | -2.22580862
Step   5000: eval           sequence_accuracy |  0.00000000
Step   5000: eval  weights_per_batch_per_core |  7680.00000000
Step   5000: Finished evaluation
</code></pre>

## Section 4.2: Downloading the stronger model (2020-09-07)

In [None]:
!wget https://chessdiagram.online/games/model-512-2048-4-8.pkl.gz

# '/content/model-512-2048-4-8.pkl.gz'

# Section 4: Testing the model

We use three ways to interact with our chess playing models:
1. <code>illegalgame(firstmoves,model,length)</code>
<br>displays a game starting with <code>firstmoves</code> followed by <code>model</code>-generated (not necessarily legal) moves
2. <code>legalgame(firstmoves,model,length)</code>
<br>displays a game starting with <code>firstmoves</code> followed by <code>model</code>-generated legal moves
3. <code>playchess(firstmoves,model)</code>
<br>lets the user play against the <code>model</code> from the position after <code>firstmoves</code>

The string <code>firstmoves</code> is a sequence of UCI formatted moves separated by the underscore, for example <code>firstmoves = 'f2f3_e7e5_g2g4_d8h4'</code>.

The argument <code>model</code> is an object with two properties:
1. <code>model.lm</code> is the result of calling <code>trax.models.TransformerLM(mode='predict')</code>
2. <code>model.path</code> is the path to the model file 

Note that the argument <code>length</code>  is used to limit the number of half-moves outputted by the model.
<br>It defaults to the global variable <code>game_length = 60</code>.

These functions output the games as links to a chess editor,<br>
for example https://www.apronus.com/chess/pgnviewer/?m=f2f3_e7e5_g2g4_d8h4
<br>This allows us to view the games move by move on an interactive chessboard and to export them as PGN. The tails of these links can also be used as the input argument <code>firstmoves</code>.


In [6]:
class ChessModel:
  """Plain holder pairing a language model with the file holding its weights."""
  # Model object; this notebook assigns a predict-mode TransformerLM.
  lm = None
  # Path of the weights file later passed to init_from_file.
  path = None

# The stronger model: a predict-mode TransformerLM with the same
# hyperparameters as Chess_transformer_lm_2020_09_07, paired with the
# downloaded weights file.
model_2020_09_07 = ChessModel()
model_2020_09_07.lm = trax.models.TransformerLM(d_model=512, d_ff=2048, n_heads=4, n_layers=8, vocab_size= g_vocab_size, mode='predict')
model_2020_09_07.path = '/content/model-512-2048-4-8.pkl.gz'


The following two functions <code>autoregressive_sample_stream</code> and <code>autoregressive_sample</code><br>
from https://github.com/google/trax/blob/master/trax/supervised/decoding.py
<br>are modified to ensure that the model produces only legal chess moves.

In [7]:
from trax import layers as tl

def autoregressive_sample_stream(model, inputs=None,
                                 batch_size=1, temperature=1.0,
                                 start_id=0, accelerate=True):
  """Yield an endless stream of greedily chosen, legal chess-move tokens.

  Adapted from trax.supervised.decoding: instead of sampling from the model
  output, the highest-scoring token that is a legal move on an internally
  tracked board is chosen at every step.

  Args:
    model: autoregressive model in 'predict' mode.
    inputs: optional int array of shape (1, n) holding the tokens of the moves
      already played; None starts from the initial position.
    batch_size: must be 1, because a single board is tracked.
    temperature: unused (the choice is always greedy); kept so the signature
      matches the original trax function.
    start_id: token fed to the model before the prompt.
    accelerate: whether to wrap the model in tl.Accelerate.

  Yields:
    An int array of shape (1, 1) with the chosen token. Token 0 is yielded
    when the position has no legal move that a token can represent.

  Raises:
    ValueError: if batch_size is not 1 or does not match inputs.shape[0].
  """
  if inputs is not None and inputs.shape[0] != batch_size:
    raise ValueError(f'Inputs batch size ({inputs.shape[0]}) does not match '
                     f'batch_size arg ({batch_size}).')
  if batch_size != 1:
    raise ValueError('Legal-move decoding tracks a single board; '
                     f'batch_size must be 1, got {batch_size}.')

  fast_model = tl.Accelerate(model) if accelerate else model
  start_symbol = np.full((batch_size, 1), start_id, dtype=np.int32)
  if model.n_in == 1 and inputs is not None:
    current_symbols = np.concatenate([start_symbol, inputs], axis=1)
  else:
    current_symbols = start_symbol

  # Board after the prompt moves; the initial position when there is no prompt.
  board = tokens2board([] if inputs is None else inputs.flatten())

  while True:
    if model.n_in > 1 and inputs is not None:
      logits = fast_model((inputs, current_symbols))[0]
    else:
      logits = fast_model(current_symbols)
    scores = np.asarray(logits[:, -1, :]).flatten()
    # Enumerate the legal moves instead of testing all 4096 tokens. Tokens
    # carry no promotion piece, so promotion moves cannot be represented and
    # are left out, as before.
    # NOTE(review): python-chess may also accept alternative castling
    # encodings via `in legal_moves`; such aliases are no longer candidates —
    # confirm this is acceptable.
    legal_tokens = sorted({move2token(move) for move in board.legal_moves
                           if move.promotion is None})
    if legal_tokens:
      # Compare raw scores: exp() is monotonic, and skipping it avoids
      # underflow turning every candidate into a tie at zero.
      next_token = legal_tokens[int(np.argmax(scores[legal_tokens]))]
    else:
      next_token = 0
    board.push(token2move(next_token))
    sample = np.array(next_token, dtype=np.int64).reshape((1, 1))
    yield sample
    # NOTE: Because the model is autoregressive and in 'predict' mode, its
    # history is cached in the model state and the next input is the single
    # symbol just chosen, fed as a (batch, 1) array.
    current_symbols = sample


def autoregressive_sample(model, inputs=None,
                          batch_size=1, temperature=1.0,
                          start_id=0, eos_id=1, max_length=100,
                          accelerate=True):
  """Collect tokens from autoregressive_sample_stream until EOS or max_length.

  Args:
    model, inputs, batch_size, temperature, start_id, accelerate: passed
      through to autoregressive_sample_stream.
    eos_id: token that ends a sequence once every batch position has
      produced it.
    max_length: maximum number of tokens to collect.

  Returns:
    The collected samples, each given an extra axis and concatenated along
    axis 1 (callers in this notebook flatten the result).
  """
  result = []
  eos_seen = set()
  counter = 0
  for sample in autoregressive_sample_stream(
      model, inputs, batch_size=batch_size, temperature=temperature,
      start_id=start_id, accelerate=accelerate):
    sample = sample[:, None]
    result.append(sample)
    counter += 1
    if counter >= max_length:
      return np.concatenate(result, axis=1)
    # Check at which batch positions have we already encountered EOS.
    for j in range(batch_size):
      # Take the scalar explicitly; int() on a size-1 array is deprecated
      # in recent NumPy.
      if int(np.asarray(sample[j]).flat[0]) == eos_id:
        eos_seen.add(j)
    # If EOS has been seen on all positions, stop.
    if all(j in eos_seen for j in range(batch_size)):
      return np.concatenate(result, axis=1)
  return np.concatenate(result, axis=1)

In [8]:
def _displaygame(legal,firstmoves,model,length):
  """Let `model` continue the game after `firstmoves` and display the result.

  With legal=True the legality-filtering sampler defined in this notebook is
  used; otherwise trax's own sampler, whose moves may be illegal. Up to
  `length` tokens are generated, stopping early at token 0.
  """
  prompt_tokens = inputmoves_to_tokens(firstmoves)
  display(tokens2board(prompt_tokens))
  prompt = np.array(prompt_tokens).reshape((1,len(prompt_tokens)))

  print(f'Using model: {model.path}')
  predict_model = model.lm
  # Weights are loaded from file on every call.
  predict_model.init_from_file(
      model.path, weights_only=True,
      input_signature=trax.shapes.ShapeDtype((1,1), dtype=np.int32))
  if legal:
    sampler = autoregressive_sample
  else:
    sampler = trax.supervised.decoding.autoregressive_sample
  played = sampler(predict_model, prompt, temperature=0.0, max_length=length, eos_id = 0)

  displaygame(np.concatenate((prompt.flatten(), played.flatten())))

def illegalgame(firstmoves,model,length=game_length):
  """Display a game where the model's continuation is not filtered for legality."""
  _displaygame(legal=False, firstmoves=firstmoves, model=model, length=length)
def legalgame(firstmoves, model, length=game_length):
  """Display a game where the model's continuation is restricted to legal moves."""
  _displaygame(legal=True, firstmoves=firstmoves, model=model, length=length)

In [None]:
# Let the stronger model continue 1.e4 e5 for up to 40 half-moves, without legality filtering.
illegalgame('e2e4_e7e5',model_2020_09_07,40)

In [None]:
# Same opening, but restricted to legal moves, for up to 41 half-moves.
legalgame('e2e4_e7e5',model_2020_09_07,41)

In [23]:
def _legal_token_after_tokens(firstmoves,model):
  """Return the model's single next legal-move token after `firstmoves`.

  `firstmoves` is a list of tokens. The weights are loaded from model.path
  on every call, and the result is the array returned by
  autoregressive_sample with max_length=1.
  """
  prompt = np.array(firstmoves).reshape((1,len(firstmoves)))
  predict_model = model.lm
  predict_model.init_from_file(
      model.path, weights_only=True,
      input_signature=trax.shapes.ShapeDtype((1,1), dtype=np.int32))
  return autoregressive_sample(predict_model, prompt, temperature=0.0, max_length=1, eos_id = 0)

def playchess(moves,model):
  """Play interactively against `model`, starting after the given moves.

  `moves` is an underscore-separated string of UCI moves ('' for the initial
  position). The user and the model alternate until the game is over or the
  user enters an empty line.
  """
  board = chess.Board()
  tokens = []
  opening = moves.split('_') if moves != '' else []
  for uci in opening:
    move = chess.Move.from_uci(uci)
    board.push(move)
    tokens.append(move2token(move))
  display(board)
  while not board.is_game_over():
    # User's turn; an empty answer ends the session.
    move = usermove(board)
    if move == '': return
    board.push(move)
    display(board)
    tokens.append(move2token(move))
    if board.is_game_over():
      break
    # Model's turn: ask for one legal token given the whole game so far.
    reply = _legal_token_after_tokens(tokens,model).flatten()[0]
    tokens.append(reply)
    board.push(token2move(reply))
    print(f'Using model: {model.path}')
    display(board)
    print(tokens2apronus(tokens))

In [None]:
# Play against the small model from the initial position.
playchess('',model_2020_09_04)

In [None]:
# Play against the stronger model from the initial position.
playchess('',model_2020_09_07)

# Section 5: Notes and observations

The weaker engine fails to capture the queen after<br>
https://www.apronus.com/chess/pgnviewer/?m=e2e4_e7e5_f1c4_g8f6_d1h5_g7g6_b1c3_f8g7

In [None]:
# Play against the small model from the position after e2e4 e7e5 f1c4 g8f6.
playchess('e2e4_e7e5_f1c4_g8f6',model_2020_09_04)

The stronger model captured the queen at h5:<br>
https://www.apronus.com/chess/pgnviewer/?m=e2e4_e7e5_f1c4_f8c5_b1c3_g8f6_d1h5_f6h5

In [None]:
# Ask the stronger model for exactly one legal move after the queen lands on h5.
legalgame('e2e4_e7e5_f1c4_g8f6_d1h5',model_2020_09_07,1)

It played quite well, despite leaving the bishop on c5 unprotected and blundering the queen at h4:
https://www.apronus.com/chess/pgnviewer/?m=e2e4_e7e5_f1c4_f8c5_b1c3_g8f6_d1h5_f6h5_g2g4_h5f4_c3a4_e8g8_a4c5_d7d6_a2a4_c8g4_c5e6_g4e6_c4f1_b8c6_a4a5_c6d4_h2h4_d4c2_e1d1_c2a1_a5a6_b7b6_b2b4_d8h4_h1h4_f7f5)



The stronger model is much more interesting after 1.Nh3. The old model ignored this move and imitated a game with no regard to the knight having moved to h3. But the stronger model actually uses this knight to go to f7:
https://www.apronus.com/chess/pgnviewer/?m=g1h3_e7e5_h3g5_d7d5_g5f7_e8f7_e2e3_g8f6_b1c3_f8b4_a2a3_b4c3_d2c3_e5e4_f1e2_h8e8_e1g1_f7g8_c1d2_c7c6_c3c4_c8e6_c4d5_c6d5_c2c4_b8c6_c4d5_e6d5_d2c3_d5d4_c3d2_d4e3_d2e3_e8e3_f2e3_e8e3_d1c2_c6d4_c2c3_d4e2_g1h1_e2g3_h2g3_f6e4_h1h2_g3f1_a1f1_e4g3_h2g3_f1g3_f1f4_g3f1_f4f1_g3f1_d1f1_g3f1_0000


In [None]:
# Small model: up to 7 legal half-moves after the knight move g1h3.
legalgame('g1h3',model_2020_09_04,7)

In [None]:
# Stronger model: up to 7 legal half-moves after the knight move g1h3.
legalgame('g1h3',model_2020_09_07,7)