In [1]:
# first off all we imported libraries which we need
import tensorflow as tf

from tensorflow.keras.preprocessing.sequence import pad_sequences

from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional

from tensorflow.keras.preprocessing.text import Tokenizer

from tensorflow.keras.models import Sequential

from tensorflow.keras.optimizers import Adam

from tensorflow.keras.utils import to_categorical

from tensorflow.keras.losses import SparseCategoricalCrossentropy

import numpy as np

import pandas as pd

In [2]:
chess_df = pd.read_csv("data/games.csv")
chess_df = chess_df[chess_df.turns > 5]
chess_df.head()

Unnamed: 0,id,rated,created_at,last_move_at,turns,victory_status,winner,increment_code,white_id,white_rating,black_id,black_rating,moves,opening_eco,opening_name,opening_ply
0,TZJHLljE,False,1504210000000.0,1504210000000.0,13,outoftime,white,15+2,bourgris,1500,a-00,1191,d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5...,D10,Slav Defense: Exchange Variation,5
1,l1NXvwaE,True,1504130000000.0,1504130000000.0,16,resign,black,5+10,a-00,1322,skinnerua,1261,d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6...,B00,Nimzowitsch Defense: Kennedy Variation,4
2,mIICvQHh,True,1504130000000.0,1504130000000.0,61,mate,white,5+10,ischia,1496,a-00,1500,e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc...,C20,King's Pawn Game: Leonardis Variation,3
3,kWKvrqYL,True,1504110000000.0,1504110000000.0,61,mate,white,20+0,daniamurashov,1439,adivanov2009,1454,d4 d5 Nf3 Bf5 Nc3 Nf6 Bf4 Ng4 e3 Nc6 Be2 Qd7 O...,D02,Queen's Pawn Game: Zukertort Variation,3
4,9tXo1AUZ,True,1504030000000.0,1504030000000.0,95,mate,white,30+3,nik221107,1523,adivanov2009,1469,e4 e5 Nf3 d6 d4 Nc6 d5 Nb4 a3 Na6 Nc3 Be7 b4 N...,C41,Philidor Defense,5


In [3]:
corpus = chess_df.moves
corpus.head()

0    d4 d5 c4 c6 cxd5 e6 dxe6 fxe6 Nf3 Bb4+ Nc3 Ba5...
1    d4 Nc6 e4 e5 f4 f6 dxe5 fxe5 fxe5 Nxe5 Qd4 Nc6...
2    e4 e5 d3 d6 Be3 c6 Be2 b5 Nd2 a5 a4 c5 axb5 Nc...
3    d4 d5 Nf3 Bf5 Nc3 Nf6 Bf4 Ng4 e3 Nc6 Be2 Qd7 O...
4    e4 e5 Nf3 d6 d4 Nc6 d5 Nb4 a3 Na6 Nc3 Be7 b4 N...
Name: moves, dtype: object

In [4]:
tokenizer = Tokenizer()

tokenizer.fit_on_texts(corpus)

In [5]:
total_words = len(tokenizer.word_index) + 1 

In [6]:
input_sequences = []

for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    
    for i in range(1, len(token_list)):
        if i < 10:
            n_gram_sequence = token_list[:i+1]
        else:
            n_gram_sequence = token_list[i-9:i+1]
    input_sequences.append(n_gram_sequence)
        
input_sequences[0:5]

[[5, 8],
 [5, 8, 11],
 [5, 8, 11, 23],
 [5, 8, 11, 23, 74],
 [5, 8, 11, 23, 74, 12]]

In [7]:
max_sequence_len = max([len(x) for x in input_sequences])
max_sequence_len

351

In [8]:
padded_sentences = []

batchsize = 20
batches = int(len(input_sequences) / batchsize) + 1

for batch in range(batches):
        padded_sentences_batch = pad_sequences(input_sequences[batchsize*batch:batchsize*(batch+1)], maxlen=max_sequence_len)
        
        for sentence in padded_sentences_batch:
            
            padded_sentences.append(sentence)

padded_sentences[0:5]

[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [44]:
# https://medium.datadriveninvestor.com/keras-training-on-large-datasets-3e9d9dbc09d4
import numpy as np
from tensorflow.keras.utils import Sequence

class My_Generator(Sequence):

    def __init__(self, data, labels, batch_size):
        self.data, self.labels = data, labels
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.data) / float(self.batch_size)))

    def __getitem__(self, idx):
        
        batch_x = self.data[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.labels[idx * self.batch_size:(idx + 1) * self.batch_size]
        
        return np.array(batch_x), np.array(batch_y)

In [10]:
X = []
labels = []

for i in padded_sentences:
    X.append(i[0:len(i) - 1])
    labels.append(i[-1])
    
X = np.array(X)
labels = np.array(labels)

In [12]:
model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_len-1))
model.add(Bidirectional(LSTM(150)))
model.add(Dense(total_words, activation='softmax'))


loss_fn = SparseCategoricalCrossentropy()
adam = Adam(lr=0.01)

model.compile(loss='sparse_categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

print(model)

<keras.engine.sequential.Sequential object at 0x7feaad501160>


  super(Adam, self).__init__(name, **kwargs)


In [40]:
int(np.ceil(len(X) / float(20)))

61316

In [47]:
batch_size = 20
num_training_samples = len(padded_sentences)
num_epochs = 50

my_training_batch_generator = My_Generator(X, labels, batch_size)

model.fit_generator(generator=my_training_batch_generator,
                                          steps_per_epoch=(num_training_samples // batch_size),
                                          epochs=num_epochs,
                                          verbose=1,
                                          use_multiprocessing=True,
                                          workers=16,
                                          max_queue_size=32)

Epoch 1/50


  model.fit_generator(generator=my_training_batch_generator,


 7177/61315 [==>...........................] - ETA: 10:23:02 - loss: 5.2950 - accuracy: 0.1009

Process Keras_worker_ForkPoolWorker-15:
Process Keras_worker_ForkPoolWorker-11:
Process Keras_worker_ForkPoolWorker-7:
Process Keras_worker_ForkPoolWorker-6:
Process Keras_worker_ForkPoolWorker-16:
Process Keras_worker_ForkPoolWorker-4:
Process Keras_worker_ForkPoolWorker-2:
Process Keras_worker_ForkPoolWorker-9:
Process Keras_worker_ForkPoolWorker-13:
Process Keras_worker_ForkPoolWorker-10:
Process Keras_worker_ForkPoolWorker-14:
Traceback (most recent call last):
Process Keras_worker_ForkPoolWorker-12:
Process Keras_worker_ForkPoolWorker-1:
Traceback (most recent call last):
Process Keras_worker_ForkPoolWorker-8:
Process Keras_worker_ForkPoolWorker-3:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "

KeyboardInterrupt
  File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
    with self._rlock:
KeyboardInterrupt
KeyboardInterrupt
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
  File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
  File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
    with self._rlock:
  File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
    with self._rlock:
  File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
  File "/usr/lib/py

KeyboardInterrupt: 