In [1]:
import pgn
import os
import pickle

from EWOthello.data.othello import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
real_data_path = "../EWOthello/data/othello_championship/"
synth_data_path = "../EWOthello/data/othello_synthetic/"

First, check out at the real and synthetic game data and after, the dataloader class, "othello"

In [3]:
# View the processed, real game sequences
# pgn files contain the real game data records
# Games after pgn loader is a datastructure with attributed move

fn = os.listdir(real_data_path)
print("file_name: ", fn[0], "\n")

with open(real_data_path + fn[0], "r") as fhandle:
    pgn_text = fhandle.read()
    pgn_games_obj = pgn.loads(pgn_text)

game1 = pgn_games_obj[0]
game1_moves = game1.moves
print("first game: ", game1, "\n")
print("game moves: ", game1_moves)

# KLi Othello converts moves to board number in his othello class
# which counts from the top left of the board to the bottom right
rows = list("ABCDEFGH")
columns = [str(_) for _ in range(1, 9)]
sequence = []
for move in game1_moves:
    sequence.append(rows.index(move[0]) * 8 + columns.index(move[1]))
print("game sequence processed: ", sequence)

file_name:  WTHOR-1990.pgn 

first game:  <PGNGame "Veleskevich Vladimi" vs "Melnikov Aleksandr"> 

game moves:  ['F5', 'F6', 'E6', 'F4', 'G5', 'E7', 'F7', 'H6', 'E3', 'D6', 'H5', 'G6', 'G4', 'D3', 'D8', 'H3', 'H4', 'G3', 'F3', 'F8', 'C2', 'E2', 'D2', 'C4', 'C3', 'G8', 'F1', 'D1', 'D7', 'B4', 'C1', 'E1', 'F2', 'B3', 'C6', 'C5', 'C7', 'B6', 'B7', 'G2', 'E8', 'A8', 'H8', 'H7', 'G7', 'B8', 'B2', 'C8', 'G1', 'H1', 'H2', 'B1', 'A1', 'A2', 'A3', 'A4', 'A5', 'B5', 'A7', 'A6']
game sequence processed:  [44, 45, 37, 43, 52, 38, 46, 61, 34, 29, 60, 53, 51, 26, 31, 58, 59, 50, 42, 47, 17, 33, 25, 19, 18, 55, 40, 24, 30, 11, 16, 32, 41, 10, 21, 20, 22, 13, 14, 49, 39, 7, 63, 62, 54, 15, 9, 23, 48, 56, 57, 8, 0, 1, 2, 3, 4, 12, 6, 5]


In [4]:
# View the synthetic game sequences
with open(synth_data_path + "gen10e5__20220324_153933.pickle", "rb") as file:
    data = pickle.load(file)

print("num synthetic games in file: ", len(data))
print("first synthetic game: ", data[0])

num synthetic games in file:  99999
first synthetic game:  [26, 20, 45, 42, 43, 44, 37, 25, 50, 30, 41, 51, 34, 48, 29, 59, 12, 33, 24, 21, 23, 16, 32, 46, 13, 31, 49, 5, 8, 40, 57, 15, 60, 58, 54, 56, 6, 3, 4, 52, 2, 19, 53, 55, 22, 38, 18, 11, 39, 0, 63, 14, 47, 61, 62, 9, 10, 17, 7, 1]


Now we know what the raw data looks like in both cases, let us view the dataloader object from the paper (which we may reuse)

In [5]:
# The Othello class will serve as the dataset object, handling loading (or generating batches) of game-play data
# with data_root set to none, we load ood_num number of synthetic games
othello_synth = get(ood_num=-1, num_preload=230)
print( len(othello_synth), othello_synth.sequences[0])

# There should be 20M + 3.796M synthetic games according to the paper text
# I get slightly fewer games for whatever reason but largely the same 

Max num files: 230; Use_num: 230
['gen10e5__20220324_165952.pickle', 'gen10e5__20220324_154919.pickle', 'gen10e5__20220324_164123.pickle', 'gen10e5__20220324_154043.pickle', 'gen10e5__20220324_155251.pickle', 'gen10e5__20220324_160016.pickle', 'gen10e5__20220324_165748.pickle', 'gen10e5__20220324_154002.pickle', 'gen10e5__20220324_155241.pickle', 'gen10e5__20220324_165707.pickle', 'gen10e5__20220324_160046.pickle', 'gen10e5__20220324_154811.pickle', 'gen10e5__20220324_154806.pickle', 'gen10e5__20220324_162637.pickle', 'gen10e5__20220324_154048.pickle', 'gen10e5__20220324_155439.pickle', 'gen10e5__20220324_155255.pickle', 'gen10e5__20220324_154235.pickle', 'gen10e5__20220324_160049.pickle', 'gen10e5__20220324_154032.pickle', 'gen10e5__20220324_164213.pickle', 'gen10e5__20220324_155245.pickle', 'gen10e5__20220324_154722.pickle', 'gen10e5__20220324_165841.pickle', 'gen10e5__20220324_162202.pickle', 'gen10e5__20220324_154533.pickle', 'gen10e5__20220324_164648.pickle', 'gen10e5__20220324_17

Mem Used: 14.66 GB: 100%|██████████| 230/230 [00:43<00:00,  5.29it/s]


Deduplicating...
Deduplicating finished with 22996144 games left
Using 20 million for training, 2996144 for validation
20000000 [19, 18, 17, 9, 1, 0, 26, 2, 10, 11, 4, 20, 13, 34, 25, 3, 43, 29, 12, 6, 45, 50, 37, 42, 52, 53, 54, 8, 41, 51, 58, 49, 14, 22, 33, 48, 16, 21, 40, 32, 7, 24, 5, 61, 57, 55, 15, 44, 59, 38, 63, 30, 47, 60, 39, 46, 31, 23, 62, 56]


In [6]:
othello_real = get(data_root="othello_championship", wthor=True)
#print(len(othello_real))
#print(othello_real.results)
#print(othello_real.sequences[0])

# Paper states there should be 7605 + 132,921 games in total combining the two datasets
# Note that without edit, the othello object doesnt define self.val when using professional data in this case

Loaded 2986/2986 (qualified/total) sequences from WTHOR-1990.pgn
Loaded 465/465 (qualified/total) sequences from liveothello2021.pgn
Loaded 1127/1127 (qualified/total) sequences from WTHOR-2019.pgn
Loaded 3347/3347 (qualified/total) sequences from WTHOR-1992.pgn
Loaded 487/487 (qualified/total) sequences from liveothello2015.pgn
Loaded 199/199 (qualified/total) sequences from WTHOR-1983.pgn
Loaded 413/413 (qualified/total) sequences from liveothello2013.pgn
Loaded 644/645 (qualified/total) sequences from liveothello2016.pgn
Loaded 4343/4343 (qualified/total) sequences from WTHOR-1994.pgn
Loaded 892/893 (qualified/total) sequences from liveothello2017.pgn
Loaded 4348/4348 (qualified/total) sequences from WTHOR-2009.pgn
Loaded 2396/2396 (qualified/total) sequences from WTHOR-2013.pgn
Loaded 8077/8077 (qualified/total) sequences from WTHOR-1998.pgn
Loaded 1386/1386 (qualified/total) sequences from WTHOR-1987.pgn
Loaded 2186/2186 (qualified/total) sequences from WTHOR-1989.pgn
Loaded 3152/

Look at the game board player

In [8]:
OB = OthelloBoardState()
OB.__print__()

OB.update([permit("c5")])
OB.__print__()

OB.update([permit("c4")])
OB.__print__()

OB.update([permit("d3")])
OB.__print__()

--------------------
[]
a                
b                
c                
d       O X      
e       X O      
f                
g                
h                
  1 2 3 4 5 6 7 8
--------------------
--------------------
['c5']
a                
b                
c         O      
d       O O      
e       X O      
f                
g                
h                
  1 2 3 4 5 6 7 8
--------------------
--------------------
['c5', 'c4']
a                
b                
c       X O      
d       X O      
e       X O      
f                
g                
h                
  1 2 3 4 5 6 7 8
--------------------
--------------------
['c5', 'c4', 'd3']
a                
b                
c       X O      
d     O O O      
e       X O      
f                
g                
h                
  1 2 3 4 5 6 7 8
--------------------
