In [4]:
import pandas as pd
import sqlite3
import chess

In [5]:
# Open the SQLite database file and create the single cursor that the
# query cells below run their statements through.
DB_PATH = 'chess_games.db'
connection = sqlite3.connect(DB_PATH)
cursor = connection.cursor()

In [6]:
# List every table in the database and print its column layout.
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = cursor.fetchall()

# Each row is a 1-tuple holding the table name; unpack it in the loop target
# instead of rebinding the loop variable from tuple to string.
for (table_name,) in tables:
    print(f"Schema for table: {table_name}")
    # PRAGMA arguments cannot be bound parameters, so quote the identifier
    # (doubling any embedded double quotes) so that names containing spaces,
    # keywords or punctuation still work.
    quoted_name = '"' + table_name.replace('"', '""') + '"'
    cursor.execute(f"PRAGMA table_info({quoted_name});")
    # Rows come back as (cid, name, type, notnull, dflt_value, pk).
    schema = cursor.fetchall()
    for column in schema:
        print(column)
    print("-" * 40)

Schema for table: games
(0, 'id', 'INTEGER', 0, None, 1)
(1, 'result', 'TEXT', 0, None, 0)
(2, 'termination', 'TEXT', 0, None, 0)
----------------------------------------
Schema for table: moves
(0, 'id', 'INTEGER', 0, None, 1)
(1, 'game_id', 'INTEGER', 0, None, 0)
(2, 'move_number', 'INTEGER', 0, None, 0)
(3, 'move', 'TEXT', 0, None, 0)
(4, 'board_fen', 'TEXT', 0, None, 0)
----------------------------------------


In [7]:
# Peek at five stored positions (FEN strings) from the moves table.
# Without ORDER BY, SQLite does not promise which rows LIMIT returns, so order
# by the primary key to keep this sample reproducible across runs.
cursor.execute("SELECT board_fen FROM moves ORDER BY id LIMIT 5;")
# List of 1-tuples, one FEN per row. Despite the name, these are individual
# positions taken from the moves table, not whole games.
games = cursor.fetchall()
games

[('rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR b KQkq - 0 1',),
 ('rnbqkbnr/ppppp1pp/8/5p2/3P4/8/PPP1PPPP/RNBQKBNR w KQkq - 0 2',),
 ('rnbqkbnr/ppppp1pp/8/5p2/3P4/6P1/PPP1PP1P/RNBQKBNR b KQkq - 0 2',),
 ('rnbqkbnr/ppppp2p/6p1/5p2/3P4/6P1/PPP1PP1P/RNBQKBNR w KQkq - 0 3',),
 ('rnbqkbnr/ppppp2p/6p1/5p2/3P4/6P1/PPP1PPBP/RNBQK1NR b KQkq - 1 3',)]

In [8]:
# Render each fetched position as a text board, followed by a dashed rule.
# `games` holds board_fen rows from the moves table, i.e. individual positions
# rather than one complete game.
for (fen,) in games:
    board = chess.Board(fen)
    print(board, "-" * 40, sep="\n")

r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . P . . . .
. . . . . . . .
P P P . P P P P
R N B Q K B N R
----------------------------------------
r n b q k b n r
p p p p p . p p
. . . . . . . .
. . . . . p . .
. . . P . . . .
. . . . . . . .
P P P . P P P P
R N B Q K B N R
----------------------------------------
r n b q k b n r
p p p p p . p p
. . . . . . . .
. . . . . p . .
. . . P . . . .
. . . . . . P .
P P P . P P . P
R N B Q K B N R
----------------------------------------
r n b q k b n r
p p p p p . . p
. . . . . . p .
. . . . . p . .
. . . P . . . .
. . . . . . P .
P P P . P P . P
R N B Q K B N R
----------------------------------------
r n b q k b n r
p p p p p . . p
. . . . . . p .
. . . . . p . .
. . . P . . . .
. . . . . . P .
P P P . P P B P
R N B Q K . N R
----------------------------------------


In [26]:
# Extract the dataset: build one move sequence per game from a capped sample
# of the moves table.
MAX_MOVE_ROWS = 1000  # row cap for quick exploration

# Order explicitly: the per-game lists built below inherit the row order of the
# query result, and SQLite guarantees no order without ORDER BY. `id` breaks
# ties in case move_number is shared by more than one row of a game.
# NOTE(review): the cap counts rows, not games, so the last game in the sample
# may be cut short; confirm that is acceptable downstream.
cursor.execute(
    '''SELECT game_id, move, move_number FROM moves
       ORDER BY game_id, move_number, id
       LIMIT ?;''',
    (MAX_MOVE_ROWS,),
)
moves = cursor.fetchall()
moves_df = pd.DataFrame(moves, columns=['game_id', 'move', 'move_number'])
# Despite its name, moves_dict is a NumPy object array holding one list of
# moves per game_id (groupby sorts the groups by game_id by default).
moves_dict = moves_df.groupby('game_id')['move'].apply(list).to_numpy()
moves_dict

array([list(['d2d4', 'f7f5', 'g2g3', 'g7g6', 'f1g2', 'f8g7', 'g1f3', 'd7d6', 'c2c3', 'e7e6', 'a2a4', 'g8f6', 'd1c2', 'd8e7', 'b1d2', 'e6e5', 'd4e5', 'd6e5', 'e2e4', 'b8c6', 'e1g1', 'f5e4', 'd2e4', 'c8f5', 'f3d2', 'e8c8', 'b2b4', 'g7h6', 'f1e1', 'h6d2', 'c1d2', 'f6e4', 'g2e4', 'e7e6', 'd2g5', 'd8d6', 'a1d1', 'd6d1', 'e1d1', 'h7h6', 'g5e3', 'a7a5', 'c2b1', 'h6h5', 'b4b5', 'c6e7', 'e3g5', 'h8e8', 'h2h4', 'e6c4', 'd1e1', 'f5e4', 'e1e4', 'c4e6', 'g5f4', 'e6f5', 'f4e5', 'e7d5', 'b1e1', 'd5b6', 'f2f4', 'b6d7', 'e1e2', 'b7b6', 'e4e3', 'e8e7', 'e3e4', 'd7c5', 'e4d4', 'e7d7', 'g1g2', 'c8d8', 'g2h2', 'd8c8', 'e2g2', 'c8b8', 'g2a2', 'b8a7', 'a2g2', 'a7b8', 'g2e2', 'b8c8', 'e2f3', 'c8b8', 'f3d1', 'b8c8', 'd1e2', 'c8b8', 'e2d1', 'b8b7', 'd4d7', 'c5d7', 'e5d4', 'd7c5', 'h2g2', 'f5d5', 'g2g1', 'd5f5', 'd4c5', 'f5c5', 'd1d4', 'c5f5', 'd4d2', 'f5b1', 'g1f2', 'b1b3', 'd2d4', 'b3c2', 'f2e3', 'b7c8', 'd4h8', 'c8b7', 'h8d4', 'b7b8', 'd4d8', 'b8b7', 'd8d5', 'b7b8', 'd5g8', 'b8b7', 'g8c4', 'b7b8', 'c4g8', 'b8