# Steps: 

1) Initialize notebook <br>
2) Import games from PGN <br>
3) Import opening trees <br>

## 1a) Get friendly with the database and the python-chess library

In [1]:
import sqlite3
import chess
import chess.pgn
testboard = chess.Board()
from chess.pgn import ChildNode

In [2]:
# File name of the SQLite database that every cell in this notebook reads and writes.
DATABASE = "chessdelite.db"
# One shared connection and one shared cursor; the utility functions below use them as globals.
db_connection = sqlite3.connect(DATABASE) 
chess_db = db_connection.cursor()

# Code for shutting down the database connection is at the end of the notebook.

## 1b) Chess delite constants and utility functions

In [3]:
# Enum constants 

# Basic stuff
# FEN of the standard starting position; build_game_dict uses it as the default position_from.
initial_position = 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1'
WHITE_TO_MOVE = True
BLACK_TO_MOVE = False

# 'Game' types
# The import cell writes one of these into the GameSourceType column of the Games table.
cRegularGame = 1
cOpeningTree = 2
cWhiteRepertoire = 3
cBlackRepertoire = 4
cTheoreticalEnding = 5

# Dummy variables for Game record
''' See documentation for details, but Tournament/Player authority lists (for relational
records) are not a thing yet. The game info display right now is pulled from the 
PGN's headers.
'''
# Placeholder values written to the player, source and round columns of each Games row.
cNoRealPlayer = 0
cNoRealSource = 0
cRoundZero = 0

# Game results
# cUnknownResult is the value kept when the PGN Result header is missing or unrecognised.
cUnknownResult = -2
cWhiteWin = 1
cDraw = 0
cBlackWin = -1

# Node level in python-chess's hierarchy of variations
# Index 0 in a node's list of variations is the main continuation of the current line.
cRoot = 0

def split_fen(hotFEN: str) -> tuple:
    """Return the three FEN fields this notebook stores for a position.

    The FEN string is split on whitespace. The result is the tuple
    (piece-placement rows, side-to-move letter, castling field), i.e. the
    first three fields in their FEN order; any further fields are ignored.
    Raises IndexError when the string has fewer than three fields.
    """
    fields = hotFEN.split()
    return (fields[0], fields[1], fields[2])

def get_FEN_hash_from_FEN_string(hotFEN: str) -> str:
    """Build the lookup key under which a position is indexed.

    The key is the piece-placement rows, the side-to-move letter and the
    castling field of the FEN string concatenated with no separator.
    The remaining FEN fields do not take part in the key.
    Raises IndexError when the string has fewer than three fields.
    """
    fields = hotFEN.split()
    return "".join((fields[0], fields[1], fields[2]))

def get_tuple_for_position_insert(positionFEN: str, position_key: int = 0 ) -> tuple:
    """Flatten a FEN string into the value tuple for one Positions row.

    The tuple holds each '/'-separated row of the piece placement in FEN
    order, then the side-to-move letter, then the castling field. When
    position_key is truthy it is placed in front as the first element;
    with the default of 0 no key is included.
    """
    rows, side_to_move, castling = split_fen(positionFEN)
    row_values = (*rows.split("/"), side_to_move, castling)
    return (position_key, *row_values) if position_key else row_values

def get_whose_move_from_FEN_string(hot_FEN: str) -> bool:
    """Return True when the FEN string's side-to-move field is 'w', else False."""
    _, side_to_move, _ = split_fen(hot_FEN)
    return side_to_move == 'w'

def get_position_dict() -> dict:
    """Load every stored position into a hash -> key lookup table.

    Each row of the Positions table contributes one entry. The entry's key is
    columns 1-8 joined with '/' followed by columns 9 and 10, and its value is
    column 0. This assumes the columns come back in the order used by the
    Positions INSERT (key, eight ranks, whose move, castling), so that the key
    matches get_FEN_hash_from_FEN_string -- TODO confirm against the schema.
    """
    chess_db.execute("SELECT * from Positions")
    return {
        '/'.join(row[1:9]) + row[9] + row[10]: row[0]
        for row in chess_db.fetchall()
    }

def get_next_move_ID(tablename: str) -> int:
    """Return the row id the next insert into `tablename` would receive.

    A throwaway row is inserted, the cursor's lastrowid is read, and the
    insert is rolled back so the table is left unchanged. Raises KeyError
    for a table name that has no entry in the column map below.

    NOTE(review): the rollback applies to the shared connection, so any other
    uncommitted changes on it are discarded too; call this before starting
    a batch of writes.
    """
    # One column per table that the throwaway row fills in with '1'.
    # NOTE(review): the OpeningNodes INSERT elsewhere in this notebook names
    # the column NodePositionKey; confirm that NodePosition is correct here.
    probe_columns = {
        "Moves": "MoveGame",
        "Games": "GameResult",
        "Positions": "PositionWhoseMove",
        "OpeningNodes": "NodePosition",
    }
    probe_sql = """INSERT INTO %s (%s) VALUES ('1')""" % (tablename, probe_columns[tablename])
    chess_db.execute(probe_sql)
    next_id = chess_db.lastrowid
    db_connection.rollback()
    return next_id

def build_game_dict(game_node: ChildNode, parent_index: int = 0, position_from:str = initial_position, 
                    node_level: int = 0, line_parent: int = 0, subline_index: int = 0, game_dict:dict = None):
    '''Flatten a python-chess move tree into one dictionary, recursively.

    Every node reachable from game_node becomes one entry. Keys are taken from
    the global counter move_index, which is incremented once per node, so they
    are unique within a run. The tree shape is kept in the entry values rather
    than in nesting.

    Entry fields:
      move_number, white_move  -- full-move number and side to move before the move
      move_notation            -- the move as returned by game_node.uci()
      position_from/position_to -- FEN before and after the move
      parent                   -- key of the node this move follows (0 for the first call)
      node_level               -- this node's index in its parent's list of variations
      line_parent, subline_index -- identify the line the move belongs to (see below)

    parent and node_level transcribe the original tree exactly. line_parent and
    subline_index instead group moves into whole lines, so that all moves of a
    line can be fetched together without walking the tree. The first variation
    of a node (index 0) continues the current line and inherits both values
    unchanged. Any other variation starts a new line: its line_parent is the
    key of the move it branches from and its subline_index is its index in that
    move's list of variations. Moves that continue the new line keep those two
    values instead of being reset to zero.

    Side effects: increments the global move_index and plays the moves on the
    global testboard. For a node with node_level 0 the board is not repositioned,
    so it must already stand at position_from when the function is called; for
    node_level > 0 the board is first set to position_from.

    Returns game_dict (created on the first call when not supplied).
    '''
    global move_index
    if game_dict is None:
        game_dict = {}

    # Claim this node's key before descending so that children receive later keys.
    node_key = move_index
    move_index += 1

    # A side variation starts from its parent's position, but the shared board
    # may still be at the end of a previously walked line: rewind it.
    if node_level > 0:
        testboard.set_fen(position_from)

    entry = {
        'move_number': testboard.fullmove_number,
        'position_from': position_from,
        'white_move': get_whose_move_from_FEN_string(testboard.fen()),
        'move_notation': game_node.uci(),
        'parent': parent_index,
        'node_level': node_level,
        'line_parent': line_parent,
        'subline_index': subline_index,
    }
    game_dict[node_key] = entry

    testboard.push(game_node.move)
    position_after = testboard.fen()
    entry['position_to'] = position_after

    for variation_index, child in enumerate(game_node.variations):
        if variation_index == cRoot:
            # Main continuation: stays in the current line.
            child_line_parent, child_subline_index = line_parent, subline_index
        else:
            # Branch: a new line anchored at this move.
            child_line_parent, child_subline_index = node_key, variation_index
        build_game_dict(child, node_key, position_after, variation_index,
                        child_line_parent, child_subline_index, game_dict)
    return game_dict

# 2) Import games from pgn files

## 2a) Read a file containing chess game(s)

If you build your own file by copy-pasting PGN notation into one document, make sure <br>
that every header line and the first line of each game's move text start at the left margin <br>
(no leading whitespace), or the game-reading routine will choke.

In [4]:
# Pick the PGN file to import by leaving exactly one file_name assignment uncommented.
#file_name = 'pgns/KID opening tree.txt'
#file_name = "pgns/basic slav theory.txt"
#file_name = 'pgns/slav compendium.txt'
#file_name = 'pgns/KID compendium.pgn'
#file_name = 'pgns/portisch-petrosian slav.txt'
file_name = 'pgns/Euwe slavs.txt' 

# Every game parsed from the file, in file order.
multiple_game_list = []

# The with-block closes the file even if parsing raises part-way through.
with open(file_name) as pgn_file:
    while True:
        game = chess.pgn.read_game(pgn_file)
        if game is None:
            break  # end of file
        multiple_game_list.append(game)

## 2b) Initialize process-wide variables and extract the data

In [5]:
# Row accumulators; each list is written to its table by the insert cell in step 2d.
game_list = []
header_list = []
move_list = []
new_position_list = []
# Position hash -> PositionKey for every position already in the database.
position_dict = get_position_dict()
game_dict = {}
testboard = chess.Board()

game_type = cRegularGame

# First unused key in each table. build_game_dict advances move_index itself;
# game_index and position_index are advanced in the loop below.
game_index = get_next_move_ID("Games")
move_index = get_next_move_ID("Moves")
position_index = get_next_move_ID("Positions")

# Read games into dictionaries
for chess_game in multiple_game_list:
    # Reinitialize the game-specific variables
    testboard.reset()
    root_node = chess_game.game()
    game_result = cUnknownResult
    for header, header_value in chess_game.headers.items():
        header_list.append((game_index, header, header_value))
        if header == "Result":
            if header_value == "1-0":
                game_result = cWhiteWin
            elif header_value == "0-1":
                game_result = cBlackWin
            elif "1/2" in header_value:
                game_result = cDraw

    # As noted above, I'm mostly just stubbing out the game records right now with zeroes.
    game_list.append((game_index, cNoRealPlayer, cNoRealPlayer, cNoRealSource, game_result, game_type, cRoundZero))

    # A game without any moves (headers only) has nothing for the Moves or
    # Positions tables; keep its game and header rows and move on instead of
    # failing on variations[0].
    if not root_node.variations:
        game_index += 1
        continue

    # Build game dict, and then read its information into a list of tuples
    # that will be used to update Positions and Moves tables.
    # NOTE(review): only the first variation at the root is imported; any
    # alternative first moves in the PGN are not walked.
    first_move = root_node.variations[0]
    game_dict = build_game_dict(first_move)

    for game_dict_index, move_info in game_dict.items():
        # Hash or find both positions of the move. The starting position is
        # looked up the same way as the resulting one, so a position that is
        # not stored yet (e.g. the initial position on the first import into
        # an empty Positions table) is registered instead of raising KeyError.
        for fen_field, key_field in (('position_from', 'position_from_key'),
                                     ('position_to', 'position_to_key')):
            position_fen = move_info[fen_field]
            hot_hash = get_FEN_hash_from_FEN_string(position_fen)
            if hot_hash not in position_dict:
                position_tuple = get_tuple_for_position_insert(position_fen, position_index)
                new_position_list.append(position_tuple)
                position_dict[hot_hash] = position_index
                position_index += 1
            move_info[key_field] = position_dict[hot_hash]

        # Write game moves to their own tuple, linked with to and from positions
        # First line of SQL: logistics stuff
        move_number = move_info['move_number']
        whose_move = move_info['white_move']
        # Second line of SQL: Move info
        full_move_notation = move_info['move_notation']
        square_from = full_move_notation[:2]
        square_to = full_move_notation[2:4]
        # Fifth character of the notation when present (promotion piece), else ''.
        promotion_piece = full_move_notation[4:5]
        # Third line of SQL: position and hierarchy info
        position_from = move_info['position_from_key']
        position_to = move_info['position_to_key']
        # Original hierarchical info from the python-chess game class
        line_level = move_info['node_level']
        move_parent = move_info['parent']
        # New hierarchical info for grokking lines all at once
        line_parent = move_info['line_parent']
        subline_index = move_info['subline_index']
        # make the tuple
        move_tuple = (game_dict_index, game_index, move_number, whose_move,
                      square_from, square_to, promotion_piece,
                      line_level, position_from, position_to, move_parent, line_parent, subline_index
                      )
        move_list.append(move_tuple)
        # move_index is not advanced here: build_game_dict already did so for every node.
    game_index += 1


## 2c) Take a look at variables, if you want, to see if they make sense.

In [6]:
# Inspection cell: leave one expression uncommented to see its value as the cell output.
#len(multiple_game_list)
header_list
#new_position_list  
#move_list
#game_dict
#game_list  

[(43, 'Event', 'World Championship 16th'),
 (43, 'Site', 'Netherlands'),
 (43, 'Date', '1935.11.16'),
 (43, 'Round', '20'),
 (43, 'White', 'Euwe, Max'),
 (43, 'Black', 'Alekhine, Alexander'),
 (43, 'Result', '1-0'),
 (43, 'ECO', 'D17'),
 (43, 'EventCountry', 'NED'),
 (43, 'EventDate', '1935.10.03'),
 (43, 'EventRounds', '30'),
 (43, 'EventType', 'match'),
 (43, 'PlyCount', '81'),
 (43, 'Source', 'ChessBase'),
 (43, 'SourceDate', '1998.09.30'),
 (44, 'Event', 'World Championship 16th'),
 (44, 'Site', 'Netherlands'),
 (44, 'Date', '1935.11.19'),
 (44, 'Round', '21'),
 (44, 'White', 'Alekhine, Alexander'),
 (44, 'Black', 'Euwe, Max'),
 (44, 'Result', '0-1'),
 (44, 'ECO', 'D17'),
 (44, 'EventCountry', 'NED'),
 (44, 'EventDate', '1935.10.03'),
 (44, 'EventRounds', '30'),
 (44, 'EventType', 'match'),
 (44, 'PlyCount', '80'),
 (44, 'Source', 'ChessBase'),
 (44, 'SourceDate', '1998.09.30')]

## 2d) Write the data to the database

In [7]:
# Each entry pairs an INSERT statement with the rows collected in step 2b.
# The statements use ? placeholders so that values are bound by sqlite3
# instead of being pasted into the SQL text: a header value or name that
# contains an apostrophe no longer breaks (or alters) the statement.
insert_batches = [
    ("""INSERT INTO Games (GameKey, GameWhitePlayer, GameBlackPlayer,
                 GameSource, GameResult, GameSourceType, GameRound)
                 VALUES (?, ?, ?, ?, ?, ?, ?)""",
     game_list),
    ("""INSERT INTO Positions (PositionKey, PositionRank8, PositionRank7, PositionRank6, PositionRank5,
            PositionRank4, PositionRank3, PositionRank2, PositionRank1, PositionWhoseMove, PositionCastlingPrivileges)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
     new_position_list),
    ("""INSERT INTO Moves (MoveKey, MoveGame, MoveNumber, MoveWhoseMove, SquareFrom, SquareTo,
             MovePromotionPiece, LineLevel, PositionFrom, PositionTo, MoveParent, LineParent, SublineIndex)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
     move_list),
    ("""INSERT INTO Gamestubs (StubGameKey, StubHeaderField, StubHeaderValue)
    VALUES (?, ?, ?)""",
     header_list),
]

# Same write order as before: games, positions, moves, then header stubs.
for insert_sql, records in insert_batches:
    # Every value is bound as text, which is what the previous quoted '%s'
    # literals produced (e.g. the whose-move flag is still stored from the
    # text 'True'/'False'), so rows already in the database stay comparable.
    chess_db.executemany(
        insert_sql,
        [tuple(str(field) for field in record) for record in records],
    )

In [8]:
# Make the inserts from the previous cell permanent.
db_connection.commit()

# 3) Create opening node/tree records

## 3a) Some more consts and enums

In [9]:
# presumably the PositionKey of the standard starting position -- TODO confirm against the Positions table
OPENING_POSITION_KEY = 1
# Values for the NodeIsRoot column of OpeningNodes.
NODE_IS_ROOT = 1
NODE_IS_NOT_ROOT = 0
# Placeholder written to NodeStemGame when a node has no stem game assigned.
NO_STEM_GAME_YET = 0

## 3b) Form the data and execute the import query.

In [None]:
# Value written to NodeParentNode for every node listed in KID_node_info.
KIDParentNode = 7

# One entry per opening node to create, laid out as
# [position key, node name, node abbreviation].
KID_node_info = [
    [389, 'Exchange variation', 'KIDExVar'],
    [393, 'Petrosian variation', 'KIDPetros'],
    [396, 'Averbakh variation', 'KIDAverb'],
    [397, 'h3 variation', 'KIDh3'],
    [400, 'Bd3 variation', 'KIDBd3'],
    [403, 'Four Pawns Attack', 'KID4Pawns'],
    [407, 'Fianchetto wi Ne2', 'KIDEnglFianch'],
    [388, 'Classical Main line', 'KIDClass'],
    [413, 'Fianchetto 1950s-style', 'KID1950sFianch'],
]

In [None]:
# Parameterized INSERT: values are bound by sqlite3 rather than pasted into
# the SQL text, so a node name containing an apostrophe cannot break the statement.
opening_insert_sql = """INSERT INTO OpeningNodes (NodeName, NodeAbbrev, NodePositionKey,
             NodeParentNode, NodeStemGame, NodeIsRoot)
    VALUES (?, ?, ?, ?, ?, ?)"""

for node_fields in KID_node_info:
    # KID_node_info entries are [position key, name, abbreviation]; reorder to the column order.
    opening_tuple = (node_fields[1], node_fields[2], node_fields[0], KIDParentNode, NO_STEM_GAME_YET, NODE_IS_NOT_ROOT)
    # Bind every value as text, matching what the previous quoted '%s' literals produced.
    chess_db.execute(opening_insert_sql, tuple(str(field) for field in opening_tuple))

In [None]:
# Make the opening-node inserts from the previous cell permanent.
db_connection.commit()

## 3c) You can also change data in the tables

In [10]:
# Example edit: change the abbreviation of the opening node whose NodeKey is 25.
chess_db.execute("""UPDATE OpeningNodes SET NodeAbbrev = 'slavEuweMoroz' WHERE NodeKey = 25""")

<sqlite3.Cursor at 0x58dcce0>

In [11]:
# Example edit: change the display name of the opening node whose NodeKey is 25.
chess_db.execute("""UPDATE OpeningNodes SET NodeName = 'Euwe/Morozevich' WHERE NodeKey = 25""")

<sqlite3.Cursor at 0x58dcce0>

In [12]:
# Example edit: rewrite the 'Event' header value for every game whose key is above 42.
chess_db.execute("""UPDATE Gamestubs SET StubHeaderValue = '16th World Championship' where stubgamekey > 42 
AND StubHeaderField = 'Event'""")

<sqlite3.Cursor at 0x58dcce0>

In [13]:
# Make the UPDATE statements above permanent.
db_connection.commit()

In [14]:
# Shut down: close the shared cursor first, then the connection it belongs to.
chess_db.close()
db_connection.close()