# Revised TF Record Implementation

In [6]:
# Standard library
import csv
import math
import os
import random
import shutil
import sys
from multiprocessing import Pool

# Third-party
import chess
import numpy as np
import pandas as pd
import tensorflow as tf
from joblib import load
from sklearn.preprocessing import StandardScaler
from sqlalchemy.orm import  Session
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate
from tensorflow.keras.models import Model
from tqdm import tqdm

# Project-local (imported after extending sys.path)
sys.path.append('../')
from Chess_Model.src.model.classes.cnn_scorer import boardCnnEval


current_working_directory = os.getcwd()

# Show the kernel's working directory; the relative "./..." paths used in this notebook resolve against it.
print(current_working_directory)

C:\Users\ethan\git\Full_Chess_App


# Project-local imports

In [11]:
from Chess_Model.src.model.config.config import Settings
from Chess_Model.src.model.classes.sqlite.dependencies import  fetch_one_game_position, fetch_all_game_positions_rollup,get_rollup_row_count,board_to_GamePostition
from Chess_Model.src.model.classes.sqlite.models import GamePositions
from Chess_Model.src.model.classes.sqlite.database import SessionLocal
from Chess_Model.src.model.classes.cnn_scorer import boardCnnEval

# Constants

In [14]:
# Keys read from the score dictionary by split_board_scores.
metadata_key = 'metadata'
bitboards_key = 'positions_data'
results_key = 'game_results'

# Record file name and the directory that holds every record file.
tfrecord_file_name = 'combined_data.tfrecord'
recordsDir = "./Chess_Model/src/model/data/"

# Full feature file, its working copy, and the three split files.
recordsData: str = f"{recordsDir}feature_data.tfrecord"
recordsDataCopy: str = f"{recordsDir}feature_data_copy.tfrecord"
recordsDataTrain = f"{recordsDir}train_data.tfrecord" 
recordsDataValid = f"{recordsDir}validation_data.tfrecord"
recordsDataTest: str = f"{recordsDir}test_data.tfrecord"

# Each stored feature is declared as one fixed-length string entry.
feature_description = {
    feature_name: tf.io.FixedLenFeature([], tf.string)
    for feature_name in ('bitboards', 'metadata', 'target')
}

# Path of the scaler joblib file.
scalarFile = './Chess_Model/src/model/data/scaler.joblib'

# Evaluator instance shared by the cells below.
evaluator = boardCnnEval()

## Part 1: Pipeline from SqLite DB to Record File

In [19]:
def split_board_scores(scores_dict: dict):
    """Pull the three value lists out of a board-score dictionary.

    ``scores_dict`` must contain the ``metadata_key``, ``bitboards_key`` and
    ``results_key`` entries, each of which is itself a dictionary; only the
    values of those inner dictionaries are kept.

    Returns:
        ``(bitboards, metadata, game_results)`` as three lists, in that order.
    """
    # Look the sections up in the same order as before (metadata first) so a
    # missing key is reported for the same section.
    meta_values, board_values, result_values = (
        list(scores_dict[section].values())
        for section in (metadata_key, bitboards_key, results_key)
    )
    return board_values, meta_values, result_values
    
def serialize_data(scores_dict):
    """Serialize one board-score dictionary into three serialized tensors.

    The dictionary is split with ``split_board_scores``; the bitboards are
    converted to ``int8`` tensors and stacked, while the metadata and game
    results are converted to ``float16`` tensors. Each tensor is then passed
    through ``tf.io.serialize_tensor``.

    Args:
        scores_dict: Score dictionary accepted by ``split_board_scores``.

    Returns:
        ``(serialized_bitboards, serialized_metadata, serialized_target)``.
    """
    bitboards, metadata, game_results = split_board_scores(scores_dict)

    # One int8 tensor per board, combined into a single tensor with tf.stack.
    bitboards_tensor = tf.stack(
        [tf.convert_to_tensor(board, dtype=tf.int8) for board in bitboards]
    )
    metadata_tensor = tf.convert_to_tensor(metadata, dtype=tf.float16)
    target_tensor = tf.convert_to_tensor(game_results, dtype=tf.float16)

    # tf.io.serialize_tensor is called directly for all three tensors so this
    # cell does not depend on a helper that is only defined further down the
    # notebook (which fails with NameError on a fresh top-to-bottom run).
    return (
        tf.io.serialize_tensor(bitboards_tensor),
        tf.io.serialize_tensor(metadata_tensor),
        tf.io.serialize_tensor(target_tensor),
    )

In [20]:
def create_file(file):
    """Leave an empty file at path ``file``.

    Any existing file at that path is deleted first, then a new zero-length
    file is created. Nothing is written to it.
    """
    stale_copy_present = os.path.exists(file)
    if stale_copy_present:
        os.remove(file)

    # Opening for writing and closing straight away creates the empty file.
    open(file, 'w', newline='').close()
        
def process_sqlite_boards_to_records(batch_size: int = 5):
    """Score every rollup game position and write it to the ``recordsData`` TFRecord file.

    Each row yielded by ``fetch_all_game_positions_rollup`` is loaded into the
    shared ``evaluator``, scored, serialized with ``serialize_data`` and wrapped
    in a ``tf.train.Example`` with the byte features ``bitboards``,
    ``metadata`` and ``target``. Serialized examples are buffered and written
    in groups of ``batch_size``.

    Relies on ``evaluator``, ``serialize_data``, ``create_file`` and
    ``_bytes_feature`` having been defined by other cells of this notebook.

    Args:
        batch_size: Number of serialized examples buffered before they are
            written to the file.

    Returns:
        ``1`` if the generator yielded a falsy row (processing stops there),
        otherwise ``None``. Buffered examples are written in both cases.

    Raises:
        Any exception raised while scoring or writing propagates unchanged,
        keeping its original type and traceback.
    """
    # Start from an empty record file.
    create_file(recordsData)

    status = None
    with tf.io.TFRecordWriter(recordsData) as writer:
        with SessionLocal() as db:
            # The row count is only used to size the progress bar.
            row_count = get_rollup_row_count(db=db)

            # Generator over the rollup rows.
            games = fetch_all_game_positions_rollup(yield_size=500, db=db)

            pending = []  # serialized examples waiting to be written
            for game in tqdm(games, total=row_count, desc="Processing Feature Data"):
                if not game:
                    # Stop on a falsy row, but fall through to the flush below
                    # instead of returning with examples still buffered.
                    status = 1
                    break

                # Point the evaluator at this position and score it.
                evaluator.setup_parameters_gamepositions(game=game)
                score = evaluator.get_board_scores_records()

                bitboards, metadata, target = serialize_data(score)
                features = {
                    'bitboards': _bytes_feature(bitboards.numpy()),
                    'metadata': _bytes_feature(metadata.numpy()),
                    'target': _bytes_feature(target.numpy()),
                }
                example = tf.train.Example(features=tf.train.Features(feature=features))

                # Buffer the serialized bytes: that is what the writer expects.
                pending.append(example.SerializeToString())

                if len(pending) >= batch_size:
                    for record in pending:
                        writer.write(record)
                    pending = []

            # Write whatever is left over from the last partial batch.
            for record in pending:
                writer.write(record)

    return status
# Run the export with the default batch size; the return value is shown as this cell's output.
process_sqlite_boards_to_records()

Processing Feature Data: 100%|████████████████████████| 15045/15045 [02:15<00:00, 110.86it/s]


1

## Part 2: Shuffle/Split file into train, validation and test

In [None]:
def copy_csv(source_file, destination_file):
    """Copy ``source_file`` to ``destination_file`` using ``shutil.copy``.

    Nothing in the copy is CSV-specific; the file is copied as-is.
    Requires ``shutil`` to be imported in the notebook's import cell.

    Args:
        source_file: Path of the file to copy.
        destination_file: Path to copy it to.
    """
    shutil.copy(source_file, destination_file)

def split_csv(chunksize=10000, filename=None, train_file=None, test_file=None,
              validation_file=None, copy_data=None, validation_size=0.2,
              test_size=0.2, seed=None):
    """Randomly split a CSV file into train, test and validation CSV files.

    The source file is copied to ``copy_data`` and the copy is read in chunks
    of ``chunksize`` rows. Every row is assigned to exactly one of the three
    output files; the assignment is drawn up front over the global row
    numbers, so the three sets never share a row.

    Args:
        chunksize: Number of rows per chunk passed to ``pd.read_csv``.
        filename: Path of the source CSV file.
        train_file: Output path for the training rows.
        test_file: Output path for the test rows.
        validation_file: Output path for the validation rows.
        copy_data: Path of the working copy made from ``filename``.
        validation_size: Fraction of all rows assigned to validation.
        test_size: Fraction of the remaining (non-validation) rows assigned to
            test. NOTE(review): the original never defined this value; 0.2 was
            chosen to mirror ``validation_size`` - confirm.
        seed: Optional seed for the row assignment; with ``None`` the split
            differs between runs.

    Raises:
        ValueError: If any of the five path arguments is not given.
    """
    paths = {
        "filename": filename,
        "train_file": train_file,
        "test_file": test_file,
        "validation_file": validation_file,
        "copy_data": copy_data,
    }
    missing = [name for name, value in paths.items() if value is None]
    if missing:
        raise ValueError("split_csv requires paths for: " + ", ".join(missing))

    # Remove the outputs of any previous run before writing new ones.
    for stale_path in (train_file, test_file, validation_file, copy_data):
        if os.path.exists(stale_path):
            os.remove(stale_path)

    # Work on a copy of the source file.
    shutil.copy(filename, copy_data)

    # First pass: count the data rows so indices can be sampled up front.
    total_rows = sum(len(chunk) for chunk in pd.read_csv(copy_data, chunksize=chunksize))

    # Draw validation rows from all rows, then test rows from what is left,
    # so the sets cannot share an index.
    rng = random.Random(seed)
    validation_indices = set(rng.sample(range(total_rows), int(total_rows * validation_size)))
    remaining_indices = [i for i in range(total_rows) if i not in validation_indices]
    test_indices = set(rng.sample(remaining_indices, int(len(remaining_indices) * test_size)))

    processed_rows = 0
    for chunk in pd.read_csv(copy_data, chunksize=chunksize):
        # Global row numbers covered by this chunk; the index sets are global,
        # so membership must be tested with these, not chunk-local offsets.
        row_ids = range(processed_rows, processed_rows + len(chunk))
        is_validation = [i in validation_indices for i in row_ids]
        is_test = [i in test_indices for i in row_ids]
        is_train = [not (in_val or in_test) for in_val, in_test in zip(is_validation, is_test)]

        # The first chunk creates each file and writes the header; later chunks append.
        first_chunk = processed_rows == 0
        mode = 'w' if first_chunk else 'a'
        chunk.iloc[is_train].to_csv(train_file, mode=mode, index=False, header=first_chunk)
        chunk.iloc[is_test].to_csv(test_file, mode=mode, index=False, header=first_chunk)
        chunk.iloc[is_validation].to_csv(validation_file, mode=mode, index=False, header=first_chunk)

        processed_rows += len(chunk)

## Part 3: Create Base Parse Function for records

## Part 4: Create scaler from train data

## Part 5: Create scaled parse function

## Part 6: Calculate shape

## Part 7: Create tf dataset with scaled parse function

## Part 8: Create model

## Part 9: Pass tf dataset into model fit

In [5]:
def serialize_tensor(tensor):
    """Return ``tf.io.serialize_tensor`` applied to ``tensor``, keeping its own dtype.

    ``tensor`` must expose a ``dtype`` attribute, which is passed to
    ``tf.convert_to_tensor`` before serialization.
    """
    same_dtype_tensor = tf.convert_to_tensor(tensor, dtype=tensor.dtype)
    return tf.io.serialize_tensor(same_dtype_tensor)

def _bytes_feature(value):
    """Build a ``tf.train.Feature`` whose ``bytes_list`` holds ``value``.

    A ``value`` that is not already a list is treated as a single entry and
    wrapped in a one-element list; a list is passed through unchanged.
    """
    entries = value if isinstance(value, list) else [value]
    bytes_list = tf.train.BytesList(value=entries)
    return tf.train.Feature(bytes_list=bytes_list)

