# Using TF Records

In [1]:
import sys
sys.path.append('../')
import tensorflow as tf
import numpy as np
import chess
from multiprocessing import Pool
import csv
import os
import pandas as pd
from sqlalchemy.orm import  Session
from tqdm import tqdm
from Chess_Model.src.model.classes.cnn_scorer import boardCnnEval
from joblib import load
# Remember which directory the kernel is running in, then echo it so the
# value is visible in the cell output.
current_working_directory = os.getcwd()
print(current_working_directory)


C:\Users\ethan\git\Full_Chess_App


# Importing project modules (settings, SQLite helpers, ORM model, session factory)

In [2]:
from Chess_Model.src.model.config.config import Settings
from Chess_Model.src.model.classes.sqlite.dependencies import  fetch_one_game_position, fetch_all_game_positions_rollup,get_rollup_row_count,board_to_GamePostition
from Chess_Model.src.model.classes.sqlite.models import GamePositions
from Chess_Model.src.model.classes.sqlite.database import SessionLocal

# Creating sample analysis rows for multiple boards

In [3]:
# Fetch two rows through the project helper and keep them in `sample`.
# NOTE(review): both calls pass only the same session, so whether they return
# different rows depends on fetch_one_game_position — confirm.
sample = []
with SessionLocal() as db:
    game1 = fetch_one_game_position(db=db)
    sample.append(game1)
    game2 = fetch_one_game_position(db=db)
    sample.append(game2)

# Score every fetched row with one shared evaluator and collect the results
# (one feature dictionary per row, as the cell output shows).
dataset = []
evaluator = boardCnnEval()
for game in sample:
    evaluator.setup_parameters_gamepositions(game=game)
    dataset.append(evaluator.get_board_scores())
# Display the collected feature dictionaries as the cell output.
dataset

[{'white pawns': 8,
  'white knights': 2,
  'white bishops': 2,
  'white rooks': 2,
  'white queens': 1,
  'black pawns': 8,
  'black knights': 2,
  'black bishops': 2,
  'black rooks': 2,
  'black queens': 1,
  'total black pieces': 15,
  'total white pieces': 15,
  'white has bishop pair': 1,
  'black has bishop pair': 1,
  'white has knight bishop pair': 1,
  'black has knight bishop pair': 1,
  'white has knight pair': 1,
  'black has knight pair': 1,
  'white moves': 30,
  'black moves': 20,
  'white to black moves': 1.5,
  'white to white moves': 0.6666666666666666,
  'Beginning Game': 1,
  'Middle Game': 0,
  'End Game': 0,
  'black can be drawn': 0,
  'black promote to queen': 0,
  'white can be drawn': 0,
  'white promote to queen': 0,
  'can be drawn': 0,
  'black queen can be taken': 0,
  'white queen can be taken': 0,
  'white attacks': 0,
  'black attacks': 0,
  'white can attack': 0,
  'black can attack': 0,
  'checkmate': 0,
  'stalemate': 0,
  'white turn': 0,
  'black 

# Functions for formatting features

In [4]:
def _bytes_feature(value):
    """Wrap one string/bytes value in a ``tf.train.Feature`` holding a BytesList.

    Eager tensors are converted with ``.numpy()`` first, because BytesList
    won't unpack a string from an EagerTensor.
    """
    eager_tensor_type = type(tf.constant(0))
    payload = value.numpy() if isinstance(value, eager_tensor_type) else value
    bytes_list = tf.train.BytesList(value=[payload])
    return tf.train.Feature(bytes_list=bytes_list)

def _float_feature(value):
    """Wrap one float / double in a ``tf.train.Feature`` holding a FloatList."""
    float_list = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=float_list)

def _int64_feature(value):
    """Wrap one bool / enum / int / uint in a ``tf.train.Feature`` holding an Int64List."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
def serialize_example(features):
    """
    Build a serialized ``tf.train.Example`` from a feature dictionary.

    Args:
        features: mapping of feature name to an ``int``, ``float`` or
            ``np.ndarray`` value.

    Returns:
        The ``tf.train.Example`` message serialized to a byte string, ready
        to be written to a TFRecord file.

    Raises:
        ValueError: if a value is none of the supported types.
    """
    encoded = {}
    for key, value in features.items():
        if isinstance(value, (int, float)):
            # Ints are stored as floats too, so every scalar is a float feature.
            encoded[key] = _float_feature(value)
            continue
        if isinstance(value, np.ndarray):
            # Convert to (nested) Python lists, then store the tensor as bytes.
            as_lists = value.tolist()
            encoded[key] = _bytes_feature(tf.io.serialize_tensor(as_lists))
            continue
        raise ValueError(f"Unsupported data type: {type(value)} for key: {key}")

    # Wrap the per-key features in a Features message inside an Example.
    wrapped = tf.train.Features(feature=encoded)
    return tf.train.Example(features=wrapped).SerializeToString()

# Example of serialized data

In [5]:
# Serialize only the first feature dictionary as a standalone example.
serialized_example = serialize_example(dataset[0])

# Creating blank file for records

In [7]:
 def create_csv():
     """Reset the records file at ``Settings().recordsData`` to an empty file.

     Despite the name, nothing CSV-specific (no header row) is written: any
     existing file at that path is deleted and a new, empty file is created.
     """
     # Resolve the path once instead of building a Settings object per use.
     records_path = Settings().recordsData

     # Drop the previous file, if there is one.
     if os.path.exists(records_path):
         os.remove(records_path)

     # Opening in write mode creates the file; nothing is written to it.
     with open(records_path, 'w', newline=''):
         pass
create_csv()

# Populating Records File

In [8]:
# Write one serialized tf.train.Example per feature dictionary in `dataset`.
with tf.io.TFRecordWriter(Settings().recordsData) as writer:
    for record in dataset:
        serialized_data = serialize_example(record)
        # Write this iteration's record, not the `serialized_example` global
        # left over from the standalone serialization demo.
        writer.write(serialized_data)

In [9]:
def process_sqlite_boards_to_records():
    """Score every stored game position and write the results as TFRecords.

    Iterates the rows produced by ``fetch_all_game_positions_rollup``
    (requested with ``yield_size=500``), scores each one with a single
    ``boardCnnEval`` instance and writes one serialized ``tf.train.Example``
    per row to ``Settings().recordsData``.

    Returns:
        1 if a falsy row is encountered (processing stops there); otherwise
        None once every row has been written.

    Raises:
        Any exception raised while scoring, serializing or writing a row
        propagates unchanged, keeping its original type and traceback.
    """
    evaluator = boardCnnEval()
    with tf.io.TFRecordWriter(Settings().recordsData) as writer:
        with SessionLocal() as db:
            games = fetch_all_game_positions_rollup(yield_size=500, db=db)
            for game in games:
                if not game:
                    # A falsy row ends the run early; the caller sees 1.
                    return 1
                evaluator.setup_parameters_gamepositions(game=game)
                score = evaluator.get_board_scores()
                writer.write(serialize_example(score))

In [10]:
def create_feature_description(data):
    """Derive the parsing spec for records shaped like ``data``.

    Args:
        data: one feature dictionary (name -> ``int``, ``float`` or
            ``np.ndarray``).

    Returns:
        dict mapping each name to a scalar ``tf.io.FixedLenFeature``:
        ``tf.float32`` for ints and floats, ``tf.string`` for arrays.

    Raises:
        ValueError: if a value is none of the supported types.
    """
    description = {}
    for key, value in data.items():
        if isinstance(value, (int, float)):
            spec = tf.io.FixedLenFeature([], tf.float32)
        elif isinstance(value, np.ndarray):
            # Arrays are serialized as strings, so we specify them as such
            spec = tf.io.FixedLenFeature([], tf.string)
        else:
            raise ValueError(f"Unsupported data type: {type(value)} for key: {key}")
        description[key] = spec
    return description

# Build the parsing spec from the first feature dictionary and display it.
# _parse_function reads this notebook-level name when it is called.
feature_description = create_feature_description(dataset[0])
feature_description

{'white pawns': FixedLenFeature(shape=[], dtype=tf.float32, default_value=None),
 'white knights': FixedLenFeature(shape=[], dtype=tf.float32, default_value=None),
 'white bishops': FixedLenFeature(shape=[], dtype=tf.float32, default_value=None),
 'white rooks': FixedLenFeature(shape=[], dtype=tf.float32, default_value=None),
 'white queens': FixedLenFeature(shape=[], dtype=tf.float32, default_value=None),
 'black pawns': FixedLenFeature(shape=[], dtype=tf.float32, default_value=None),
 'black knights': FixedLenFeature(shape=[], dtype=tf.float32, default_value=None),
 'black bishops': FixedLenFeature(shape=[], dtype=tf.float32, default_value=None),
 'black rooks': FixedLenFeature(shape=[], dtype=tf.float32, default_value=None),
 'black queens': FixedLenFeature(shape=[], dtype=tf.float32, default_value=None),
 'total black pieces': FixedLenFeature(shape=[], dtype=tf.float32, default_value=None),
 'total white pieces': FixedLenFeature(shape=[], dtype=tf.float32, default_value=None),
 'wh

In [11]:
def _parse_function(example_proto):
    """Parse one serialized Example and split its features into three dicts.

    Uses the notebook-level ``feature_description`` both as the parsing spec
    and to decide where each key goes.

    Returns:
        ``(positions_data, other_data, mean_data)``:
        * positions_data - string features whose key ends with 'positions',
          decoded as int32 and reshaped to 8x8;
        * other_data - every remaining feature (string ones decoded as int32);
        * mean_data - the 'white mean', 'black mean' and 'stalemate mean'
          entries, passed through as parsed.
    """
    parsed_features = tf.io.parse_single_example(example_proto, feature_description)

    mean_keys = ['white mean', 'black mean', 'stalemate mean']
    positions_data, mean_data, other_data = {}, {}, {}

    for key, feature in feature_description.items():
        raw = parsed_features[key]
        is_serialized = feature.dtype == tf.string

        if is_serialized and key.endswith('positions'):
            # Position/matrix data: decode, then restore the 8x8 board shape.
            decoded = tf.io.parse_tensor(raw, out_type=tf.int32)
            positions_data[key] = tf.reshape(decoded, [8, 8])
        elif key in mean_keys:
            # Mean values need no decoding.
            mean_data[key] = raw
        elif is_serialized:
            # Any other serialized tensor is decoded but keeps its own shape.
            other_data[key] = tf.io.parse_tensor(raw, out_type=tf.int32)
        else:
            other_data[key] = raw

    return positions_data, other_data, mean_data

In [12]:
# Create a dataset from the TFRecord file
# NOTE: this rebinds `dataset`, which until now held the list of feature
# dictionaries; from here on it is a TFRecordDataset.
tfrecord_filenames = [Settings().recordsData]
dataset = tf.data.TFRecordDataset(tfrecord_filenames)

# Map the parsing function over the dataset
parsed_dataset = dataset.map(_parse_function)

# Iterate over the parsed dataset and use the data
# The loop variables stay defined after the loop; `others` is reused later
# as the input of the scale_features() demo call.
for positions, others, means in parsed_dataset.take(5):
    print("Positions Data:", positions)
    print("Other Data:", others)
    print("Means Data:", means)
    print("\n---\n")
 

Positions Data: {'white advantage positions': <tf.Tensor: shape=(8, 8), dtype=int32, numpy=
array([[0, 1, 1, 1, 1, 1, 1, 0],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 1, 0, 0, 0, 1, 0],
       [0, 1, 0, 1, 0, 1, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]])>, 'black advantage positions': <tf.Tensor: shape=(8, 8), dtype=int32, numpy=
array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1],
       [0, 1, 1, 1, 1, 1, 1, 0]])>, 'castling positions': <tf.Tensor: shape=(8, 8), dtype=int32, numpy=
array([[0, 0, 1, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],


In [13]:
# Count the entries in the second dict of the first record (the "other" data
# returned by _parse_function).
for record in parsed_dataset.take(1):
    print(len(record[1]))

40


In [14]:
# Board dimensions plus placeholder counts; the placeholders only feed the
# initial `shape`, which is overwritten inside the loop below.
row = 8
col = 8
chan = 0
amt_meta = 0
shape = (([row,col,chan],[0,amt_meta]),[3])
# Replace the placeholders with the real counts taken from the first record:
# number of position entries (record[0]) and of "other" entries (record[1]).
for record in parsed_dataset.take(1):
    print(len(record[0]))
    shape = (([row,col,len(record[0])],[0,len(record[1])]),[3])

print(shape)

50
(([8, 8, 50], [0, 40]), [3])


In [15]:
# Keep a one-element view of the parsed dataset for inspection.
samp = parsed_dataset.take(1)

In [16]:
# Display the dataset object; its repr shows the element_spec of each parsed feature.
samp

<_TakeDataset element_spec=({'white advantage positions': TensorSpec(shape=(8, 8), dtype=tf.int32, name=None), 'black advantage positions': TensorSpec(shape=(8, 8), dtype=tf.int32, name=None), 'castling positions': TensorSpec(shape=(8, 8), dtype=tf.int32, name=None), 'en passant positions': TensorSpec(shape=(8, 8), dtype=tf.int32, name=None), 'white king check positions': TensorSpec(shape=(8, 8), dtype=tf.int32, name=None), 'black king check positions': TensorSpec(shape=(8, 8), dtype=tf.int32, name=None), 'white knight positions': TensorSpec(shape=(8, 8), dtype=tf.int32, name=None), 'white bishop positions': TensorSpec(shape=(8, 8), dtype=tf.int32, name=None), 'white rook positions': TensorSpec(shape=(8, 8), dtype=tf.int32, name=None), 'white queen positions': TensorSpec(shape=(8, 8), dtype=tf.int32, name=None), 'white king positions': TensorSpec(shape=(8, 8), dtype=tf.int32, name=None), 'white pawn positions': TensorSpec(shape=(8, 8), dtype=tf.int32, name=None), 'black knight position

In [17]:
# Same shape bookkeeping as before, now with a leading batch dimension.
row = 8
col = 8
batch_size = 1
for record in parsed_dataset.take(1):
    # record[0] holds the position entries, record[1] the "other" entries;
    # the final [batch_size, 3] entry is hard-coded.
    shape = (([batch_size,row,col,len(record[0])],
                   ([batch_size,len(record[1])])),
                  [batch_size,3])
shape

(([1, 8, 8, 50], [1, 40]), [1, 3])

In [18]:
# Load the scaler object that scale_features() applies to the metadata
# (presumably fitted elsewhere — confirm).
# NOTE(review): the path is relative to the kernel's working directory, and
# joblib's load() unpickles the file — only load scaler files you trust.
scalarFile = './Chess_Model/src/model/data/scaler.joblib'
scaler = load(scalarFile)

In [19]:
def scale_features(data):
    """Scale a dict of tensors with the notebook-level ``scaler``.

    Args:
        data: mapping of feature name to a tensor exposing ``.numpy()``.
            # assumes every value is a scalar so they form one flat row — TODO confirm

    Returns:
        dict with the same keys, in the same order, whose values are the
        scaled numbers as ``tf.float32`` tensors.
    """
    # The scaler is given a 2D array, so pack all values into a single row.
    raw_values = [tensor.numpy() for tensor in data.values()]
    feature_row = np.array(raw_values).reshape(1, -1)

    scaled_row = scaler.transform(feature_row)

    # Pair each scaled number back with its original key.
    scaled_other = {}
    for key, value in zip(data.keys(), scaled_row.flatten()):
        scaled_other[key] = tf.convert_to_tensor(value, dtype=tf.float32)
    return scaled_other
# Demo call. `others` is the loop variable left over from the earlier
# iteration over parsed_dataset, so this cell depends on that cell having run.
scaled_data = scale_features(others)
scaled_data['white pawns']

<tf.Tensor: shape=(), dtype=float32, numpy=1.7914455>

In [20]:
def record_generator(batch_size: int = 1):
    """Yield parsed records from the TFRecord file, one at a time.

    Args:
        batch_size: number of records to take from the file. Records are
            yielded individually, not grouped into batches.

    Yields:
        ``((positions, scaled_metadata), means)`` where ``positions`` and
        ``means`` are the dicts returned by ``_parse_function`` and
        ``scaled_metadata`` is its "other" dict passed through
        ``scale_features``.
    """
    records = tf.data.TFRecordDataset([Settings().recordsData]).map(_parse_function)

    for positions, metadata, means in records.take(batch_size):
        # Scaling runs on the concrete tensors of each record.
        yield ((positions, scale_features(metadata)), means)
# Calling the function only creates the generator object (shown as the cell
# output); nothing is read until it is iterated.
record_generator()


<generator object record_generator at 0x000001FF287DC040>

In [21]:
# Pull the first record out of a fresh generator and display it.
# NOTE: this rebinds `samp` and `sample`, both used for other objects earlier.
samp = record_generator()
sample = next(samp)
sample

(({'white advantage positions': <tf.Tensor: shape=(8, 8), dtype=int32, numpy=
   array([[0, 1, 1, 1, 1, 1, 1, 0],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [0, 0, 1, 0, 0, 0, 1, 0],
          [0, 1, 0, 1, 0, 1, 0, 1],
          [0, 0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0, 0]])>,
   'black advantage positions': <tf.Tensor: shape=(8, 8), dtype=int32, numpy=
   array([[0, 0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0, 0],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [1, 1, 1, 1, 1, 1, 1, 1],
          [0, 1, 1, 1, 1, 1, 1, 0]])>,
   'castling positions': <tf.Tensor: shape=(8, 8), dtype=int32, numpy=
   array([[0, 0, 1, 0, 0, 0, 1, 0],
          [0, 0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0, 0],
          [0, 0, 0, 0, 0, 0, 0, 0],
          [

In [22]:
# Re-display the batched shape tuple computed earlier.
shape

(([1, 8, 8, 50], [1, 40]), [1, 3])

In [23]:
def dataset_generator(batch_size: int = 10):
    """Wrap ``record_generator`` in a ``tf.data.Dataset`` of stacked tensors.

    ``record_generator`` yields dictionaries of tensors, while the dataset is
    declared as three plain tensors. Each dictionary is therefore stacked
    (in its own key order) before being handed to TensorFlow, so the yielded
    structure matches the declared types and shapes.

    Args:
        batch_size: number of records to take from the TFRecord file
            (forwarded to ``record_generator``).

    Returns:
        A dataset whose elements are ``((positions, metadata), target)`` with
        declared shapes ``[50, 8, 8]`` (int32), ``[40]`` (float16) and
        ``[3]`` (float16).
        # assumes 50 position planes, 40 scalar metadata values and 3 scalar
        # mean values per record, as the earlier cell outputs show — TODO confirm
    """
    def _stacked_records():
        # Turn each dict of tensors into one tensor of the declared dtype.
        for (positions, metadata), targets in record_generator(batch_size=batch_size):
            yield (
                (
                    tf.stack(list(positions.values())),
                    tf.cast(tf.stack(list(metadata.values())), tf.float16),
                ),
                tf.cast(tf.stack(list(targets.values())), tf.float16),
            )

    dataset = tf.data.Dataset.from_generator(
        _stacked_records,
        output_types=((tf.int32, tf.float16), tf.float16),
        output_shapes=(([50, 8, 8], [40]), [3])
    )
    return dataset

In [26]:
# Build the generator-backed dataset and select its first element.
data_generator = dataset_generator(batch_size=1)
sample_take = data_generator.take(1)

# The loop that would actually iterate the selection is left disabled.
# for sample in sample_take:
#     if sample is None:
#         break
#     pass

    

In [27]:
# Step 1: Prepare multiple bitboards
# A seeded generator makes the example boards identical on every run.
BITBOARD_SEED = 0
rng = np.random.default_rng(BITBOARD_SEED)
bitboards = [rng.integers(2, size=(8, 8), dtype=np.int32) for _ in range(3)]  # 3 bitboard examples

# Step 2: Prepare metadata and target data
metadata = np.array([2024, 3, 17], dtype=np.int32)  # Example metadata (e.g., date)
target = np.array([1, 0, 1], dtype=np.float32)  # Example target data

# Function to serialize tensors
def serialize_tensor(tensor):
    """Serialize an array-like exposing ``.dtype`` to a tensor byte string.

    The input is converted to a tensor of its own dtype before serializing.
    """
    as_tensor = tf.convert_to_tensor(tensor, dtype=tensor.dtype)
    return tf.io.serialize_tensor(as_tensor)

# Serialize each part
# The three 8x8 boards are stacked into one int32 tensor so they are stored
# as a single serialized value; metadata and target keep their numpy dtypes.
bitboards_tensor = tf.stack([tf.convert_to_tensor(board, dtype=tf.int32) for board in bitboards])
serialized_bitboards = tf.io.serialize_tensor(bitboards_tensor)
serialized_metadata = serialize_tensor(metadata)
serialized_target = serialize_tensor(target)

# Function to create a bytes feature
def _bytes_feature(value):
    """Returns a bytes_list from a string / byte, an eager tensor, or a list of bytes.

    This definition replaces the earlier helper of the same name, so it keeps
    that helper's eager-tensor handling: ``serialize_example`` passes the
    result of ``tf.io.serialize_tensor`` straight in and must keep working
    after this cell has run.
    """
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy()  # BytesList won't unpack a string from an EagerTensor
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))

# Create features for each part
# .numpy() turns each serialized tensor into raw bytes for the bytes feature.
features = {
    'bitboards': _bytes_feature(serialized_bitboards.numpy()),
    'metadata': _bytes_feature(serialized_metadata.numpy()),
    'target': _bytes_feature(serialized_target.numpy())
}

# Step 5: Combine features into a single Example
example = tf.train.Example(features=tf.train.Features(feature=features))

# Step 6: Write the example to a TFRecord file
# The file name is relative, so it resolves against the kernel's working
# directory; the reading cell uses the same literal name.
tfrecord_file_name = 'combined_data.tfrecord'
with tf.io.TFRecordWriter(tfrecord_file_name) as writer:
    writer.write(example.SerializeToString())



In [35]:
# Parsing spec for the combined example: each part is one serialized tensor.
# NOTE(review): this rebinds the notebook-level `feature_description` that was
# built by create_feature_description(); any function reading that global
# sees this three-key dict from here on.
feature_description = {
    'bitboards': tf.io.FixedLenFeature([], tf.string),
    'metadata': tf.io.FixedLenFeature([], tf.string),
    'target': tf.io.FixedLenFeature([], tf.string),
}

# Function to parse a single example
def _parse_function(example_proto):
    """Decode one combined Example into ``(bitboards, metadata, target)``.

    The record is parsed with the notebook-level ``feature_description``;
    'bitboards' and 'metadata' are decoded as int32 tensors and 'target' as
    a float32 tensor.
    """
    parsed = tf.io.parse_single_example(example_proto, feature_description)
    part_dtypes = (('bitboards', tf.int32), ('metadata', tf.int32), ('target', tf.float32))
    return tuple(
        tf.io.parse_tensor(parsed[name], out_type=dtype) for name, dtype in part_dtypes
    )

# Step 2: Read the TFRecord file
# NOTE: `dataset` and `parsed_dataset` are rebound here and no longer refer to
# the chess-records dataset used by the earlier cells.
tfrecord_file_name = 'combined_data.tfrecord'
dataset = tf.data.TFRecordDataset([tfrecord_file_name])

# Step 3: Parse the dataset
parsed_dataset = dataset.map(_parse_function)

# Step 4: Iterate over the dataset and process the data
# NOTE(review): the saved output of this cell is an AttributeError about a
# tuple having no .numpy(); with the _parse_function defined in this cell each
# loop variable comes from tf.io.parse_tensor, so that traceback looks stale —
# re-run on a fresh kernel to confirm.
for bitboards, metadata, target in parsed_dataset.take(1):
    print("Bitboards:", bitboards.numpy())  # Access numpy array with .numpy()
    print("Metadata:", metadata.numpy())
    print("Target:", target.numpy())

AttributeError: 'tuple' object has no attribute 'numpy'

In [33]:
# NOTE(review): `bitboards` must be the tensor bound by the parsing loop here,
# not the Python list built when the example was written (a list has no .shape).
bitboards.shape

TensorShape([3, 8, 8])