In [3]:
import tensorflow as tf
print(f"TensorFlow has access to the following devices:\n{tf.config.list_physical_devices()}")

# See TensorFlow version
print(f"TensorFlow version: {tf.__version__}")

%load_ext tensorboard

TensorFlow has access to the following devices:
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
TensorFlow version: 2.9.0


In [4]:
import numpy as np
from matplotlib import pyplot as plt

from matplotlib.colors import ListedColormap

# RGB
colors_rgb = {
    0: (0x00, 0x00, 0x00),
    1: (0x00, 0x74, 0xD9),
    2: (0xFF, 0x41, 0x36),
    3: (0x2E, 0xCC, 0x40),
    4: (0xFF, 0xDC, 0x00),
    5: (0xA0, 0xA0, 0xA0),
    6: (0xF0, 0x12, 0xBE),
    7: (0xFF, 0x85, 0x1B),
    8: (0x7F, 0xDB, 0xFF),
    9: (0x87, 0x0C, 0x25),
}

_float_colors = [tuple(c / 255 for c in col) for col in colors_rgb.values()]
arc_cmap = ListedColormap(_float_colors)

class ArcColors:
    BLACK = 0
    BLUE = 1
    RED = 2
    GREEN = 3
    YELLOW = 4
    GREY = 5
    FUCHSIA = 6
    ORANGE = 7
    TEAL = 8
    BROWN = 9

def plot_grid(grid1: np.ndarray, grid2: np.ndarray = None):
    
    if grid2 is None:
        fig, ax = plt.subplots()
        ax.pcolormesh(
            grid1,
            cmap=arc_cmap,
            rasterized=True,
            vmin=0,
            vmax=9,
        )
        ax.set_xticks(np.arange(0, grid1.shape[1], 1))
        ax.set_yticks(np.arange(0, grid1.shape[0], 1))
        ax.grid()
        ax.set_aspect(1)
        ax.invert_yaxis()
        plt.show()
        return
    
    fig, axs = plt.subplots(1, 2)

    axs[0].pcolormesh(
        grid1,
        cmap=arc_cmap,
        rasterized=True,
        vmin=0,
        vmax=9,
    )
    axs[0].set_xticks(np.arange(0, grid1.shape[1], 1))
    axs[0].set_yticks(np.arange(0, grid1.shape[0], 1))
    axs[0].grid()
    axs[0].set_aspect(1)
    axs[0].invert_yaxis()

    axs[1].pcolormesh(
        grid2,
        cmap=arc_cmap,
        rasterized=True,
        vmin=0,
        vmax=9,
    )
    axs[1].set_xticks(np.arange(0, grid2.shape[1], 1))
    axs[1].set_yticks(np.arange(0, grid2.shape[0], 1))
    axs[1].grid()
    axs[1].set_aspect(1)
    axs[1].invert_yaxis()
    plt.show()

In [5]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Reshape, Conv2DTranspose
from sklearn.model_selection import train_test_split
import json
import matplotlib.pyplot as plt

In [6]:
import os
# Load the data from the data/train directory containing the json files
# files are named {random_number}.json so you need to list all the files in the directory and then load the data from each file
training_data_dir = "../data/training"
evaluating_data_dir = "../data/evaluation"

# List of JSON file paths
training_file_paths = [os.path.join(training_data_dir, f) for f in os.listdir(training_data_dir)]
evaluating_file_paths = [os.path.join(evaluating_data_dir, f) for f in os.listdir(evaluating_data_dir)]

In [7]:
training_file_paths[0]

'../data/training/a85d4709.json'

In [8]:
# Function to load data from multiple files

def load_data(file_paths):
    train_data = []
    test_data = []
    for file_path in file_paths:
        rules_input = []
        test_input = []
        with open(file_path, 'r') as f:
            data = json.load(f)
            for item in data['train']:
                rules_input.append([
                    np.array(item['input'], dtype=np.int64),
                    np.array(item['output'], dtype=np.int64)
                ])
            for item in data['test']:
                test_input.append([
                    np.array(item['input'], dtype=np.int64),
                    np.array(item['output'], dtype=np.int64)
                ])
        train_data.append(rules_input)
        test_data.append(test_input)
    return train_data, test_data

# Load data from multiple files
training_train_data, training_test_data = load_data(training_file_paths)
evaluating_train_data, evaluating_test_data = load_data(evaluating_file_paths)

In [9]:
#from skimage.transform import resize

def normalize_data(data):
    return data / 9.0

def denormalize_data(data):
    return data * 9.0


# test expand_matrix
test_matrix = training_train_data[0][0][0]

def expand_squared_matrix(matrix, size):
    max_size = 32
    current_size = matrix.shape[0]
    if current_size == max_size:
        return normalize_data(matrix)
    ratio = max_size // current_size
    divisible = ratio * current_size == max_size
    if divisible is True:
       return normalize_data(matrix.repeat(ratio, axis=0).repeat(ratio, axis=1))

    ## if the size is not divisible by 32
    ## we need to add padding and center the reiszed image
    floor_ratio = np.floor(ratio)
    resized_matrix = matrix.repeat(floor_ratio, axis=0).repeat(floor_ratio, axis=1)
    pad_size = max_size - resized_matrix.shape[0]
    padded_matrix = np.full(size, 0)
    padded_matrix[pad_size//2:pad_size//2+resized_matrix.shape[0], pad_size//2:pad_size//2+resized_matrix.shape[1]] = resized_matrix
    return normalize_data(padded_matrix)


def expand_rectangular_matrix(matrix, size):
    max_size = 32
    current_size = matrix.shape[0]
    current_width = matrix.shape[1]
    if current_size == max_size and current_width == max_size:
        return normalize_data(matrix)
    ratio = max_size // current_size
    ratio_width = max_size // current_width
    divisible = ratio * current_size == max_size and ratio_width * current_width == max_size
    if divisible is True:
       return normalize_data(matrix.repeat(ratio, axis=0).repeat(ratio_width, axis=1))

    ## if the size is not divisible by 32
    ## we need to add padding and center the reiszed image
    floor_ratio = np.floor(ratio)
    floor_ratio_width = np.floor(ratio_width)
    resized_matrix = matrix.repeat(floor_ratio, axis=0).repeat(floor_ratio_width, axis=1)
    pad_size = max_size - resized_matrix.shape[0]
    pad_size_width = max_size - resized_matrix.shape[1]
    padded_matrix = np.full(size, 0)
    padded_matrix[pad_size//2:pad_size//2+resized_matrix.shape[0], pad_size_width//2:pad_size_width//2+resized_matrix.shape[1]] = resized_matrix
    return normalize_data(padded_matrix)

def expand_matrix(matrix, size):
    if matrix.shape[0] == matrix.shape[1]:
        return expand_squared_matrix(matrix, size)
    return expand_rectangular_matrix(matrix, size)

expanded_matrix = expand_matrix(test_matrix, (32, 32))
expanded_matrix

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.55555556, 0.55555556,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.55555556, 0.55555556,
        0.        ],
       ...,
       [0.        , 0.55555556, 0.55555556, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.55555556, 0.55555556, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [10]:
## now we created input a, b, c and output in a array
def extract_data(train, test, max_pairs=5):
    inputs_a = []
    input_c = []
    output = []
    for inputs in train:
        matrix = []
        for i, element in enumerate(inputs):
            if i == max_pairs:
                break
            expanded_0 = expand_matrix(element[0], (32, 32))
            reshaped_0 = np.reshape(expanded_0, (1, 32*32))
            matrix.append(reshaped_0)
            expanded_1 = expand_matrix(element[1], (32, 32))
            reshaped_1 = np.reshape(expanded_1, (1, 32*32))
            matrix.append(reshaped_1)
        ## generate a matrix of 64x(5 max pairs)
        if len(matrix) < max_pairs*2:
            for i in range(max_pairs*2 - len(matrix)):
                matrix.append(np.zeros((1, 32*32)))
        inputs_a.append(np.array(matrix))

    for inputs in test:
        first_element = inputs[0]
        input_c.append(first_element[0])
        output.append(first_element[1])


    reshaped_input_a = [tf.reshape(tf.convert_to_tensor(x, np.int64), ) for x in inputs_a]
    reshaped_input_c = [tf.reshape(expand_matrix(x, (32, 32)), (1, 32*32)) for x in input_c]
    reshaped_output = [tf.reshape(expand_matrix(x, (32, 32)), (1, 32*32)) for x in output]
    A = tf.data.Dataset.from_tensor_slices(reshaped_input_a)
    C = tf.data.Dataset.from_tensor_slices(reshaped_input_c)
    Abc = tf.data.Dataset.zip((A,C)).map(lambda a,c: {'a_input': a, 'c_input': c})
    z_true = tf.data.Dataset.from_tensor_slices(reshaped_output)
    return tf.data.Dataset.zip((Abc, z_true))

def generate_rotate_matrix(matrix_list):
    new_matrix_list = []
    for matrix in matrix_list:
        for i in range(4):
            new_matrix_list.append(np.rot90(matrix, i+1))
    return new_matrix_list

def generate_flip_matrix(matrix_list):
    new_matrix_list = []
    for matrix in matrix_list:
        new_matrix_list.append(np.flip(matrix, 0))
        new_matrix_list.append(np.flip(matrix, 1))
    return new_matrix_list

def generate_all_transformations(matrix):
    rotated_matrix = generate_rotate_matrix(matrix)
    flipped_matrix = generate_flip_matrix(matrix)
    return rotated_matrix + flipped_matrix

def extract_data_V1(train, test, max_pairs=5):
    inputs_a = []
    input_c = []
    output = []

    for inputs in train:
        matrix = []
        for i, element in enumerate(inputs):
            if i == max_pairs:
                break
            expanded_0 = expand_matrix(element[0], (32, 32))
            matrix.append(expanded_0)
            expanded_1 = expand_matrix(element[1], (32, 32))
            matrix.append(expanded_1)
        ## generate a matrix of 64x(5 max pairs)
        if len(matrix) < max_pairs*2:
            for i in range(max_pairs*2 - len(matrix)):
                matrix.append(np.zeros((32,32)))
        ## transpose the matrix to have the correct shape
        inputs_a.append(np.transpose(np.array(matrix), (1, 2, 0)))

    for inputs in test:
        first_element = inputs[0]
        input_c.append(expand_matrix(first_element[0], (32, 32)))
        output.append(expand_matrix(first_element[1], (32, 32)))

    synth_input_a = generate_all_transformations(inputs_a)
    synth_input_c = generate_all_transformations(input_c)
    synth_output = generate_all_transformations(output)

    reshaped_input_a = [tf.convert_to_tensor(np.expand_dims(x, axis=0), dtype=tf.float16) for x in synth_input_a]
    reshaped_input_c = [tf.convert_to_tensor(np.expand_dims(x, axis=0), dtype=tf.float16) for x in synth_input_c]
    reshaped_output = [tf.convert_to_tensor(np.expand_dims(x, axis=0), dtype=tf.float16) for x in synth_output]
    A = tf.data.Dataset.from_tensor_slices(reshaped_input_a)
    C = tf.data.Dataset.from_tensor_slices(reshaped_input_c)
    Abc = tf.data.Dataset.zip((A,C)).map(lambda a,c: {'a_input': a, 'c_input': c})
    z_true = tf.data.Dataset.from_tensor_slices(reshaped_output)
    return tf.data.Dataset.zip((Abc, z_true))

train_data = extract_data_V1(training_train_data, training_test_data)
eval_data = extract_data_V1(evaluating_train_data, evaluating_test_data)

### print the size of the data
print(train_data.cardinality())

Metal device set to: Apple M2 Max


2024-06-16 00:27:06.436490: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-06-16 00:27:06.436928: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


tf.Tensor(2400, shape=(), dtype=int64)


In [11]:
# check the shape of the data A

input_layer = tf.keras.layers.Input(shape=(32, 32, 10))
conv_layer = tf.keras.layers.Conv2D(32, 3, activation='relu')(input_layer)
model = tf.keras.Model(inputs=input_layer, outputs=conv_layer)

# Now you can use this tensor as input to the model
output = model(train_data.take(1).as_numpy_iterator().next()[0]['a_input'])
print(output.shape)  # This should work without error

#
# input_layer = tf.keras.layers.Input(shape=(32, 32, 1))
# conv_layer = tf.keras.layers.Conv2D(32, 3, activation='relu')(input_layer)
# model = tf.keras.Model(inputs=input_layer, outputs=conv_layer)
#
# # Now you can use this tensor as input to the model
# output = model(train_data.take(1).as_numpy_iterator().next()[0]['c_input'])
# print(output.shape)  # This should work without error


(1, 30, 30, 32)


In [12]:
def display_output(output_1, output_2=None):
    output_1 = output_1.reshape(32,32)
    output_1 = denormalize_data(output_1)
    
    if output_2 is None:
        plot_grid(output_1)
        return
    output_2 = denormalize_data(output_2)
    output_2 = output_2.reshape(32,32)
    plot_grid(output_1, output_2)

train_data.take(1).as_numpy_iterator().next()[0]['a_input'].shape
#
#display_output(train_data.take(1).as_numpy_iterator().next()[0]['a_input'], train_data.take(1).as_numpy_iterator().next()[0]['b_input'])

(1, 32, 32, 10)

In [13]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

def create_model_V0(max_pairs=5):
    a_input = keras.layers.Input(shape=(32*32*10), name='a_input')
    x = keras.layers.Flatten()(a_input)
    x = keras.layers.Dense(32*32)(x)  # Removed input_dim parameter

    c_input = keras.layers.Input(shape=(32*32), name='c_input')
    c = keras.layers.Flatten()(c_input)
    c = keras.layers.Dense(32*32)(c)

    z = keras.layers.concatenate(axis=1,inputs=[x, c])
    z = keras.layers.Dense(32*32, activation='relu')(z)

    model = keras.models.Model(inputs=[a_input, c_input], outputs=z)
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
    return model


def create_model_v1(max_pairs=5):
    # First input branch
    input1 = Input(shape=(32, 32, 10), name='a_input')  # 32x32 images with 10 channels
    x = layers.Masking(mask_value=0.0)(input1)
    x = layers.Conv2D(32, 3, strides=1, padding="same", activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(32, 3, padding="same", activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D(2)(x)

    # Second input branch
    input2 = Input(shape=(32, 32, 1), name='c_input')  # 32x32 images with 1 channel
    y = layers.Conv2D(32, 3, strides=1, padding="same", activation="relu")(input2)
    y = layers.BatchNormalization()(y)
    y = layers.Conv2D(32, 3, padding="same", activation="relu")(y)
    y = layers.BatchNormalization()(y)
    y = layers.MaxPooling2D(2)(y)

    # Concatenate the two branches
    concatenated = layers.concatenate([x, y])

    # Additional layers for better learning
    z = layers.Conv2D(64, 3, padding="same", activation="relu")(concatenated)
    z = layers.BatchNormalization()(z)
    z = layers.Conv2D(64, 3, padding="same", activation="relu")(z)
    z = layers.BatchNormalization()(z)
    z = layers.MaxPooling2D(2)(z)

    z = layers.Conv2D(128, 3, padding="same", activation="relu")(z)
    z = layers.BatchNormalization()(z)
    z = layers.Conv2D(128, 3, padding="same", activation="relu")(z)
    z = layers.BatchNormalization()(z)

    z = layers.UpSampling2D(2)(z)
    z = layers.Conv2D(64, 3, padding="same", activation="relu")(z)
    z = layers.BatchNormalization()(z)
    z = layers.Conv2D(64, 3, padding="same", activation="relu")(z)
    z = layers.BatchNormalization()(z)
    z = layers.UpSampling2D(2)(z)
    z = layers.Conv2D(32, 3, padding="same", activation="relu")(z)
    z = layers.BatchNormalization()(z)
    z = layers.Conv2D(32, 3, padding="same", activation="relu")(z)
    z = layers.BatchNormalization()(z)
    z = layers.Conv2D(1, 3, padding="same", activation="sigmoid")(z)  # Output shape: (32, 32, 1)

    model = Model(inputs=[input1, input2], outputs=z)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

model = create_model_v1()
model.compile(optimizer='adam', loss=tf.keras.losses.MeanSquaredError(), metrics=['accuracy'])
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 a_input (InputLayer)           [(None, 32, 32, 10)  0           []                               
                                ]                                                                 
                                                                                                  
 masking (Masking)              (None, 32, 32, 10)   0           ['a_input[0][0]']                
                                                                                                  
 c_input (InputLayer)           [(None, 32, 32, 1)]  0           []                               
                                                                                                  
 conv2d_1 (Conv2D)              (None, 32, 32, 32)   2912        ['masking[0][0]']          

In [14]:
def extract_data_for_transformer(train, test, max_pairs=5):
    pairs = []
    # print(len(train[0]))
    for i, element in enumerate(train):
        if i == max_pairs:
            break
        expanded_0 = expand_matrix(element[0], (32, 32))
        expanded_1 = expand_matrix(element[1], (32, 32))
        pairs.append(expanded_0)
        pairs.append(expanded_1)
        # ## generate a matrix of 64x(5 max pairs)
    # if len(train)*2 < max_pairs*2:
    #     for i in range(max_pairs*2 - len(train) * 2):
    #         pairs.append(np.zeros((32,32)))

    # pairs.append(np.zeros((32,32)))
    # pairs.append(np.zeros((32,32)))
    # pairs.append(np.zeros((32,32)))

    final_test = expand_matrix(test[0][0], (32, 32))
    attended_output = expand_matrix(test[0][1], (32, 32))
    return pairs, final_test, attended_output

def extract_batch (data, test):
    batches = []
    for i in range(0, len(data)):
        extracted = extract_data_for_transformer(data[i], test[i])
        batches.append(extracted)
        break
        # for j in range(3):
        #     rotated = [[np.rot90(x, j+1) for x in extracted[0]], np.rot90(extracted[1], j+1), np.rot90(extracted[2], j+1)]
        #     batches.append(rotated)
        #     batches.append([[np.flip(x, 0) for x in rotated[0]], np.flip(rotated[1], 0), np.flip(rotated[2], 0)])
        # batches.append([[np.flip(x, 1) for x in extracted[0]], np.flip(extracted[1], 1), np.flip(extracted[2], 1)])

    # ## data augmentation by rotating the matrix by 90, 180, 270 and fliping the matrix
    # for data in batches:
    #     _pairs = data[0]
    #     _final_test = data[1]
    #     _attended_output = data[2]
    #     for i in range(3):
    #         batches.append([[np.rot90(x, i+1) for x in _pairs], np.rot90(_final_test, i+1), np.rot90(_attended_output, i+1)])
    #     batches.append([[np.flip(x, 1) for x in _pairs], np.flip(_final_test, 1), np.flip(_attended_output, 1)])

    return batches

train_data = extract_batch(training_train_data, training_test_data)
eval_data = extract_batch(evaluating_train_data, evaluating_test_data)

len(train_data)

1

In [15]:
# def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
#     # Normalization and Self-Attention
#     x = layers.LayerNormalization(epsilon=1e-6)(inputs)
#     x = layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
#     x = layers.Dropout(dropout)(x)
#     res = x + inputs  # Residual connection
# 
#     # Feed Forward Part
#     x = layers.LayerNormalization(epsilon=1e-6)(res)
#     x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation='relu')(x)
#     x = layers.Dropout(dropout)(x)
#     x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
#     return x + res  # Residual connection
# 
# def positional_encoding(max_len, depth):
#     positions = np.arange(max_len)[:, np.newaxis]
#     depths = np.arange(depth)[np.newaxis, :]
#     angle_rates = 1 / np.power(10000, (2 * (depths // 2)) / np.float32(depth))
#     angle_rads = positions * angle_rates
# 
#     # Apply the sin function to even indices in the array; 2i
#     angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
# 
#     # Apply the cos function to odd indices in the array; 2i+1
#     angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
# 
#     pos_encoding = angle_rads[np.newaxis, ...]
#     return tf.cast(pos_encoding, dtype=tf.float32)

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs  # Residual connection

    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation='relu')(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res  # Residual connection

def positional_encoding(max_len, depth):
    positions = np.arange(max_len)[:, np.newaxis]
    depths = np.arange(depth)[np.newaxis, :]
    angle_rates = 1 / np.power(10000, (2 * (depths // 2)) / np.float32(depth))
    angle_rads = positions * angle_rates

    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])

    pos_encoding = angle_rads[np.newaxis, ...]
    return tf.cast(pos_encoding, dtype=tf.float32)



In [16]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, Input

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, Input

def positional_encoding(length, depth):
    depth = depth // 2
    positions = np.arange(length)[:, np.newaxis]
    depths = np.arange(depth)[np.newaxis, :] / depth
    angle_rates = 1 / (10000**depths)
    angle_rads = positions * angle_rates
    pos_encoding = np.concatenate([np.sin(angle_rads), np.cos(angle_rads)], axis=-1)
    pos_encoding = pos_encoding[np.newaxis, ...]
    return tf.cast(pos_encoding, dtype=tf.float32)

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0.1):
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
    x = layers.Dropout(dropout)(x)
    x = layers.Add()([x, inputs])
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    x = layers.Dense(ff_dim, activation=tf.nn.gelu)(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Dense(inputs.shape[-1])(x)
    x = layers.Add()([x, inputs])
    return x

def create_transformer_model(input_shape):
    input_layer = Input(shape=(None, input_shape[0], input_shape[1], input_shape[2]))
    pos_encoding = positional_encoding(input_shape[0] * input_shape[1], input_shape[2])

    def process_pair(input_tensor):
        x = layers.Reshape((input_shape[0] * input_shape[1], input_shape[2]))(input_tensor)
        x = x + pos_encoding
        for _ in range(2):
            x = transformer_encoder(x, head_size=64, num_heads=4, ff_dim=128, dropout=0.1)
        return x

    input_pairs = layers.Lambda(lambda x: tf.reshape(x, (-1, input_shape[0] * input_shape[1], input_shape[2])))(input_layer)
    processed_pairs = process_pair(input_pairs)
    
    x = layers.LayerNormalization(epsilon=1e-6)(processed_pairs)
    for _ in range(2):
        x = transformer_encoder(x, head_size=64, num_heads=4, ff_dim=128, dropout=0.1)

    final_input = Input(shape=input_shape)
    flattened_final_input = layers.Reshape((input_shape[0] * input_shape[1], input_shape[2]))(final_input)
    x_final = flattened_final_input + pos_encoding

    for _ in range(2):
        x_final = transformer_encoder(x_final, head_size=64, num_heads=4, ff_dim=128, dropout=0.1)

    flattened_x_final = layers.Reshape((-1,))(x_final)

    def repeat_vector(args):
        x, rep = args
        return tf.repeat(x, repeats=rep, axis=1)

    expanded_final_input = layers.Lambda(repeat_vector)([flattened_x_final, tf.shape(x)[1]])

    def reshape_combined(args):
        x, combined_shape = args
        return tf.reshape(x, (-1, combined_shape, input_shape[0] * input_shape[1] * input_shape[2]))

    expanded_final_input = layers.Lambda(reshape_combined)([expanded_final_input, tf.shape(x)[1]])

    def flatten_combined_pairs(x):
        shape = tf.shape(x)
        return tf.reshape(x, (-1, shape[1], shape[2]))

    combined_pairs_flat = layers.Lambda(flatten_combined_pairs)(x)
    expanded_final_input_flat = layers.Lambda(flatten_combined_pairs)(expanded_final_input)

    x_combined = layers.Concatenate(axis=-1)([combined_pairs_flat, expanded_final_input_flat])

    x = layers.LayerNormalization(epsilon=1e-6)(x_combined)
    for _ in range(2):
        x = transformer_encoder(x, head_size=64, num_heads=4, ff_dim=128, dropout=0.1)

    x = layers.Dense(input_shape[0] * input_shape[1] * input_shape[2])(x)
    def dynamic_reshape(tensor, shape):
        return tf.reshape(tensor, shape)

    output_shape = (-1, input_shape[0], input_shape[1], input_shape[2])
    x = layers.Lambda(dynamic_reshape, arguments={'shape': output_shape})(x)

    x = layers.Conv2D(32, 3, padding="same", activation="relu")(x)
    x = layers.Conv2D(32, 3, padding="same", activation="relu")(x)
    x = layers.Conv2D(input_shape[2], 3, padding="same", activation="sigmoid")(x)

    model = models.Model(inputs=[input_layer, final_input], outputs=x)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
    return model

input_shape = (32, 32, 1)
model = create_transformer_model(input_shape)
model.summary()


# create_transformer_model((32, 32, 1))

# def create_transformer_model(input_shape):
#     inputs = []
#     encoded_pairs = []
# 
#     for _ in range(num_pairs):
#         input_pattern = Input(shape=input_shape)
#         output_pattern = Input(shape=input_shape)
#         inputs.append(input_pattern)
#         inputs.append(output_pattern)
# 
#         # Flatten the input to 3D for positional encoding and Transformer layers
#         flattened_input = layers.Reshape((input_shape[0] * input_shape[1], input_shape[2]))(input_pattern)
#         flattened_output = layers.Reshape((input_shape[0] * input_shape[1], input_shape[2]))(output_pattern)
# 
#         # Positional Encoding
#         pos_encoding = positional_encoding(input_shape[0] * input_shape[1], input_shape[2])
#         x_in = flattened_input + pos_encoding
#         x_out = flattened_output + pos_encoding
# 
#         # Transformer Encoder Layer
#         for _ in range(2):  # Fewer layers for individual encoding
#             x_in = transformer_encoder(x_in, head_size=64, num_heads=4, ff_dim=128, dropout=0.1)
#             x_out = transformer_encoder(x_out, head_size=64, num_heads=4, ff_dim=128, dropout=0.1)
# 
#         combined = layers.Concatenate(axis=2)([x_in, x_out])  # Concatenate along the feature dimension
#         encoded_pairs.append(combined)
# 
#     # Combine all encoded pairs
#     combined_pairs = layers.Concatenate(axis=1)(encoded_pairs)
# 
#     # Process the combined representation
#     x = layers.LayerNormalization(epsilon=1e-6)(combined_pairs)
#     for _ in range(2):  # Additional processing with more Transformer layers
#         x = transformer_encoder(x, head_size=64, num_heads=4, ff_dim=128, dropout=0.1)
# 
#     # Input for the final pattern
#     final_input = Input(shape=input_shape)
#     inputs.append(final_input)
# 
#     flattened_final_input = layers.Reshape((input_shape[0] * input_shape[1], input_shape[2]))(final_input)
#     x_final = flattened_final_input + pos_encoding
# 
#     # Transformer Encoder Layer for the final input
#     for _ in range(2):
#         x_final = transformer_encoder(x_final, head_size=64, num_heads=4, ff_dim=128, dropout=0.1)
# 
#     # Flatten x_final to 2D
#     flattened_x_final = layers.Reshape((-1,))(x_final)
# 
#     # Expand the final input tensor to match the combined_pairs tensor along the sequence dimension
#     def repeat_vector(args):
#         x, rep = args
#         return tf.repeat(x, repeats=rep, axis=1)
# 
#     expanded_final_input = layers.Lambda(repeat_vector)([flattened_x_final, tf.shape(combined_pairs)[1]])
# 
#     # Reshape back to 3D
#     def reshape_combined(args):
#         x, combined_shape = args
#         return tf.reshape(x, (-1, combined_shape, input_shape[0] * input_shape[1] * input_shape[2]))
# 
#     expanded_final_input = layers.Lambda(reshape_combined)([expanded_final_input, tf.shape(combined_pairs)[1]])
# 
#     # Ensure shapes match for concatenation
#     def flatten_combined_pairs(x):
#         shape = tf.shape(x)
#         return tf.reshape(x, (-1, shape[1], shape[2]))
# 
#     combined_pairs_flat = layers.Lambda(flatten_combined_pairs)(combined_pairs)
#     expanded_final_input_flat = layers.Lambda(flatten_combined_pairs)(expanded_final_input)
# 
#     # Concatenate the processed combined_pairs and expanded_final_input
#     x_combined = layers.Concatenate(axis=-1)([combined_pairs_flat, expanded_final_input_flat])
# 
#     # Process the combined representation for the final prediction
#     x = layers.LayerNormalization(epsilon=1e-6)(x_combined)
#     for _ in range(2):  # Additional processing with more Transformer layers
#         x = transformer_encoder(x, head_size=64, num_heads=4, ff_dim=128, dropout=0.1)
# 
#     # Reshape to the correct output shape
#     x = layers.Dense(input_shape[0] * input_shape[1] * input_shape[2])(x)
#     def dynamic_reshape(tensor, shape):
#         return tf.reshape(tensor, shape)
# 
#     output_shape = (-1, input_shape[0], input_shape[1], input_shape[2])
#     x = layers.Lambda(dynamic_reshape, arguments={'shape': output_shape})(x)
# 
# 
#     x = layers.Conv2D(32, 3, padding="same", activation="relu")(x)
#     x = layers.Conv2D(32, 3, padding="same", activation="relu")(x)
#     x = layers.Conv2D(input_shape[2], 3, padding="same", activation="sigmoid")(x)  # Ensuring output shape matches input shape
# 
#     model = models.Model(inputs=inputs, outputs=x)
#     model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
#     return model


# Create the Transformer model
input_shape = (32, 32, 1)  # Example input shape with a channel dimension

# Train the model
#history = model.fit(x_train, y_train, epochs=10, validation_data=(x_val, y_val))


Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, None, 32, 3  0           []                               
                                2, 1)]                                                            
                                                                                                  
 lambda (Lambda)                (None, 1024, 1)      0           ['input_2[0][0]']                
                                                                                                  
 reshape (Reshape)              (None, 1024, 1)      0           ['lambda[0][0]']                 
                                                                                                  
 tf.__operators__.add (TFOpLamb  (None, 1024, 0)     0           ['reshape[0][0]']          

In [17]:
# def clean_data(train_data, num_pairs):
#     cleaned_data = []
#     for data in train_data:
#         try:
#             x_gen_train = data[0]
#             y_final_train = data[1]
#             z_output = data[2]
# 
#             for i in range(num_pairs * 2):
#                 # print(x_gen_train[i].shape)
#                 if x_gen_train[i].shape != (32, 32):
#                     raise ValueError(f"Unexpected shape for x_gen_train[{i}]: {x_gen_train[i].shape}")
# 
#             #print(y_final_train.shape)
#             if y_final_train.shape != (32, 32):
#                 raise ValueError(f"Unexpected shape for y_final_train: {y_final_train.shape}")
# 
#             #print(z_output.shape)
#             if z_output.shape != (32, 32):
#                 raise ValueError(f"Unexpected shape for z_output: {z_output.shape}")
# 
#             cleaned_data.append(data)
# 
#         except ValueError as e:
#             print(f"Skipping due to error: {e}")
#             continue
# 
#     if not cleaned_data:
#         raise ValueError("No valid data entries found after cleaning.")
# 
#     return cleaned_data
# 
# cleaned_train_data = clean_data(train_data, 1)

In [18]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import Sequence


class DataGenerator(Sequence):
    def __init__(self, train_data, num_pairs, batch_size=32, shuffle=True):
        self.train_data = train_data
        self.num_pairs = num_pairs
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.train_data) / self.batch_size))

    def __getitem__(self, index):
        indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_data = [self.train_data[k] for k in indices]
        return self.__data_generation(batch_data)

    def __data_generation(self, batch_data):
        _data = batch_data[0]
        x_gen_train = _data[0]
        y_final_train = _data[1]
        z_output = _data[2]

        _x_train = [[np.expand_dims(x_gen_train[i], axis=-1) for i in range(self.num_pairs * 2)], np.expand_dims(y_final_train, axis=-1)]
        _y_train = np.expand_dims(z_output, axis=-1)
        return x_train, y_train

    def on_epoch_end(self):
        self.indices = np.arange(len(self.train_data))
        if self.shuffle:
            np.random.shuffle(self.indices)


# Example usage:
num_pairs = 5
batch_size = 1
# dummy_train_data = [(
#     [np.random.rand(32, 32, 1) for _ in range(num_pairs * 2)],
#     np.random.rand(32, 32, 1),
#     np.random.rand(32, 32, 1)
# ) for _ in range(10)]  # Use a smaller dataset for debugging

# train_generator = DataGenerator(train_data, num_pairs, batch_size)
# eval_generator = DataGenerator(eval_data, num_pairs, batch_size)
# 
# model = create_transformer_model(input_shape, num_pairs)
# model.summary()

# # Train the model using the data generator
# model.fit(train_generator, epochs=100, batch_size=batch_size, validation_data=eval_generator)


In [19]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, Input

class ProcessPairsLayer(layers.Layer):
    def __init__(self, input_shape, pos_encoding, **kwargs):
        super(ProcessPairsLayer, self).__init__(**kwargs)
        self.input_shape_ = input_shape
        self.pos_encoding = pos_encoding
        self.head_size = 64
        self.num_heads = 4
        self.ff_dim = 128
        self.dropout = 0.1

    def build(self, input_shape):
        self.mha_layers = [layers.MultiHeadAttention(key_dim=self.head_size, num_heads=self.num_heads, dropout=self.dropout) for _ in range(2)]
        self.ffn_layers = [self.build_ffn() for _ in range(2)]
        super(ProcessPairsLayer, self).build(input_shape)

    def build_ffn(self):
        return tf.keras.Sequential([
            layers.LayerNormalization(epsilon=1e-6),
            layers.Dense(self.ff_dim, activation=tf.nn.gelu),
            layers.Dropout(self.dropout),
            layers.Dense(self.input_shape_[2])
        ])

    def call(self, inputs):
        batch_size = tf.shape(inputs)[0]
        num_pairs = tf.shape(inputs)[1]

        reshaped_inputs = tf.reshape(inputs, (batch_size * num_pairs, self.input_shape_[0] * self.input_shape_[1], self.input_shape_[2]))
        x = reshaped_inputs + self.pos_encoding

        for mha_layer, ffn_layer in zip(self.mha_layers, self.ffn_layers):
            x = mha_layer(x, x)
            x = layers.Dropout(self.dropout)(x)
            x = layers.Add()([x, reshaped_inputs])
            x = ffn_layer(x)
            x = layers.Add()([x, reshaped_inputs])

        x = tf.reshape(x, (batch_size, num_pairs * self.input_shape_[0] * self.input_shape_[1], self.input_shape_[2]))
        return x

def positional_encoding(length, depth):
    depth = depth // 2
    positions = np.arange(length)[:, np.newaxis]
    depths = np.arange(depth)[np.newaxis, :] / depth
    angle_rates = 1 / (10000**depths)
    angle_rads = positions * angle_rates
    pos_encoding = np.concatenate([np.sin(angle_rads), np.cos(angle_rads)], axis=-1)
    pos_encoding = pos_encoding[np.newaxis, ...]
    return tf.cast(pos_encoding, dtype=tf.float32)

def create_transformer_model(input_shape):
    pos_encoding = positional_encoding(input_shape[0] * input_shape[1], input_shape[2])

    input_layer = Input(shape=(None, input_shape[0], input_shape[1], input_shape[2]))
    processed_pairs = ProcessPairsLayer(input_shape, pos_encoding)(input_layer)

    x = layers.LayerNormalization(epsilon=1e-6)(processed_pairs)
    for _ in range(2):
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        x = layers.MultiHeadAttention(key_dim=64, num_heads=4, dropout=0.1)(x, x)
        x = layers.Dropout(0.1)(x)
        x = layers.Add()([x, processed_pairs])
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        x = layers.Dense(128, activation=tf.nn.gelu)(x)
        x = layers.Dropout(0.1)(x)
        x = layers.Dense(input_shape[2])(x)
        x = layers.Add()([x, processed_pairs])

    final_input = Input(shape=input_shape)
    flattened_final_input = layers.Reshape((input_shape[0] * input_shape[1], input_shape[2]))(final_input)
    x_final = flattened_final_input + pos_encoding

    for _ in range(2):
        x_final = layers.LayerNormalization(epsilon=1e-6)(x_final)
        x_final = layers.MultiHeadAttention(key_dim=64, num_heads=4, dropout=0.1)(x_final, x_final)
        x_final = layers.Dropout(0.1)(x_final)
        x_final = layers.Add()([x_final, flattened_final_input])
        x_final = layers.LayerNormalization(epsilon=1e-6)(x_final)
        x_final = layers.Dense(128, activation=tf.nn.gelu)(x_final)
        x_final = layers.Dropout(0.1)(x_final)
        x_final = layers.Dense(input_shape[2])(x_final)
        x_final = layers.Add()([x_final, flattened_final_input])

    flattened_x_final = layers.Reshape((-1,))(x_final)

    def repeat_vector(args):
        x, rep = args
        return tf.repeat(x, repeats=rep, axis=1)

    expanded_final_input = layers.Lambda(repeat_vector)([flattened_x_final, tf.shape(x)[1]])

    def reshape_combined(args):
        x, combined_shape = args
        return tf.reshape(x, (-1, combined_shape, input_shape[0] * input_shape[1] * input_shape[2]))

    expanded_final_input = layers.Lambda(reshape_combined)([expanded_final_input, tf.shape(x)[1]])

    def flatten_combined_pairs(x):
        shape = tf.shape(x)
        return tf.reshape(x, (-1, shape[1], shape[2]))

    combined_pairs_flat = layers.Lambda(flatten_combined_pairs)(x)
    expanded_final_input_flat = layers.Lambda(flatten_combined_pairs)(expanded_final_input)

    x_combined = layers.Concatenate(axis=-1)([combined_pairs_flat, expanded_final_input_flat])

    x = layers.LayerNormalization(epsilon=1e-6)(x_combined)
    for _ in range(2):
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        x = layers.MultiHeadAttention(key_dim=64, num_heads=4, dropout=0.1)(x, x)
        x = layers.Dropout(0.1)(x)
        x = layers.Add()([x, x_combined])
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        x = layers.Dense(128, activation=tf.nn.gelu)(x)
        x = layers.Dropout(0.1)(x)
        x = layers.Dense(input_shape[2])(x)
        x = layers.Add()([x, x_combined])

    x = layers.Dense(input_shape[0] * input_shape[1] * input_shape[2])(x)
    def dynamic_reshape(tensor, shape):
        return tf.reshape(tensor, shape)

    output_shape = (-1, input_shape[0], input_shape[1], input_shape[2])
    x = layers.Lambda(dynamic_reshape, arguments={'shape': output_shape})(x)

    x = layers.Conv2D(32, 3, padding="same", activation="relu")(x)
    x = layers.Conv2D(32, 3, padding="same", activation="relu")(x)
    x = layers.Conv2D(input_shape[2], 3, padding="same", activation="sigmoid")(x)

    model = models.Model(inputs=[input_layer, final_input], outputs=x)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
    return model

input_shape = (32, 32, 1)
model = create_transformer_model(input_shape)
model.summary()


Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, None, 32, 3  0           []                               
                                2, 1)]                                                            
                                                                                                  
 process_pairs_layer (ProcessPa  (None, None, 1)     3205        ['input_4[0][0]']                
 irsLayer)                                                                                        
                                                                                                  
 input_5 (InputLayer)           [(None, 32, 32, 1)]  0           []                               
                                                                                            

In [20]:

class DataGenerator(Sequence):
    def __init__(self, train_data, batch_size=32, shuffle=True):
        self.train_data = train_data
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.train_data) / self.batch_size))

    def __getitem__(self, index):
        indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_data = [self.train_data[k] for k in indices]
        x, y = self.__data_generation(batch_data)
        return x, y

    def on_epoch_end(self):
        self.indices = np.arange(len(self.train_data))
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __data_generation(self, batch_data):
        x_inputs = []
        final_inputs = []
        y_train_batch = []

        for data in batch_data:
            input_output_pairs = data[0]
            final_input = data[1]
            attended_output = data[2]

            # Ensure input-output pairs are correctly shaped
            pairs = np.stack(input_output_pairs, axis=0)
            x_inputs.append(pairs)
            final_inputs.append(final_input)
            y_train_batch.append(attended_output)

        x_inputs = np.stack(x_inputs)
        final_inputs = np.stack(final_inputs)
        y_train_batch = np.stack(y_train_batch)

        return [x_inputs, final_inputs], y_train_batch
    
    
#train_generator = DataGenerator(train_data)
#model.fit(train_generator, epochs=10)

In [21]:
def prepare_prediction_input(pairs, final_input):
    """
    Prepare the input for prediction by stacking the input-output pairs
    and the final input matrix.

    :param pairs: List of input-output pairs (e.g., 8 matrices for 4 pairs).
    :param final_input: The final input matrix to complete.
    :return: Tuple of (stacked input pairs, final input matrix).
    """
    pairs_tensor = np.stack(pairs, axis=0)
    pairs_tensor = np.expand_dims(pairs_tensor, axis=0)  # Add batch dimension
    final_input_tensor = np.expand_dims(final_input, axis=0)  # Add batch dimension

    return pairs_tensor, final_input_tensor


overfit_test = prepare_prediction_input(train_data[0][0], train_data[0][1])
overfit_output = np.expand_dims(train_data[0][2], 0)

physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
        tf.config.experimental.set_virtual_device_configuration(
            physical_devices[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10240)]
        )
    except RuntimeError as e:
        print(e)


# model.fit(overfit_test, overfit_output, epochs=10)

# Example input data for prediction
# input_pairs = [np.random.rand(32, 32, 1) for _ in range(8)]  # 4 pairs = 8 matrices
# final_input_matrix = np.random.rand(32, 32, 1)
# 
# # Prepare the prediction input
# x_input, final_input = prepare_prediction_input(input_pairs, final_input_matrix)
# 
# # Perform prediction
# # predicted_output = model.predict([x_input, final_input])
# 
# output_desired = np.random.rand(32, 32, 1)
# predicted_output = model.fit([x_input, final_input], np.expand_dims(output_desired, axis=0), epochs=10)
# final_output_matrix = predicted_output[0, :, :, 0]
# # Display the predicted output
# display_output(final_output_matrix)



# model.summary()
# 
# for i in range(0, len(train_data)):
#     # test_inputs = train_data[i][0].copy()
#     # final_input = train_data[i][1].copy()
#     # attended_output = train_data[i][2].copy()
#     # 
#     # # for j in range(0, len(test_inputs)):
#     # #     test_inputs[j] = np.expand_dims(test_inputs[j], axis=0)
#     # #     display_output(test_inputs[j])
#     #     #print(test_inputs[j].shape)
#     # 
#     # pairs = np.stack(test_inputs, axis=0)
#     
#     def prepare_prediction_input(pairs, final_input):
#         pairs_tensor = np.stack(pairs, axis=0)
#         pairs_tensor = np.expand_dims(pairs_tensor, axis=0)
#         final_input_tensor = np.expand_dims(final_input, axis=0)
#         return pairs_tensor, final_input_tensor
# 
#     # Example input data for prediction
#     input_pairs = [np.random.rand(32, 32, 1) for _ in range(8)]  # 4 pairs = 8 matrices
#     final_input_matrix = np.random.rand(32, 32, 1)
#     
#     # Prepare the prediction input
#     x_input, final_input = prepare_prediction_input(input_pairs, final_input_matrix)
#     
#     # Perform prediction
#     predicted_output = model.predict([x_input, final_input])
# 
#     # x_train = [[test_inputs[i] for i in range(num_pairs * 2)], np.expand_dims(final_input, axis=0)]
#     # x_train = [*test_inputs, np.expand_dims(final_input, axis=0)]
#     ## we create a stack of the input pairs
#     
#     #print(len(x_train))
#     # 
#     #y_train = np.expand_dims(attended_output, axis=0)
#     # print(y_train.shape)
#     # ## BEFORE
#     # final_output = model.predict(x_train)
#     # final_output_matrix = final_output[0, :, :, 0]
#     # display_output(final_input, final_output_matrix)  # This will print the 32x32 matrix
#     # 
#     #model.fit(x_train, y_train, epochs=100)
# 
# #    final_output_matrix = final_output[0, :, :, 0]
# 
# #    display_output(final_input, final_output_matrix)  # This will print the 32x32 matrix
# #    display_output(final_input, attended_output)  # This will print the 32x32 matrix
# 


Physical devices cannot be modified after being initialized


In [None]:
final_output = model.predict(x_train)
final_output_matrix = final_output[0, :, :, 0]

print(final_output_matrix.shape)  # Should print (32, 32)
display_output(final_output_matrix, final_output_matrix)  # This will print the 32x32 matrix
display_output(test_inputs[0], final_output_matrix)  # This will print the 32x32 matrix
print(final_output)

In [None]:
# Example training data
x_train = np.random.rand(100, 32, 32, 1).astype(np.float32)  # 100 samples with a channel dimension
y_train = np.random.randint(0, 10, size=(100,)).astype(np.int32)  # 100 labels

# Example validation data
x_val = np.random.rand(20, 32, 32, 1).astype(np.float32)  # 20 samples with a channel dimension
y_val = np.random.randint(0, 10, size=(20,)).astype(np.int32)  # 20 labels

# Train the model
history = model.fit(x_train, y_train, epochs=10, validation_data=(x_val, y_val))

In [None]:
import datetime

!rm -rf ./logs/fit
log_dir = "./logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    # enabling histogram will crash the learninig in non-eager mode
    histogram_freq=1, # every epoch
    write_images=True, # visualize model weights in image form
    update_freq='epoch', # this can be 'epoch' to make training faster (less logs)
)

%tensorboard --logdir logs/fit
model.fit(x=train_data, epochs=50, validation_data=eval_data)

In [None]:
# predict
z_pred = model.predict(train_data.take(1))
print(z_pred)
def display_output(output_1, output_2):
    output_1 = output_1.reshape(32,32)
    output_2 = output_2.reshape(32,32)
    output_1 = denormalize_data(output_1)
    output_2 = denormalize_data(output_2)
    plot_grid(output_1, output_2)


#display_output(train_data.take(1).as_numpy_iterator().next()[0]['c_input'], train_data.take(1).as_numpy_iterator().next()[0]['c_input'])
display_output(train_data.take(1).as_numpy_iterator().next()[0]['c_input'], z_pred.reshape(32,32))

attended_output = train_data.take(1).as_numpy_iterator().next()[1].reshape(32,32)
display_output(train_data.take(1).as_numpy_iterator().next()[0]['c_input'], attended_output)
#display_output(z_pred)

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Concatenate, Flatten, Reshape
from tensorflow.keras.models import Model
import numpy as np

def create_padded_model(max_pairs, max_shape=(32, 32)):
    inputs = []
    for i in range(max_pairs):
        input_a = Input(shape=max_shape, name=f'input_a_{i+1}')
        input_b = Input(shape=max_shape, name=f'input_b_{i+1}')
        inputs.append(input_a)
        inputs.append(input_b)

    # Flatten and concatenate all inputs
    flattened_inputs = [Flatten()(input_tensor) for input_tensor in inputs]
    concatenated = Concatenate()(flattened_inputs)

    # Define the rest of the model
    x = Dense(256, activation='relu')(concatenated)
    x = Dense(128, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    output = Dense(1, activation='linear')(x)

    # Create the model
    model = Model(inputs=inputs, outputs=output)

    return model

model = create_padded_model(max_pairs=5, max_shape=(32, 32))
model.compile(optimizer='adam', loss=tf.keras.losses.MeanSquaredError())

In [None]:

# Create a data generator for training and validation data
batch_size = 32
train_generator = DataGenerator(train_data, batch_size=batch_size, max_pairs=5, max_shape=(32, 32))
val_generator = DataGenerator(test_data, batch_size=batch_size, max_pairs=5, max_shape=(32, 32))

#tf.config.run_functions_eagerly(True)

# Train the model
model.fit(train_generator, epochs=10, steps_per_epoch=len(train_data)//batch_size, validation_data=val_generator, validation_steps=len(val_data)//batch_size)


In [None]:
# Evaluate the model
test_loss = model.evaluate(test_generator)
print('Test Loss:', test_loss)

# Visualize a few test samples and their predictions
def plot_predictions(model, X, y, num_samples=1):
    predictions = model.predict(X[:num_samples])
    fig, axes = plt.subplots(num_samples, 3, figsize=(10, 10))
    for i in range(num_samples):
        axes[i, 0].imshow(X[i].squeeze(), cmap='gray')
        axes[i, 0].set_title('Input')
        axes[i, 1].imshow(y[i].squeeze(), cmap='gray')
        axes[i, 1].set_title('True Output')
        axes[i, 2].imshow(predictions[i].squeeze(), cmap='gray')
        axes[i, 2].set_title('Predicted Output')
    plt.tight_layout()
    plt.show()

# Plot predictions (using a small subset of the test data)
X_test_subset = np.array(X_test[:5])
y_test_subset = np.array(y_test[:5])
plot_predictions(model, X_test_subset, y_test_subset)
