In [None]:
import os

# Capture the current working directory of the kernel and print it, so it is
# obvious which directory relative paths are resolved against.
current_base_path = os.getcwd()
print(f"Aktueller Arbeitsverzeichnis-Pfad: {current_base_path}")


# Generate Synthetic Data
Generate a set of `NUMBER_OF_MATRICES` square, symmetric, positive semi-definite matrices of dimension `MATRIX_DIM` × `MATRIX_DIM` and keep them in RAM.

## Define Constants
The following constants are used throughout the rest of the notebook.

In [2]:
# Side length of each generated square matrix and the number of matrices to generate.
MATRIX_DIM = 64
NUMBER_OF_MATRICES = 100

# Passed to matrixlib.generate.add_blocks for both the noise blocks and the true blocks.
# Presumably the mean and standard deviation of the block-size distribution —
# TODO confirm against matrixlib.generate.
AVERAGE_BLOCK_SIZE = 10
BLOCK_SIZE_STD_DEV = 0.66

# Background noise settings, passed to matrixlib.generate.add_noise.
# Presumably (min, max) ranges for the fill density and the entry values — TODO confirm.
NOISE_BACKGROUND_DENSITY_RANGE = (0.3, 0.5)
NOISE_BACKGROUND_VALUE_RANGE = (0.0, 0.5)

# Settings for the noise blocks placed on the diagonal (first add_blocks call).
NOISE_BLOCK_GAP_CHANCE = 0.5
NOISE_BLOCK_SIZE_RANGE = (3, 32)
NOISE_BLOCK_DENSITY_RANGE = (0.3, 0.5)
NOISE_BLOCK_VALUE_RANGE = (0.0, 1.0)

# Settings for the 'real' blocks (second add_blocks call); the gap chance of 0.0
# matches the intent that true blocks are placed without gaps.
TRUE_BLOCK_GAP_CHANCE = 0.0
TRUE_BLOCK_SIZE_RANGE = (2, 32)
TRUE_BLOCK_DENSITY_RANGE = (0.5, 0.7)
TRUE_BLOCK_VALUE_RANGE = (0.0, 1.0)

# NOTE(review): presumably the half-width of a diagonal band; confirm where it is consumed.
DIAGONAL_BAND_RADIUS = 10

# Seed intended for reproducible random number generation.
# NOTE(review): confirm that it is actually handed to every random generator in use.
RNG_SEED = 42

In [3]:
import numpy as np
import tensorflow as tf


import importlib

import matrixlib.io
import matrixlib.util
import matrixlib.plot
import matrixlib.generate
from matrixlib.metadata import MatrixMetadata

#import CNN.dataloader
import CNN.models
import CNN.training
import CNN.evaluation

# Reload the project modules so that edits made to them on disk are picked up
# without restarting the kernel. Every module is reloaded exactly once (the
# original cell reloaded matrixlib.generate twice), and the loop leaves no
# trailing expression, so the cell no longer echoes a module repr containing an
# absolute local path.
# The matrixlib modules come first; presumably the CNN modules import them, so
# they should see the refreshed versions — TODO confirm the dependency order.
# NOTE(review): matrixlib.metadata is not reloaded here; MatrixMetadata was bound
# via `from ... import` and would need a re-import to pick up changes.
for _module in (
    matrixlib.io,
    matrixlib.util,
    matrixlib.plot,
    matrixlib.generate,
    # CNN.dataloader,  # disabled together with its import above
    CNN.models,
    CNN.training,
    CNN.evaluation,
):
    importlib.reload(_module)


2024-06-09 16:16:46.890858: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


<module 'matrixlib.generate' from '/home/moonchild/PycharmProjects/opencampus-preconditioner-ai-project/matrixlib/generate.py'>

In [4]:
def generate_matrices(number_of_matrices: int, dimension: int) -> "tuple[np.ndarray, MatrixMetadata]":
    """Generate synthetic matrices together with their metadata.

    The matrices start out as zeros and are then filled by the
    ``matrixlib.generate`` helpers: background noise first, then noise blocks
    on the diagonal, then the 'real' blocks. The helpers' return values are
    not needed, so any content they add must be written in place into the
    arrays passed to them.

    Args:
        number_of_matrices: How many matrices to generate.
        dimension: Side length of each square matrix.

    Returns:
        A tuple ``(matrices, metadata)``. ``matrices`` is the array created by
        ``matrixlib.generate.__init_zero_matrices`` after all noise and blocks
        were added; ``metadata`` is the ``MatrixMetadata`` instance whose
        fields (densities, block starts) were passed to the helpers.
    """
    # Size the metadata from the arguments rather than the notebook-level
    # constants, so it always matches the matrices generated below.
    generated_metadata: MatrixMetadata = MatrixMetadata(number_of_matrices, dimension)
    generated_matrices: np.ndarray = matrixlib.generate.__init_zero_matrices(number_of_matrices, dimension)

    # add background noise to the generated matrices
    matrixlib.generate.add_noise(
        generated_matrices,
        generated_metadata.noise_background_density,
        NOISE_BACKGROUND_DENSITY_RANGE,
        NOISE_BACKGROUND_VALUE_RANGE
    )

    # add noise blocks with higher values to the diagonal of the matrix;
    # no block-start array is passed (None) because noise blocks are not labels
    matrixlib.generate.add_blocks(
        generated_matrices,
        None,
        generated_metadata.noise_block_density,
        NOISE_BLOCK_DENSITY_RANGE,
        NOISE_BLOCK_VALUE_RANGE,
        NOISE_BLOCK_SIZE_RANGE,
        NOISE_BLOCK_GAP_CHANCE,
        AVERAGE_BLOCK_SIZE,
        BLOCK_SIZE_STD_DEV,
    )

    # add 'real' blocks to the diagonal of the matrix without gaps; their
    # start positions go into the metadata's block_starts field
    matrixlib.generate.add_blocks(
        generated_matrices,
        generated_metadata.block_starts,
        generated_metadata.true_block_density,
        TRUE_BLOCK_DENSITY_RANGE,
        TRUE_BLOCK_VALUE_RANGE,
        TRUE_BLOCK_SIZE_RANGE,
        TRUE_BLOCK_GAP_CHANCE,
        AVERAGE_BLOCK_SIZE,
        BLOCK_SIZE_STD_DEV,
    )

    return generated_matrices, generated_metadata

In [5]:
# Generate the matrices and add a trailing channel axis so that each sample has
# the (MATRIX_DIM, MATRIX_DIM, 1) shape passed to the model as input_shape.
matrices, metadata = generate_matrices(NUMBER_OF_MATRICES, MATRIX_DIM)
matrices = matrices.reshape(NUMBER_OF_MATRICES, MATRIX_DIM, MATRIX_DIM, 1)
labels = metadata.block_starts

print(matrices.shape)

# Create dataset from matrices and labels
dataset = tf.data.Dataset.from_tensor_slices((matrices, labels))

# Split the dataset 80 % / 10 % / 10 %; the test split takes the remainder so
# that the three sizes always add up to NUMBER_OF_MATRICES.
train_size = int(0.8 * NUMBER_OF_MATRICES)
val_size = int(0.1 * NUMBER_OF_MATRICES)
test_size = NUMBER_OF_MATRICES - train_size - val_size

# The shuffle buffer covers the whole training split; a smaller buffer only
# mixes elements locally. The seed makes the shuffle order reproducible.
train_dataset = dataset.take(train_size).shuffle(buffer_size=train_size, seed=RNG_SEED)
val_dataset = dataset.skip(train_size).take(val_size)
test_dataset = dataset.skip(train_size + val_size)

print(f"Train size: {train_size}, Val size: {val_size}, Test size: {test_size}")


Generating matrices with a total number of 409600 entries (100 64x64 matrices)
Matrix for element size 8 bytes is a total of 3276800 bytes (3.125 MiB)
(100, 64, 64, 1)
Train size: 80, Val size: 10, Test size: 10


In [None]:
# Show the dataset descriptions of the training and validation splits, one per line.
print(train_dataset, val_dataset, sep="\n")

In [None]:
# Iterate over the validation split and print every element for manual inspection.
for element in val_dataset:
    print(element)
    

In [None]:
import os

# Set the TensorFlow native log level before importing TensorFlow in this cell.
# NOTE(review): the variable is presumably only read when TensorFlow is first
# loaded; TensorFlow is already imported in the imports cell further up, so to
# silence the startup messages this assignment would have to run before that
# first import — confirm and move it if needed.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf
# training loop
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam

# Build and compile the baseline CNN for single-channel MATRIX_DIM x MATRIX_DIM inputs.
model = CNN.models.Baseline(input_shape=(MATRIX_DIM, MATRIX_DIM, 1))
model.compile(optimizer=Adam(learning_rate=0.001),
              loss=BinaryCrossentropy(),  # Use a specific instance if configuring parameters
              metrics=['accuracy'])

num_epochs = 10
log_dir = "runs/matrix_cnn_experiment"
# exist_ok=True creates the directory when missing and is a no-op otherwise,
# replacing the separate existence check.
os.makedirs(log_dir, exist_ok=True)
print("Files in log directory:", os.listdir(log_dir))

# NOTE(review): train_model receives its own loss and optimizer instances in
# addition to the ones given to model.compile above; confirm which pair
# CNN.training.train_model actually uses.
trained_model, train_losses, val_losses = CNN.training.train_model(
    model=model,
    train_dataset=train_dataset.batch(1),  # Adjust batch size as needed
    val_dataset=val_dataset.batch(1),
    loss_fn=BinaryCrossentropy(),  # Using an instance allows more configuration if needed
    optimizer=Adam(learning_rate=0.001),
    num_epochs=num_epochs,
    log_dir=log_dir
)



# backup


In [None]:
# Alternative data pipeline based on CNN.dataloader.MatrixDataset.
# The module import is commented out in the imports cell, so it is imported
# here; otherwise the attribute access below fails on a fresh kernel.
import CNN.dataloader

data_params = {
    'batch_size': 4,
    'img_size': (64, 64)
}

# NOTE(review): _create_dynamic_dataset is a private method by naming
# convention; confirm whether MatrixDataset offers a public entry point.
# This cell rebinds `dataset`, `train_dataset`, `val_dataset` and `test_dataset`,
# replacing the in-memory splits created earlier in the notebook.
dataset = CNN.dataloader.MatrixDataset(**data_params) 
train_dataset = dataset._create_dynamic_dataset(number_of_matrices=NUMBER_OF_MATRICES, dimension=MATRIX_DIM, split='train')
val_dataset = dataset._create_dynamic_dataset(number_of_matrices=NUMBER_OF_MATRICES, dimension=MATRIX_DIM, split='val')
test_dataset = dataset._create_dynamic_dataset(number_of_matrices=NUMBER_OF_MATRICES, dimension=MATRIX_DIM, split='test')

# Print every training batch for manual inspection.
for data, labels in train_dataset:
    print(data.numpy(), labels.numpy())

In [None]:
# Convert each validation batch to NumPy and print data and labels side by side.
for data, labels in val_dataset:
    batch_values, batch_targets = data.numpy(), labels.numpy()
    print(batch_values, batch_targets)

In [None]:
# Show the dataset descriptions of the validation, training and test splits, one per line.
print(val_dataset, train_dataset, test_dataset, sep="\n")

# train

In [None]:
import datetime

# Instantiate the model; the original cell bound the Baseline class itself,
# whereas the other training cell in this notebook passes an instance built
# with this same input shape to train_model.
model = CNN.models.Baseline(input_shape=(MATRIX_DIM, MATRIX_DIM, 1))
loss_fn = tf.keras.losses.binary_crossentropy
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
log_dir = "runs/matrix_cnn_experiment"
# exist_ok=True creates the directory when missing and is a no-op otherwise.
os.makedirs(log_dir, exist_ok=True)
print("Files in log directory:", os.listdir(log_dir))

CNN.training.train_model(model, train_dataset, val_dataset, loss_fn, optimizer, num_epochs=10, log_dir=log_dir)