# Setup

In [None]:
import os
# Both environment variables are set before TensorFlow is imported further down.
# NOTE(review): '2' presumably hides TensorFlow's native INFO and WARNING log output — confirm against the TF docs
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# NOTE(review): presumably restricts this process to GPU index 0 — confirm on multi-GPU hosts
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

import datetime
import importlib
import numpy as np
import tensorflow as tf

# Project-local modellib package: model construction (cnn), training loop (train),
# dataset reading (io) and evaluation helpers (evaluate).
# NOTE(review): the alias `eval` shadows Python's built-in eval() for the rest of the notebook.
import modellib.cnn
import modellib.train
import modellib.io as io
import modellib.evaluate as eval

# Load and transform matrix datasets

In [None]:
# Load the (bands, labels) arrays of each split from its HDF5 file
train_bands, train_labels = io.read_from_hdf5(
    "data/datasets/train_dataset_64_1600.h5"
)
val_bands, val_labels = io.read_from_hdf5(
    "data/datasets/val_dataset_64_200.h5"
)
test_bands, test_labels = io.read_from_hdf5(
    "data/datasets/test_dataset_64_200.h5"
)

# Sanity check: report the array shapes of every split
print(
    f"Train bands shape: {train_bands.shape}, "
    f"Train labels shape: {train_labels.shape}"
)
print(
    f"Validation bands shape: {val_bands.shape}, "
    f"Validation labels shape: {val_labels.shape}"
)
print(
    f"Test bands shape: {test_bands.shape}, "
    f"Test labels shape: {test_labels.shape}"
)

# Wrap each (bands, labels) pair in a tf.data.Dataset (unbatched; batching happens at use)
train_dataset = tf.data.Dataset.from_tensor_slices(
    (train_bands, train_labels)
)
val_dataset = tf.data.Dataset.from_tensor_slices(
    (val_bands, val_labels)
)
test_dataset = tf.data.Dataset.from_tensor_slices(
    (test_bands, test_labels)
)

# Training
Run `tensorboard --logdir logs` in a terminal to launch TensorBoard and follow the training logs.

In [None]:
# Create a timestamped log directory for this run (under logs/cnn/).
log_dir = "logs/cnn/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# exist_ok=True makes creation idempotent and avoids the check-then-create race
# of a separate os.path.exists() test.
os.makedirs(log_dir, exist_ok=True)
print("Files in log directory:", os.listdir(log_dir))

# Learning-rate schedule: start at 1e-5 and decay by a factor of 0.9 per 1000 optimizer steps
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.00001,
    decay_steps=1000,
    decay_rate=0.9
)

# Training hyperparameters
batch_size = 16
num_epochs = 200
# NOTE(review): presumably (height, width, channels) of one band-matrix sample — confirm against the dataset shapes printed above
input_shape = (21, 64, 1)
optimizer = tf.keras.optimizers.Nadam(learning_rate=lr_schedule)
# Per-class weights passed to the custom loss: class 1 is weighted 0.8, class 0 is weighted 0.2
class_weights = {0: 0.2, 1: 0.8}

# Build the CNN and compile it with the class-weighted custom loss
model = modellib.cnn.create_compile_model_custom_loss(
    input_shape,
    optimizer,
    class_weights
)

# Run the training loop; it returns the trained model and the per-epoch loss histories.
# NOTE(review): the training set is batched here without an explicit shuffle — confirm that
# train_model shuffles internally, or add .shuffle(...) before .batch(...).
trained_model, train_losses, val_losses = modellib.train.train_model(
    model,
    train_dataset.batch(batch_size),
    val_dataset.batch(batch_size),
    num_epochs,
    log_dir
)

In [None]:
# Plot the training and validation loss histories returned by train_model
modellib.train.plot_losses(train_losses, val_losses)

# Evaluation

In [None]:
# Score the freshly trained model on the held-out test split,
# using the same batch size and class weights as during training
test_batches = test_dataset.batch(batch_size)
evaluation_results = modellib.evaluate.evaluate_model(
    trained_model, test_batches, class_weights
)

# Restore Best Model Weights

In [None]:
# Rebuild the Baseline architecture and restore the weights saved by the best run
new_model = modellib.cnn.Baseline(input_shape)
new_model.build((None,) + input_shape)  # None represents the batch dimension
new_model.load_weights("cnn.weights.h5")

# Summarize the restored weights (shape + basic statistics per array) instead of
# dumping every array into the cell output; the statistics still show that
# concrete values were loaded.
restored_weights = new_model.get_weights()
print(f"Model Weights: {len(restored_weights)} arrays")
for idx, weight in enumerate(restored_weights):
    print(
        f"  [{idx}] shape={np.shape(weight)}, "
        f"mean={np.mean(weight):.6g}, std={np.std(weight):.6g}"
    )

# Evaluate the restored model on the test split with the same class weights
new_results = modellib.evaluate.evaluate_model(
    new_model,
    test_dataset.batch(batch_size),
    class_weights
)

# Make Predictions

In [None]:
# Re-import the evaluation module (aliased as `eval`) so on-disk edits are picked up
importlib.reload(eval)

# Raw model outputs for the batched test split
test_batches = test_dataset.batch(batch_size)
test_predictions = new_model.predict(test_batches)

# Binarize: outputs at or above the threshold become 1, everything else 0
threshold = 0.5
above_threshold = test_predictions >= threshold
binary_predictions = above_threshold.astype(int)

# Save predictions


In [None]:
# Persist the thresholded (0/1) test predictions to a .npy file in the working directory
np.save('test_predictions.npy', binary_predictions)

In [None]:
# Read the saved predictions back from disk and display them as a round-trip check
predictions_path = 'test_predictions.npy'
loaded_predictions = np.load(predictions_path)
print(loaded_predictions)

# Recompute the metrics from the reloaded predictions against the test labels
metrics = eval.calculate_metrics(test_labels, loaded_predictions)