# Setup

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

import datetime
import importlib
import numpy as np
import tensorflow as tf

# Modellib
import modellib.cnn
import modellib.train
import modellib.io as io
import modellib.evaluate as eval

2024-07-31 12:05:11.624646: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-31 12:05:11.862289: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-31 12:05:11.912035: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# Load and transform matrix datasets

In [2]:
# Reading the datasets
train_bands, train_labels = io.read_from_hdf5("data/datasets/train_dataset_64_1600.h5")
val_bands, val_labels = io.read_from_hdf5("data/datasets/val_dataset_64_200.h5")
test_bands, test_labels = io.read_from_hdf5("data/datasets/test_dataset_64_200.h5")

# Printing shapes to verify
print(f"Train bands shape: {train_bands.shape}, Train labels shape: {train_labels.shape}")
print(f"Validation bands shape: {val_bands.shape}, Validation labels shape: {val_labels.shape}")
print(f"Test bands shape: {test_bands.shape}, Test labels shape: {test_labels.shape}")

# Convert to tensorflow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((train_bands, train_labels))
val_dataset = tf.data.Dataset.from_tensor_slices((val_bands, val_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_bands, test_labels))

Train bands shape: (1600, 21, 64, 1), Train labels shape: (1600, 64)
Validation bands shape: (200, 21, 64, 1), Validation labels shape: (200, 64)
Test bands shape: (200, 21, 64, 1), Test labels shape: (200, 64)


# Training
run tensorboard --logdir logs to launch tensorboard

In [3]:
# Create log dir
log_dir = "logs/cnn/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
print("Files in log directory:", os.listdir(log_dir))

# Create Learning Rate scheduler
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.00001,
    decay_steps=1000,
    decay_rate=0.9
)

# Define parameters
batch_size = 16
num_epochs = 200
input_shape = (21, 64, 1)
optimizer = tf.keras.optimizers.Nadam(learning_rate=lr_schedule)
class_weights = {0: 0.2, 1: 0.8}

Files in log directory: []


In [None]:
# Create and Compile Model
model = modellib.cnn.create_compile_model_custom_loss(
    input_shape, 
    optimizer, 
    class_weights
)

# Start Training Loop
trained_model, train_losses, val_losses = modellib.train.train_model(
    model,
    train_dataset.batch(batch_size),  
    val_dataset.batch(batch_size),
    num_epochs,
    log_dir
)

In [None]:
# Plot losses
modellib.train.plot_losses(train_losses, val_losses)

# Evaluation

In [None]:
# Evaluate the model on the test set
evaluation_results = modellib.evaluate.evaluate_model(
    trained_model,
    test_dataset.batch(batch_size),
    class_weights
)

# Restore Best Model Weights

### *IMPORTANT: The model weights can only be imported when using tensorflow 2.15.0. with keras 2.0.*

In [4]:
# Restore weights from best run
new_model = modellib.cnn.Baseline(input_shape)
new_model.build((None,) + input_shape)  # None represents the batch dimension
new_model.load_weights("data/weights/best_cnn.weights.h5")

# print weights
print(f"Model Weights: {new_model.get_weights()}")

# Evaluate the restored model
new_results = modellib.evaluate.evaluate_model(
    new_model,
    test_dataset.batch(batch_size),
    class_weights
)

Model Weights: []
Test Loss: 0.1944
Element-wise Accuracy: 0.55921875
Classification Report:
              precision    recall  f1-score   support

    no block       0.89      0.58      0.70     11516
       block       0.09      0.37      0.14      1284

    accuracy                           0.56     12800
   macro avg       0.49      0.47      0.42     12800
weighted avg       0.81      0.56      0.65     12800

Confusion Matrix:
[[6686 4830]
 [ 812  472]]
True Positives: 472
False Negatives: 812
True Negatives: 6686
False Positives: 4830


# Make Predictions

In [None]:
importlib.reload(eval)
test_predictions = new_model.predict(test_dataset.batch(batch_size))

# Convert to Binary
threshold = 0.5
binary_predictions = (test_predictions >= threshold).astype(int)

# Save predictions


In [None]:
# Save blockstarts 
# np.save('data/blockstarts/cnn_predictions.npy', binary_predictions)

In [None]:
# Load blockstarts and print to verify
loaded_predictions = np.load('data/blockstarts/cnn_predictions.npy')
print(loaded_predictions)

# Evaluate to verify
metrics = eval.calculate_metrics(test_labels, loaded_predictions)