In [79]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Resizing, Conv2D, Flatten, Dense, Rescaling
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.callbacks import EarlyStopping
# from tensorflow.keras.utils import split_dataset
from tensorflow.data.experimental import cardinality
from tensorflow.keras.metrics import SensitivityAtSpecificity, TruePositives, FalseNegatives


import os


In [None]:
try:
    tpu = TPUClusterResolver()  # Detect TPU
    experimental_connect_to_cluster(tpu)
    initialize_tpu_system(tpu)
    strategy = TPUStrategy(tpu)
    print("Running on TPU")
except ValueError:
    strategy = get_strategy()  # Default for CPU/GPU
    print("Running on CPU/GPU")

In [64]:
# OS agnostic path handling

# If running in a Jupyter notebook, use the current directory
# Otherwise, use the directory of the script
try:
    dirname = os.path.dirname(__file__)
except NameError:
    dirname = os.getcwd()

train_data_path = os.path.join(dirname, './data/train')
test_data_path = os.path.join(dirname, './data/test')

# load training data locally
train_dataset = image_dataset_from_directory(train_data_path,
                                             labels='inferred',
                                             label_mode=('categorical'),
                                             image_size=(224, 224),
                                             batch_size=32,
                                             shuffle=True)

#train_dataset = train_dataset.batch(32, drop_remainder=True)

# load testing data locally
test_dataset = image_dataset_from_directory('./data/test',
                                            labels='inferred',
                                            label_mode=('categorical'),
                                            image_size=(224, 224),
                                            batch_size=32,
                                            shuffle=False)

# train_split, validation_split = split_dataset(train_dataset, left_size=0.8)

# Get the total number of batches
total_batches = cardinality(train_dataset).numpy()
val_batches = int(0.2 * total_batches)

# Split the dataset
val_dataset = train_dataset.take(val_batches)
train_dataset = train_dataset.skip(val_batches)


Found 2637 files belonging to 2 classes.
Found 660 files belonging to 2 classes.


In [80]:
# Checking out images

# Get one batch (images and labels)
# for images, labels in train_dataset.take(1):
#     # Get the first image and label from the batch
#     img = images[0].numpy().astype("uint8")
#     label = labels[0].numpy()
#     print("Image shape:", img.shape)
#     print("Label (one-hot):", label)
#     print(images[0].numpy().astype("uint8"))

#     # Display the image
#     import matplotlib.pyplot as plt
#     plt.imshow(img)
#     plt.title(f"Label: {label}")
#     plt.axis('off')
#     plt.show()

In [66]:
print(type(train_dataset))

# Get one batch from the dataset
for images, labels in train_dataset.take(1):
    print(images.shape)  # Output: (batch_size, height, width, channels)


<class 'tensorflow.python.data.ops.skip_op._SkipDataset'>
(32, 224, 224, 3)


2025-06-03 13:00:07.920522: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [67]:
# Define model architecture, simple CNN to start with
model = Sequential([
    Rescaling(1 / 255, input_shape=(224, 224, 3)),
    Resizing(224, 224), # leaving out argument: input_shape=(224, 224, 3)
    Conv2D(32, (3, 3), activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(2, activation='softmax')])

In [68]:
# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy','recall'])

In [69]:
es = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [70]:
print("Train batches:", cardinality(train_dataset).numpy())
print("Val batches:", cardinality(val_dataset).numpy())

model.fit(train_dataset, epochs=10, validation_data=val_dataset, callbacks=[es])

Train batches: 67
Val batches: 16
Epoch 1/10
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 547ms/step - accuracy: 0.5440 - loss: 33.9052 - recall: 0.5440 - val_accuracy: 0.5762 - val_loss: 0.6897 - val_recall: 0.5762
Epoch 2/10
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 605ms/step - accuracy: 0.5473 - loss: 0.6910 - recall: 0.5473 - val_accuracy: 0.5898 - val_loss: 0.6876 - val_recall: 0.5898
Epoch 3/10
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 510ms/step - accuracy: 0.5466 - loss: 0.6905 - recall: 0.5466 - val_accuracy: 0.5625 - val_loss: 0.6888 - val_recall: 0.5625
Epoch 4/10
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 578ms/step - accuracy: 0.5459 - loss: 1.8415 - recall: 0.5459 - val_accuracy: 0.5625 - val_loss: 0.6881 - val_recall: 0.5625
Epoch 5/10
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 539ms/step - accuracy: 0.5334 - loss: 0.6910 - recall: 0.5334 - val_accuracy: 0.5859 

<keras.src.callbacks.history.History at 0x126346150>

In [71]:
y_pred = model.predict(test_dataset)

[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 72ms/step
