In [5]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
from sklearn.metrics import f1_score # Keep this for later if labels are available

# Image geometry and batching shared by both dataset loaders
IMG_HEIGHT = 128
IMG_WIDTH = 128
BATCH_SIZE = 32

# Keyword arguments that are identical for the training and testing loaders
_COMMON_LOADER_ARGS = dict(
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    interpolation='nearest',
    batch_size=BATCH_SIZE,
)

# Training data: labels are inferred from the sub-directory layout and
# requested in binary form; the loader shuffles the samples.
train_ds = keras.utils.image_dataset_from_directory(
    "complete_set/training_set",
    labels='inferred',
    label_mode='binary',
    shuffle=True,
    **_COMMON_LOADER_ARGS,
)

# Testing data: loaded WITHOUT labels (labels=None), so each element is a bare
# image batch. Shuffling is off so that the file order is kept.
test_ds = keras.utils.image_dataset_from_directory(
    "complete_set/testing_set",
    labels=None,  # Temporarily set to None to bypass ValueError
    shuffle=False,
    **_COMMON_LOADER_ARGS,
)

# Rescale image pixel values by 1/255 so they land in [0, 1]
def normalize_img(image, label):
    """Cast `image` to float32 and divide it by 255; pass `label` through.

    Returns:
        A `(scaled_image, label)` tuple, suitable for `Dataset.map` on a
        dataset whose elements are `(image, label)` pairs.
    """
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label

# Normalize both datasets.
# train_ds yields (image, label) pairs, so it goes through normalize_img.
# test_ds was loaded with labels=None and yields bare image batches, so it
# needs a single-argument mapper instead.
# num_parallel_calls lets tf.data process batches concurrently; element order
# is still preserved because map() is deterministic by default.
train_ds = train_ds.map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.map(
    lambda image: tf.cast(image, tf.float32) / 255.,  # Normalize images without labels
    num_parallel_calls=tf.data.AUTOTUNE,
)

# Configure datasets for performance.
# cache() replays the batch order it recorded during the first pass, which
# would pin the loader's shuffle to one fixed order for every epoch. Shuffling
# again AFTER the cache gives a fresh batch order each epoch. The buffer size
# counts batches (the dataset is already batched), so 1000 comfortably covers
# a training set of this size.
SHUFFLE_BUFFER_BATCHES = 1000
train_ds = (
    train_ds.cache()
    .shuffle(SHUFFLE_BUFFER_BATCHES)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)
# The test set is deliberately NOT shuffled so predictions stay aligned with
# the order in which the files were loaded.
test_ds = test_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

# Build a simple CNN model: three Conv2D/MaxPooling2D stages with 32, 64 and
# 128 filters, followed by a dense head.
# The input shape is declared with an explicit keras.Input entry rather than
# an `input_shape=` argument on the first layer; Keras recommends this form
# for Sequential models and warns about the layer-argument form.
model = keras.Sequential([
    keras.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # Single sigmoid unit: binary classification
])

# Compile the model: binary cross-entropy matches the single sigmoid output,
# and accuracy is tracked as the training metric.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Fit the model on the training dataset for a fixed number of epochs
epochs = 10
history = model.fit(train_ds, epochs=epochs)

# Run inference on the (unlabeled) test dataset, then threshold the sigmoid
# outputs at 0.5 to obtain hard 0/1 predictions
y_pred_probs = model.predict(test_ds)
y_pred = (y_pred_probs > 0.5).astype(int)

print("Model training complete and predictions made on the test set.")
print("To evaluate accuracy and F1 score, please organize your 'complete_set/testing_set' into 'benign' and 'malignant' subfolders, and then set `labels='inferred'` and `label_mode='binary'` for the test_ds loading.")

# Persist a short report. Only last-epoch training metrics are available:
# the test set carries no labels, so neither test accuracy nor F1 can be
# computed here.
final_accuracy = history.history['accuracy'][-1]
final_loss = history.history['loss'][-1]
report_lines = [
    f"Training Accuracy (last epoch): {final_accuracy:.4f}\n",
    f"Training Loss (last epoch): {final_loss:.4f}\n",
    "Test set evaluation (accuracy/F1) skipped due to unlabeled test data. Please organize 'testing_set' into class subfolders for full evaluation.",
]
with open("metrics.txt", "w") as metrics_file:
    metrics_file.writelines(report_lines)


Found 1112 files belonging to 2 classes.
Found 200 files.
Epoch 1/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 191ms/step - accuracy: 0.7032 - loss: 0.6082
Epoch 2/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 175ms/step - accuracy: 0.7572 - loss: 0.5094
Epoch 3/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 177ms/step - accuracy: 0.8004 - loss: 0.4093
Epoch 4/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 177ms/step - accuracy: 0.8651 - loss: 0.3202
Epoch 5/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 190ms/step - accuracy: 0.9047 - loss: 0.2375
Epoch 6/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 189ms/step - accuracy: 0.9173 - loss: 0.2183
Epoch 7/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 188ms/step - accuracy: 0.9362 - loss: 0.1748
Epoch 8/10
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 197ms/step - accuracy: 0.9