In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("andrewmvd/medical-mnist")

print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Downloading from https://www.kaggle.com/api/v1/datasets/download/andrewmvd/medical-mnist?dataset_version_number=1...


100%|██████████| 84.8M/84.8M [00:07<00:00, 11.9MB/s]

Extracting files...





Path to dataset files: /Users/I569354/.cache/kagglehub/datasets/andrewmvd/medical-mnist/versions/1


In [6]:
import numpy as np
import matplotlib.pyplot as plt
import medmnist
from medmnist import INFO, Evaluator

In [7]:
def download_dataset(data_flag):
  print(f"Dataset source information : MedMNIST v{medmnist.__version__} @ {medmnist.HOMEPAGE}")

  download = True
  
  info = INFO[data_flag]

  DataClass = getattr(medmnist, info['python_class'])

  print('\nBreastMNIST')
  print(info['description'])

  # Downloading dataset
  train_dataset = DataClass(split='train', download=download)
  val_dataset = DataClass(split='val', download=download)
  test_dataset = DataClass(split='test', download=download)

  print('\nTraining Dataset:')
  print(train_dataset)
  # print(train_dataset.imgs.shape)
  if train_dataset.imgs.ndim == 3:
    print('Adding channel to images...')
    train_dataset.imgs = np.expand_dims(train_dataset.imgs, axis=-1)

  print('\nValidation Dataset:')
  print(val_dataset)
  # print(val_dataset.imgs.shape)
  if val_dataset.imgs.ndim == 3:
    print('Adding channel to images...')
    val_dataset.imgs = np.expand_dims(val_dataset.imgs, axis=-1)

  print('\nTesting Dataset:')
  print(test_dataset)
  # print(test_dataset.imgs.shape)
  if test_dataset.imgs.ndim == 3:
    print('Adding channel to images...')
    test_dataset.imgs = np.expand_dims(test_dataset.imgs, axis=-1)

  print('\nShapes of images:')
  print('Training: ', train_dataset.imgs.shape)
  print('Validation: ', val_dataset.imgs.shape)
  print('Testing: ', test_dataset.imgs.shape)

  return train_dataset, val_dataset, test_dataset

In [13]:
train_dataset, val_dataset, test_dataset = download_dataset('breastmnist')


Dataset source information : MedMNIST v3.0.2 @ https://github.com/MedMNIST/MedMNIST/

BreastMNIST
The BreastMNIST is based on a dataset of 780 breast ultrasound images. It is categorized into 3 classes: normal, benign, and malignant. As we use low-resolution images, we simplify the task into binary classification by combining normal and benign as positive and classifying them against malignant as negative. We split the source dataset with a ratio of 7:1:2 into training, validation and test set. The source images of 1×500×500 are resized into 1×28×28.
Using downloaded and verified file: /Users/I569354/.medmnist/breastmnist.npz
Using downloaded and verified file: /Users/I569354/.medmnist/breastmnist.npz
Using downloaded and verified file: /Users/I569354/.medmnist/breastmnist.npz

Training Dataset:
Dataset BreastMNIST of size 28 (breastmnist)
    Number of datapoints: 546
    Root location: /Users/I569354/.medmnist
    Split: train
    Task: binary-class
    Number of channels: 1
    Mean

In [18]:
class_labels = ['Malignant',
                'Normal/Benign']
# Check missing images in the dataset
if len(train_dataset.imgs) != 546:
  print("WARNING: Missing images in training dataset.")
  print(f"Found {len(train_dataset.imgs)}, should be 546.")
else:
  print("SUCCESS: No missing images in training dataset.")

if len(val_dataset.imgs) != 78:
  print("WARNING: Missing images in validation dataset.")
  print(f"Found {len(val_dataset.imgs)}, should be 78.")
else:
  print("SUCCESS: No missing images in validation dataset.")

if len(test_dataset.imgs) != 156:
  print("WARNING: Missing images in testing dataset.")
  print(f"Found {len(train_dataset.imgs)}, should be 156.")
else:
  print("SUCCESS: No missing images in test dataset.")

SUCCESS: No missing images in training dataset.
SUCCESS: No missing images in validation dataset.
SUCCESS: No missing images in test dataset.


In [19]:
# Normalising images in the dataset
# Pixel values lie between 0-255, 
# division by 255 will scale the values between 0 and 1
train_dataset.imgs = train_dataset.imgs/255.0 
val_dataset.imgs = val_dataset.imgs/255.0
test_dataset.imgs = test_dataset.imgs/255.0

In [47]:
from keras.models import Sequential
from keras.layers import Flatten, Dense, Conv2D, MaxPooling2D, Dropout

model = Sequential()

model.add(Conv2D(32, (3, 3), padding="same", activation="relu", input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2,2), strides=(1,1), padding="same"))

model.add(Conv2D(32, (3, 3), padding="same", activation="relu"))
model.add(Conv2D(64, (3, 3), padding="same", activation="relu"))
model.add(Conv2D(128, (3, 3), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(2,2), strides=(1,1),padding="same"))

model.add(Flatten())
model.add(Dense(128, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(1, activation="sigmoid"))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [44]:
# Class imbalance handling through calculation of class weights

from sklearn.utils import class_weight
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

breast_class_weights = class_weight.compute_class_weight('balanced',
                                                         classes = np.unique(train_dataset.labels[:,0]),
                                                         y = train_dataset.labels[:, 0])

weights = { 0 : breast_class_weights[0], 1 : breast_class_weights[1] }
print(f"Class weights for imbalance {weights}")



print("Preprocessing and augmentation of images with Standardisation, Rotation and Horizontal Flips..")
datagen = ImageDataGenerator(rotation_range=10,
                             horizontal_flip=True)
my_callbacks = [tf.keras.callbacks.EarlyStopping(patience=10)]

Class weights for imbalance {0: 1.8571428571428572, 1: 0.6842105263157895}
Preprocessing and augmentation of images with Standardisation, Rotation and Horizontal Flips..


In [45]:
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [46]:
history = model.fit(datagen.flow(train_dataset.imgs, train_dataset.labels),
                    epochs=100,
                    callbacks=my_callbacks,
                    validation_data=(val_dataset.imgs, val_dataset.labels),
                    shuffle=True,
                    class_weight=weights)

Epoch 1/100


  self._warn_if_super_not_called()
2024-10-24 12:02:12.502906: W tensorflow/core/framework/op_kernel.cc:1828] INVALID_ARGUMENT: TypeError: `generator` yielded an element that did not match the expected structure. The expected structure was (tf.float32, tf.uint8, tf.float32), but the yielded element was (array([[[[0.6187597 ],
         [0.6087174 ],
         [0.6042217 ],
         ...,
         [0.68128   ],
         [0.6775488 ],
         [0.6745463 ]],

        [[0.6067892 ],
         [0.6084383 ],
         [0.59418255],
         ...,
         [0.5349604 ],
         [0.5324024 ],
         [0.5253816 ]],

        [[0.5271776 ],
         [0.59249705],
         [0.5684988 ],
         ...,
         [0.4987047 ],
         [0.546794  ],
         [0.56442624]],

        ...,

        [[0.08235294],
         [0.08132268],
         [0.0766023 ],
         ...,
         [0.08235294],
         [0.08169755],
         [0.07983821]],

        [[0.08206154],
         [0.08103128],
         [0.0770152

InvalidArgumentError: Graph execution error:

Detected at node PyFunc defined at (most recent call last):
<stack traces unavailable>
TypeError: `generator` yielded an element that did not match the expected structure. The expected structure was (tf.float32, tf.uint8, tf.float32), but the yielded element was (array([[[[0.6187597 ],
         [0.6087174 ],
         [0.6042217 ],
         ...,
         [0.68128   ],
         [0.6775488 ],
         [0.6745463 ]],

        [[0.6067892 ],
         [0.6084383 ],
         [0.59418255],
         ...,
         [0.5349604 ],
         [0.5324024 ],
         [0.5253816 ]],

        [[0.5271776 ],
         [0.59249705],
         [0.5684988 ],
         ...,
         [0.4987047 ],
         [0.546794  ],
         [0.56442624]],

        ...,

        [[0.08235294],
         [0.08132268],
         [0.0766023 ],
         ...,
         [0.08235294],
         [0.08169755],
         [0.07983821]],

        [[0.08206154],
         [0.08103128],
         [0.07701527],
         ...,
         [0.07698391],
         [0.07864852],
         [0.07556193]],

        [[0.07843138],
         [0.07792283],
         [0.07450981],
         ...,
         [0.07480989],
         [0.07813998],
         [0.07479253]]],


       [[[0.6181365 ],
         [0.61851746],
         [0.610684  ],
         ...,
         [0.59139454],
         [0.60987514],
         [0.6303877 ]],

        [[0.5909889 ],
         [0.5974647 ],
         [0.6014144 ],
         ...,
         [0.59368014],
         [0.6113989 ],
         [0.63267326]],

        [[0.5604936 ],
         [0.5574461 ],
         [0.5847517 ],
         ...,
         [0.62232596],
         [0.62784326],
         [0.64184093]],

        ...,

        [[0.08556504],
         [0.08224251],
         [0.08143471],
         ...,
         [0.10196079],
         [0.10196079],
         [0.10196079]],

        [[0.0851841 ],
         [0.08128109],
         [0.07737806],
         ...,
         [0.09549951],
         [0.09626137],
         [0.09702323]],

        [[0.08480318],
         [0.08090016],
         [0.07699713],
         ...,
         [0.08851886],
         [0.09128649],
         [0.09166741]]],


       [[[0.5739572 ],
         [0.5037051 ],
         [0.51948947],
         ...,
         [0.54456383],
         [0.5425717 ],
         [0.540619  ]],

        [[0.57932705],
         [0.5134685 ],
         [0.51411957],
         ...,
         [0.53120244],
         [0.5398168 ],
         [0.54030496]],

        [[0.58974236],
         [0.52705413],
         [0.48600683],
         ...,
         [0.55940545],
         [0.5285197 ],
         [0.5260788 ]],

        ...,

        [[0.14631352],
         [0.144849  ],
         [0.14193186],
         ...,
         [0.53255063],
         [0.52268994],
         [0.48571447]],

        [[0.13638338],
         [0.13589521],
         [0.13947015],
         ...,
         [0.49133724],
         [0.4808357 ],
         [0.46128386]],

        [[0.13753846],
         [0.13997932],
         [0.14351521],
         ...,
         [0.4923136 ],
         [0.4827884 ],
         [0.46470106]]],


       ...,


       [[[0.4626523 ],
         [0.46404928],
         [0.50517446],
         ...,
         [0.5597482 ],
         [0.52947277],
         [0.53324056]],

        [[0.4252921 ],
         [0.43437245],
         [0.5344552 ],
         ...,
         [0.5554439 ],
         [0.5266788 ],
         [0.5346375 ]],

        [[0.34707475],
         [0.35110432],
         [0.48556876],
         ...,
         [0.48617148],
         [0.45155916],
         [0.442432  ]],

        ...,

        [[0.05674789],
         [0.04811798],
         [0.04651972],
         ...,
         [0.03082467],
         [0.03012618],
         [0.02942769]],

        [[0.06927954],
         [0.0627451 ],
         [0.06111448],
         ...,
         [0.03524984],
         [0.03793816],
         [0.03723967]],

        [[0.06858105],
         [0.0627451 ],
         [0.06080024],
         ...,
         [0.04980775],
         [0.04901735],
         [0.04796961]]],


       [[[0.61063147],
         [0.60866857],
         [0.5974066 ],
         ...,
         [0.6445677 ],
         [0.67514735],
         [0.7132484 ]],

        [[0.47983614],
         [0.48420575],
         [0.48743445],
         ...,
         [0.62010056],
         [0.6601194 ],
         [0.68825233]],

        [[0.44705883],
         [0.43711048],
         [0.43170914],
         ...,
         [0.5457172 ],
         [0.5704609 ],
         [0.5750783 ]],

        ...,

        [[0.21285367],
         [0.18153995],
         [0.1537464 ],
         ...,
         [0.3353684 ],
         [0.3238554 ],
         [0.31853908]],

        [[0.16801944],
         [0.14984499],
         [0.13183115],
         ...,
         [0.2954927 ],
         [0.31531158],
         [0.32156864]],

        [[0.15646015],
         [0.1438341 ],
         [0.12905821],
         ...,
         [0.29303133],
         [0.31547937],
         [0.32244837]]],


       [[[0.51813877],
         [0.49426022],
         [0.4519147 ],
         ...,
         [0.45986646],
         [0.44618922],
         [0.46440253]],

        [[0.5210764 ],
         [0.5013105 ],
         [0.4548523 ],
         ...,
         [0.5556    ],
         [0.56551874],
         [0.57433164]],

        [[0.520566  ],
         [0.51003885],
         [0.4852476 ],
         ...,
         [0.6142594 ],
         [0.6231323 ],
         [0.59963125]],

        ...,

        [[0.07391235],
         [0.07449988],
         [0.07508741],
         ...,
         [0.08627451],
         [0.08627451],
         [0.08578544]],

        [[0.07778966],
         [0.07837719],
         [0.07896471],
         ...,
         [0.08627451],
         [0.08627451],
         [0.08627451]],

        [[0.08166697],
         [0.08225449],
         [0.08278957],
         ...,
         [0.08627451],
         [0.08627451],
         [0.08627451]]]], dtype=float32), array([[1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0]], dtype=uint8)).
Traceback (most recent call last):

  File "/Users/I569354/Desktop/Praxisarbeiten/Studienarbeit_Sammlung/MedicaldeepLearning/.venv/lib/python3.9/site-packages/tensorflow/python/data/ops/from_generator_op.py", line 204, in generator_py_func
    flattened_values = nest.flatten_up_to(output_types, values)

  File "/Users/I569354/Desktop/Praxisarbeiten/Studienarbeit_Sammlung/MedicaldeepLearning/.venv/lib/python3.9/site-packages/tensorflow/python/data/util/nest.py", line 237, in flatten_up_to
    return nest_util.flatten_up_to(

  File "/Users/I569354/Desktop/Praxisarbeiten/Studienarbeit_Sammlung/MedicaldeepLearning/.venv/lib/python3.9/site-packages/tensorflow/python/util/nest_util.py", line 1541, in flatten_up_to
    return _tf_data_flatten_up_to(shallow_tree, input_tree)

  File "/Users/I569354/Desktop/Praxisarbeiten/Studienarbeit_Sammlung/MedicaldeepLearning/.venv/lib/python3.9/site-packages/tensorflow/python/util/nest_util.py", line 1570, in _tf_data_flatten_up_to
    _tf_data_assert_shallow_structure(shallow_tree, input_tree)

  File "/Users/I569354/Desktop/Praxisarbeiten/Studienarbeit_Sammlung/MedicaldeepLearning/.venv/lib/python3.9/site-packages/tensorflow/python/util/nest_util.py", line 1427, in _tf_data_assert_shallow_structure
    raise ValueError(

ValueError: The two structures don't have the same sequence length. Input structure has length 2, while shallow structure has length 3.


The above exception was the direct cause of the following exception:


Traceback (most recent call last):

  File "/Users/I569354/Desktop/Praxisarbeiten/Studienarbeit_Sammlung/MedicaldeepLearning/.venv/lib/python3.9/site-packages/tensorflow/python/ops/script_ops.py", line 270, in __call__
    ret = func(*args)

  File "/Users/I569354/Desktop/Praxisarbeiten/Studienarbeit_Sammlung/MedicaldeepLearning/.venv/lib/python3.9/site-packages/tensorflow/python/autograph/impl/api.py", line 643, in wrapper
    return func(*args, **kwargs)

  File "/Users/I569354/Desktop/Praxisarbeiten/Studienarbeit_Sammlung/MedicaldeepLearning/.venv/lib/python3.9/site-packages/tensorflow/python/data/ops/from_generator_op.py", line 206, in generator_py_func
    raise TypeError(

TypeError: `generator` yielded an element that did not match the expected structure. The expected structure was (tf.float32, tf.uint8, tf.float32), but the yielded element was (array([[[[0.6187597 ],
         [0.6087174 ],
         [0.6042217 ],
         ...,
         [0.68128   ],
         [0.6775488 ],
         [0.6745463 ]],

        [[0.6067892 ],
         [0.6084383 ],
         [0.59418255],
         ...,
         [0.5349604 ],
         [0.5324024 ],
         [0.5253816 ]],

        [[0.5271776 ],
         [0.59249705],
         [0.5684988 ],
         ...,
         [0.4987047 ],
         [0.546794  ],
         [0.56442624]],

        ...,

        [[0.08235294],
         [0.08132268],
         [0.0766023 ],
         ...,
         [0.08235294],
         [0.08169755],
         [0.07983821]],

        [[0.08206154],
         [0.08103128],
         [0.07701527],
         ...,
         [0.07698391],
         [0.07864852],
         [0.07556193]],

        [[0.07843138],
         [0.07792283],
         [0.07450981],
         ...,
         [0.07480989],
         [0.07813998],
         [0.07479253]]],


       [[[0.6181365 ],
         [0.61851746],
         [0.610684  ],
         ...,
         [0.59139454],
         [0.60987514],
         [0.6303877 ]],

        [[0.5909889 ],
         [0.5974647 ],
         [0.6014144 ],
         ...,
         [0.59368014],
         [0.6113989 ],
         [0.63267326]],

        [[0.5604936 ],
         [0.5574461 ],
         [0.5847517 ],
         ...,
         [0.62232596],
         [0.62784326],
         [0.64184093]],

        ...,

        [[0.08556504],
         [0.08224251],
         [0.08143471],
         ...,
         [0.10196079],
         [0.10196079],
         [0.10196079]],

        [[0.0851841 ],
         [0.08128109],
         [0.07737806],
         ...,
         [0.09549951],
         [0.09626137],
         [0.09702323]],

        [[0.08480318],
         [0.08090016],
         [0.07699713],
         ...,
         [0.08851886],
         [0.09128649],
         [0.09166741]]],


       [[[0.5739572 ],
         [0.5037051 ],
         [0.51948947],
         ...,
         [0.54456383],
         [0.5425717 ],
         [0.540619  ]],

        [[0.57932705],
         [0.5134685 ],
         [0.51411957],
         ...,
         [0.53120244],
         [0.5398168 ],
         [0.54030496]],

        [[0.58974236],
         [0.52705413],
         [0.48600683],
         ...,
         [0.55940545],
         [0.5285197 ],
         [0.5260788 ]],

        ...,

        [[0.14631352],
         [0.144849  ],
         [0.14193186],
         ...,
         [0.53255063],
         [0.52268994],
         [0.48571447]],

        [[0.13638338],
         [0.13589521],
         [0.13947015],
         ...,
         [0.49133724],
         [0.4808357 ],
         [0.46128386]],

        [[0.13753846],
         [0.13997932],
         [0.14351521],
         ...,
         [0.4923136 ],
         [0.4827884 ],
         [0.46470106]]],


       ...,


       [[[0.4626523 ],
         [0.46404928],
         [0.50517446],
         ...,
         [0.5597482 ],
         [0.52947277],
         [0.53324056]],

        [[0.4252921 ],
         [0.43437245],
         [0.5344552 ],
         ...,
         [0.5554439 ],
         [0.5266788 ],
         [0.5346375 ]],

        [[0.34707475],
         [0.35110432],
         [0.48556876],
         ...,
         [0.48617148],
         [0.45155916],
         [0.442432  ]],

        ...,

        [[0.05674789],
         [0.04811798],
         [0.04651972],
         ...,
         [0.03082467],
         [0.03012618],
         [0.02942769]],

        [[0.06927954],
         [0.0627451 ],
         [0.06111448],
         ...,
         [0.03524984],
         [0.03793816],
         [0.03723967]],

        [[0.06858105],
         [0.0627451 ],
         [0.06080024],
         ...,
         [0.04980775],
         [0.04901735],
         [0.04796961]]],


       [[[0.61063147],
         [0.60866857],
         [0.5974066 ],
         ...,
         [0.6445677 ],
         [0.67514735],
         [0.7132484 ]],

        [[0.47983614],
         [0.48420575],
         [0.48743445],
         ...,
         [0.62010056],
         [0.6601194 ],
         [0.68825233]],

        [[0.44705883],
         [0.43711048],
         [0.43170914],
         ...,
         [0.5457172 ],
         [0.5704609 ],
         [0.5750783 ]],

        ...,

        [[0.21285367],
         [0.18153995],
         [0.1537464 ],
         ...,
         [0.3353684 ],
         [0.3238554 ],
         [0.31853908]],

        [[0.16801944],
         [0.14984499],
         [0.13183115],
         ...,
         [0.2954927 ],
         [0.31531158],
         [0.32156864]],

        [[0.15646015],
         [0.1438341 ],
         [0.12905821],
         ...,
         [0.29303133],
         [0.31547937],
         [0.32244837]]],


       [[[0.51813877],
         [0.49426022],
         [0.4519147 ],
         ...,
         [0.45986646],
         [0.44618922],
         [0.46440253]],

        [[0.5210764 ],
         [0.5013105 ],
         [0.4548523 ],
         ...,
         [0.5556    ],
         [0.56551874],
         [0.57433164]],

        [[0.520566  ],
         [0.51003885],
         [0.4852476 ],
         ...,
         [0.6142594 ],
         [0.6231323 ],
         [0.59963125]],

        ...,

        [[0.07391235],
         [0.07449988],
         [0.07508741],
         ...,
         [0.08627451],
         [0.08627451],
         [0.08578544]],

        [[0.07778966],
         [0.07837719],
         [0.07896471],
         ...,
         [0.08627451],
         [0.08627451],
         [0.08627451]],

        [[0.08166697],
         [0.08225449],
         [0.08278957],
         ...,
         [0.08627451],
         [0.08627451],
         [0.08627451]]]], dtype=float32), array([[1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0]], dtype=uint8)).


	 [[{{node PyFunc}}]]
	 [[IteratorGetNext]] [Op:__inference_one_step_on_iterator_8127]

2024-10-24 12:02:12.539993: W tensorflow/core/framework/op_kernel.cc:1828] INVALID_ARGUMENT: TypeError: `generator` yielded an element that did not match the expected structure. The expected structure was (tf.float32, tf.uint8, tf.float32), but the yielded element was (array([[[[0.38772213],
         [0.39752835],
         [0.411318  ],
         ...,
         [0.50597423],
         [0.5058824 ],
         [0.5133844 ]],

        [[0.29846188],
         [0.30119514],
         [0.3152473 ],
         ...,
         [0.5058824 ],
         [0.5067285 ],
         [0.51295817]],

        [[0.53477526],
         [0.50812346],
         [0.45982304],
         ...,
         [0.43455455],
         [0.44185168],
         [0.46788388]],

        ...,

        [[0.09710129],
         [0.09523139],
         [0.10615807],
         ...,
         [0.25623882],
         [0.26159796],
         [0.26139504]],

        [[0.09765554],
         [0.09104216],
         [0.10403273],
         ...,
         [0.26119