In [2]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import pandas as pd

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [3]:
# Show the top-level entries of the Kaggle input directory (the dataset folders available to this notebook)
os.listdir("/kaggle/input")

['abnormal-final', 'normal-final']

In [4]:
# Root data folder: each sub-directory is treated as one class, and its name is used as the label
data_dir = "/kaggle/input"

In [5]:
# Define image parameters
# Images are resized to img_width x img_height by the data generators
img_width, img_height = 224, 224
input_shape = (img_width, img_height, 1)  # 1 channel: images are loaded with color_mode='grayscale'

In [6]:
# Collect every image path with its class label (the name of the sub-directory
# it lives in), then split the paths into training and validation sets.
images = []
labels = []

# os.listdir returns entries in arbitrary order. Sorting both listings fixes the
# input order so that the seeded split below is reproducible across runs/machines.
for class_name in sorted(os.listdir(data_dir)):
    class_dir = os.path.join(data_dir, class_name)
    if not os.path.isdir(class_dir):
        continue  # ignore stray files sitting next to the class folders
    for image_name in sorted(os.listdir(class_dir)):
        image_path = os.path.join(class_dir, image_name)
        if not os.path.isfile(image_path):
            continue  # ignore nested folders; only regular files can be images
        images.append(image_path)
        labels.append(class_name)

# 80/20 split, stratified so both sets keep the same class proportions
images_train, images_val, labels_train, labels_val = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels
)

In [7]:
# Training-time generator: rescales pixels to [0, 1] and applies random augmentation
datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# Validation-time generator: rescale only. Random augmentation here would make the
# validation metrics noisy and different on every pass over the same images.
val_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Arguments shared by both generators. The order of `classes` defines the label
# encoding for class_mode='binary': 'normal-final' -> 0, 'abnormal-final' -> 1.
# (The former `class_labels` argument is not a flow_from_dataframe parameter; it
# was silently absorbed by **kwargs and had no effect, so it is dropped.)
flow_kwargs = dict(
    x_col='image',
    y_col='label',
    target_size=(img_width, img_height),
    batch_size=32,
    color_mode='grayscale',  # single-channel input, matches input_shape
    class_mode='binary',
    classes=['normal-final', 'abnormal-final'],
)

# Load and preprocess the training data (shuffled every epoch)
train_generator = datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'image': images_train, 'label': labels_train}),
    shuffle=True,
    **flow_kwargs
)

# Load and preprocess the validation data. shuffle=False keeps the batch order
# aligned with validation_generator.classes for later evaluation.
validation_generator = val_datagen.flow_from_dataframe(
    dataframe=pd.DataFrame({'image': images_val, 'label': labels_val}),
    shuffle=False,
    **flow_kwargs
)

Found 345 validated image filenames belonging to 2 classes.
Found 87 validated image filenames belonging to 2 classes.


In [8]:
# CNN: three Conv2D + MaxPooling2D stages (32 -> 64 -> 128 filters), then a
# dense head ending in a single sigmoid unit for binary classification.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Adam optimizer with binary cross-entropy loss; accuracy is tracked during training
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [10]:
# Print the layer-by-layer architecture with output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 32)      320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 111, 111, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 54, 54, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 52, 52, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 26, 26, 128)      0

In [None]:
## Train the model
# len(generator) is the number of batches in one full pass, including the final
# partial batch. The previous `samples // batch_size` floor division dropped that
# last batch: with 87 validation images and batch size 32 only 64 were evaluated,
# and 25 of the 345 training images were skipped in each epoch.
# The returned History object is kept so the loss/accuracy curves can be inspected.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=10,
    validation_data=validation_generator,
    validation_steps=len(validation_generator)
)

In [None]:
# Save the model to "breast-cancer-classification.keras" (relative path, i.e. the current working directory)
model.save("breast-cancer-classification.keras")

In [None]:
from sklearn.metrics import classification_report

# Ground-truth labels in generator order (the validation generator uses shuffle=False)
y_true = validation_generator.classes

# Sigmoid outputs for every validation image, thresholded at 0.5 into hard 0/1 labels
y_prob = model.predict(validation_generator)
y_pred = (y_prob > 0.5).astype(int)

# Per-class precision / recall / F1 summary
report = classification_report(
    y_true,
    y_pred,
    target_names=['Normal-final', 'Abnormal-final'],
)
print(report)

In [None]:
# Same evaluation report, printed again with shorter class display names
display_names = ['Normal', 'Abnormal']
report = classification_report(y_true, y_pred, target_names=display_names)
print(report)