In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import shutil
import cv2
import pickle

In [16]:
# libraries for image processing
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

In [3]:
# Read the fold table (Folds.csv) into a DataFrame
fold_df = pd.read_csv(filepath_or_buffer='dataset/Folds.csv')

In [4]:
# Dataset root and class names.
# NOTE(review): the copy step in this notebook reads images from
# "./dataset/BreaKHis_v1/" (capital K) -- confirm whether img_path should match.
img_path = "./BreakHis_v1"
# Class names match the label component of the image paths and the
# sub-folders created under ./Cancer ('benign' / 'malignant').
classes = ['benign', 'malignant']

In [5]:
# Rename the CSV's `filename` column (it holds a full relative path) to `path`.
# Guarded so that re-running this cell stays a no-op: a later cell adds a new
# `filename` column, and renaming that one too would create a second `path`
# column.
if "path" not in fold_df.columns:
    fold_df = fold_df.rename(columns={"filename": "path"})
# Show the first rows of the table
fold_df.head(3)

Unnamed: 0,fold,mag,grp,path
0,1,100,train,BreaKHis_v1/histology_slides/breast/benign/SOB...
1,1,100,train,BreaKHis_v1/histology_slides/breast/benign/SOB...
2,1,100,train,BreaKHis_v1/histology_slides/breast/benign/SOB...


In [6]:
# Display the path stored in the row labelled 0 as an example
fold_df.loc[0, 'path']

'BreaKHis_v1/histology_slides/breast/benign/SOB/adenosis/SOB_B_A_14-22549AB/100X/SOB_B_A-14-22549AB-100-001.png'

In [7]:
# Derive two helper columns from each image path:
#   filename - the last '/'-separated component
#   label    - the component at index 3 ('benign' in the example path above)
fold_df['filename'] = fold_df['path'].map(lambda p: p.rsplit('/', 1)[-1])
fold_df['label'] = fold_df['path'].map(lambda p: p.split('/')[3])

In [8]:
# Preview the first three rows, now including the derived filename/label columns
fold_df.head(n=3)

Unnamed: 0,fold,mag,grp,path,filename,label
0,1,100,train,BreaKHis_v1/histology_slides/breast/benign/SOB...,SOB_B_A-14-22549AB-100-001.png,benign
1,1,100,train,BreaKHis_v1/histology_slides/breast/benign/SOB...,SOB_B_A-14-22549AB-100-002.png,benign
2,1,100,train,BreaKHis_v1/histology_slides/breast/benign/SOB...,SOB_B_A-14-22549AB-100-003.png,benign


In [None]:
# Create the output directory; exist_ok avoids a FileExistsError when the
# notebook is re-run and ./Cancer/ is already present
os.makedirs('./Cancer/', exist_ok=True)

In [9]:
# Rebuild ./Cancer from scratch with one sub-folder per class
cancer_dir = "./Cancer"
if os.path.exists(cancer_dir):
    shutil.rmtree(cancer_dir)
os.makedirs(os.path.join(cancer_dir, "benign"))
os.makedirs(os.path.join(cancer_dir, "malignant"))

# The fold table has a `fold` column, so the same image path may be listed more
# than once; keep one row per path so that each file is copied only once.
unique_images = fold_df.drop_duplicates(subset="path")

# Copy (not move) every image into its class folder; the originals stay in place.
# Iterating over the three needed columns avoids building a Series per row.
for rel_path, filename, label in zip(
    unique_images["path"], unique_images["filename"], unique_images["label"]
):
    src = os.path.join("./dataset/BreaKHis_v1/", rel_path)
    dest = os.path.join(cancer_dir, label, filename)
    shutil.copyfile(src, dest)

print("Dataset reorganized into benign and malignant folders")

Dataset reorganized into benign and malignant folders


In [13]:
# Print how many files ended up in each class folder (benign first)
for class_dir in ("./Cancer/benign", "./Cancer/malignant"):
    n_images = len(os.listdir(class_dir))
    print(n_images)

2480
5429


All images are now stored under a single `./Cancer` folder, split into `benign` and `malignant` subfolders.

In [14]:
# Adapted from Ben's train_model.py

# Dataset location and loader settings
data_dir = "./Cancer"
batch_size = 32
image_size = (150, 150)

# Multiply pixel values by 1/255 and reserve 20% of the images for validation
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Both generators read the same directory with the same settings; only the
# subset ('training' vs 'validation') differs
shared_flow_args = dict(
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
)
train_gen = datagen.flow_from_directory(data_dir, subset='training', **shared_flow_args)
val_gen = datagen.flow_from_directory(data_dir, subset='validation', **shared_flow_args)

# Persist the class-name -> index mapping of the training generator
with open('class_indices.pkl', 'wb') as f:
    pickle.dump(train_gen.class_indices, f)

Found 6328 images belonging to 2 classes.
Found 1581 images belonging to 2 classes.


In [15]:
# Model: three Conv2D + MaxPooling2D stages followed by a dense classifier head.
# The generators are created with class_mode='categorical' (one-hot labels, one
# column per class), so the output layer has one unit per class and uses softmax:
# the class scores then form a probability distribution, whereas independent
# sigmoid units could rate both classes high (or both low) for the same image.
model = Sequential([
    # Input size follows image_size used by the generators, plus 3 channels
    # (same as the previously hard-coded (150, 150, 3))
    Conv2D(32, (3, 3), activation='relu', input_shape=(*image_size, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(len(train_gen.class_indices), activation='softmax')
])

In [17]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train for up to 20 epochs; stop once val_loss has not improved for 3 epochs
# in a row and restore the best weights seen
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=20,
    callbacks=[early_stopping],
)

# Write the trained model to disk
model.save('image_classifier.h5')
print("Model training completed and saved!")

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Model training completed and saved!
