In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf 
from tensorflow.keras.preprocessing import image
import keras




In [3]:
# Root directory of the BreaKHis_v1 dataset.
# Set the BREAKHIS_DIR environment variable to point the notebook at another
# copy of the data; when it is unset the original local path is used.
path = os.environ.get(
    'BREAKHIS_DIR',
    'C:/Users/diogo/Documents/GitHub/Deep-Learning/BreaKHis_v1/',
)

In [None]:
# Print every file below the dataset root as a sanity check of the download.
# The walk starts from `path` rather than a second copy of the directory
# literal, so the listing always covers the directory the images are read from.
for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [None]:
# Collect every PNG below the dataset root (recursively).
# glob does not guarantee any ordering, so the paths are sorted: row order
# feeds the seeded train/test split, and must be stable for it to be reproducible.
image_files = sorted(glob.glob(os.path.join(path, '**', '*.png'), recursive=True))

# Fail early with a clear message instead of a cryptic array-stacking error later.
if not image_files:
    raise FileNotFoundError(f"No .png images found under {path!r}")

# One row per image file
df_images = pd.DataFrame(image_files, columns=['file_path'])

print(df_images)

In [6]:
from tensorflow.keras.applications import ResNet50, VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam

# Load ResNet50 with ImageNet weights and without its original classification head
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the pre-trained base so that only the new head below is trained
base_model.trainable = False

# Classification head: pooled backbone features -> 256-unit hidden layer -> 1 sigmoid unit
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dense(1, activation='sigmoid')  # For binary classification
    # Use Dense(8, activation='softmax') for multi-class classification (e.g., 8 cancer types)
])

# Compile the model.
# Precision and recall are passed as metric objects instead of the string
# aliases 'precision'/'recall': the string lookup is not available in every
# Keras version, whereas the objects always resolve. The explicit `name`
# keeps the keys reported in the training logs unchanged.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
    ],
)

In [None]:
# Image loading and preprocessing

def preprocess_image(file_path):
    """Load one image file and turn it into a single-item, model-ready batch.

    Args:
        file_path: Path of the image file to load.

    Returns:
        Array with a leading batch axis of length 1 holding the image resized
        to 224x224 and passed through ResNet50's `preprocess_input`.
    """
    pil_image = image.load_img(file_path, target_size=(224, 224))
    pixels = image.img_to_array(pil_image)
    # Add the batch axis up front so per-image results can be stacked along axis 0
    batch = pixels[np.newaxis, ...]
    return tf.keras.applications.resnet50.preprocess_input(batch)

# Binary label derived from the file path: 1 when the path contains
# 'malignant', 0 otherwise (treated as benign)
df_images['label'] = (
    df_images['file_path'].str.contains('malignant', regex=False).astype('int64')
)

# Preprocess every image and stack the single-item batches along the batch axis
X = np.concatenate(
    [preprocess_image(file_path) for file_path in df_images['file_path']],
    axis=0,
)
y = df_images['label'].values

# Split the data into training (70%) and testing (30%) sets.
# stratify=y keeps the benign/malignant ratio the same in both subsets, so the
# reported test metrics are not skewed by an unlucky draw.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)


print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")

In [None]:
# Train the ResNet50-based classifier; the held-out split is scored after every epoch
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
)

In [None]:
# Evaluate the model on the test set
y_prob = model.predict(X_test)  # sigmoid outputs, one column per sample

# Turn probabilities into hard labels. np.round maps exactly 0.5 to 0, so
# "> 0.5" is the same cut-off; the result is flattened to 1-D integers so it
# has the same form as y_test.
y_pred = (y_prob > 0.5).astype(int).ravel()

# Labels are fixed to [0, 1] so both reports keep their layout even if one
# class is missing from the predictions (0 = benign, 1 = malignant).
print(classification_report(y_test, y_pred, labels=[0, 1], target_names=['benign', 'malignant']))
print(confusion_matrix(y_test, y_pred, labels=[0, 1]))

In [11]:
# Load VGG16 without the top layer (pre-trained on ImageNet)
base_model_vgg16 = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the pre-trained base so that only the new head below is trained
base_model_vgg16.trainable = False

# Classification head: pooled backbone features -> 256-unit hidden layer -> 1 sigmoid unit
model_vgg16 = Sequential([
    base_model_vgg16,
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dense(1, activation='sigmoid')  # For binary classification
    # Use Dense(8, activation='softmax') for multi-class classification (e.g., 8 cancer types)
])

# Compile the model.
# Precision and recall are passed as metric objects instead of the string
# aliases 'precision'/'recall': the string lookup is not available in every
# Keras version, whereas the objects always resolve. The explicit `name`
# keeps the keys reported in the training logs unchanged.
# NOTE(review): the input arrays were prepared with the ResNet50
# preprocess_input; presumably VGG16 expects the same transformation - confirm.
model_vgg16.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
    ],
)

In [None]:
# Train the VGG16-based classifier; the held-out split is scored after every epoch
model_vgg16.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
)

In [None]:
# Predict and evaluate
y_prob_vgg16 = model_vgg16.predict(X_test)  # sigmoid outputs, one column per sample

# Turn probabilities into hard labels. np.round maps exactly 0.5 to 0, so
# "> 0.5" is the same cut-off; the result is flattened to 1-D integers so it
# has the same form as y_test.
y_pred_vgg16 = (y_prob_vgg16 > 0.5).astype(int).ravel()

# Labels are fixed to [0, 1] so both reports keep their layout even if one
# class is missing from the predictions (0 = benign, 1 = malignant).
print(classification_report(y_test, y_pred_vgg16, labels=[0, 1], target_names=['benign', 'malignant']))
print(confusion_matrix(y_test, y_pred_vgg16, labels=[0, 1]))