In [15]:
import os
import shutil
import random

# Paths
base_dir = "archive (9)"
train_dir = os.path.join(base_dir, "train_cancer")
test_dir = os.path.join(base_dir, "testing_cancer")

# Subfolders for classes (one directory per label inside train_dir)
classes = ["Benign", "Malignant"]

# Split percentage (e.g., 20% of data goes to the test set)
split_ratio = 0.2

# Fixed seed and a private generator: the same files are picked on every
# fresh run, and the global `random` state is left untouched.
split_seed = 42
rng = random.Random(split_seed)

# Create testing_cancer directory and class subdirectories
os.makedirs(test_dir, exist_ok=True)
for class_name in classes:
    os.makedirs(os.path.join(test_dir, class_name), exist_ok=True)

# Loop through each class folder
for class_name in classes:
    class_train_dir = os.path.join(train_dir, class_name)
    class_test_dir = os.path.join(test_dir, class_name)

    # Files are MOVED out of the training folder, so running this cell twice
    # would carve a further 20% out of what is left. Skip a class whose test
    # folder is already populated to keep the cell safe to re-run.
    if os.listdir(class_test_dir):
        print(f"Skipping {class_name}: the test set already contains files.")
        continue

    # os.listdir returns entries in arbitrary order, so sort before the
    # seeded shuffle; keep regular files only so that sub-directories are
    # neither counted nor moved.
    files = sorted(
        entry
        for entry in os.listdir(class_train_dir)
        if os.path.isfile(os.path.join(class_train_dir, entry))
    )

    # Shuffle the files
    rng.shuffle(files)

    # Calculate the split index (rounded down)
    split_index = int(len(files) * split_ratio)

    # Move the files to the test directory
    for file_name in files[:split_index]:
        src = os.path.join(class_train_dir, file_name)
        dst = os.path.join(class_test_dir, file_name)
        shutil.move(src, dst)

    print(f"Moved {split_index} files from {class_name} to the test set.")

print("Data split complete!")


Moved 6 files from Benign to the test set.
Moved 48 files from Malignant to the test set.
Data split complete!


In [16]:
import os

# Paths
train_dir = "archive (9)/train_cancer"

# Subfolders for classes. The spelling matches the folder names used when the
# data was split ("Benign"/"Malignant"); lowercase names only resolve on a
# case-insensitive filesystem.
classes = ["Benign", "Malignant"]

# Dictionary to hold the file counts
file_counts = {}

# Loop through each class folder and count the number of files
for class_name in classes:
    class_dir = os.path.join(train_dir, class_name)

    # Keep regular files only so that a stray sub-directory is not counted
    # as an image.
    files = [
        entry
        for entry in os.listdir(class_dir)
        if os.path.isfile(os.path.join(class_dir, entry))
    ]

    # Count the number of files
    file_counts[class_name] = len(files)

# Print the results
for class_name, count in file_counts.items():
    print(f"{class_name}: {count} files")


benign: 24 files
malignant: 192 files


In [17]:
import tensorflow as tf

# Root folders of the training and test image sets
Train_dir, Test_dir = (
    "archive (9)/train_cancer",
    "archive (9)/testing_cancer",
)

In [18]:
# Read the training folder first, then the test folder, with identical
# image size and batch size for both.
train_data, test_data = (
    tf.keras.preprocessing.image_dataset_from_directory(
        folder,
        image_size=(154, 154),
        batch_size=32,
    )
    for folder in (Train_dir, Test_dir)
)

Found 216 files belonging to 2 classes.
Found 54 files belonging to 2 classes.


In [22]:
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import layers, models
from sklearn.utils import class_weight
import numpy as np

# Target size passed as `image_size` when the images are loaded
IMG_shape = (154, 154)

# Dataset locations
Train_dir = "archive (9)/train_cancer"
Test_dir = "archive (9)/testing_cancer"

# Load both splits (training first, then test) with the same settings
train_data, test_data = [
    tf.keras.preprocessing.image_dataset_from_directory(
        directory,
        image_size=IMG_shape,
        batch_size=32,
    )
    for directory in (Train_dir, Test_dir)
]

# Helper that gathers the labels of a whole dataset
def get_labels(dataset):
    """Collect every label yielded by ``dataset`` into one NumPy array.

    ``dataset`` is iterated once. Each item must unpack into an
    ``(images, label_batch)`` pair whose ``label_batch`` provides ``.numpy()``;
    the images are ignored. Labels are returned in iteration order.
    """
    return np.array([
        label
        for _, label_batch in dataset
        for label in label_batch.numpy()
    ])

# Get labels for computing class weights
train_labels = get_labels(train_data)
class_names = train_data.class_names  # ['Benign', 'Malignant']
classes = np.arange(len(class_names))  # Array of class indices

# Compute class weights. 'balanced' weights each class inversely to its
# frequency, so the rarer class contributes more to the loss.
class_weights = class_weight.compute_class_weight(
    'balanced',
    classes=classes,
    y=train_labels
)

# Convert class weights to a {class index: weight} dictionary. Plain int/float
# values keep the printout free of NumPy scalar wrappers.
class_weights_dict = {
    int(index): float(weight) for index, weight in zip(classes, class_weights)
}
print("Class Weights:", class_weights_dict)

# Build and compile the model.
# The input shape is derived from IMG_shape so the network always matches the
# size the datasets were loaded with; the trailing 3 is the channel count.
base_model = EfficientNetB0(input_shape=(*IMG_shape, 3), include_top=False, weights='imagenet')
base_model.trainable = False  # freeze the pretrained backbone; only the new head is trained

model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(len(class_names), activation='softmax')  # one output unit per class
])

# Sparse categorical cross-entropy: labels are integer class indices.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model with class weights.
# NOTE(review): test_data doubles as the validation set here, so the val_*
# metrics and the final evaluation below are measured on the same images.
history = model.fit(train_data,
                    validation_data=test_data,
                    epochs=10,
                    class_weight=class_weights_dict)  # Apply class weights

# Evaluate the model
results = model.evaluate(test_data)
print("Test Loss, Test Accuracy:", results)


Found 216 files belonging to 2 classes.
Found 54 files belonging to 2 classes.
Class Weights: {0: 4.5, 1: 0.5625}
Epoch 1/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 2s/step - accuracy: 0.4321 - loss: 0.8468 - val_accuracy: 0.8889 - val_loss: 0.4022
Epoch 2/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 516ms/step - accuracy: 0.7860 - loss: 0.4262 - val_accuracy: 0.4444 - val_loss: 0.7793
Epoch 3/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 827ms/step - accuracy: 0.7494 - loss: 0.3463 - val_accuracy: 0.8704 - val_loss: 0.3418
Epoch 4/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 784ms/step - accuracy: 0.9033 - loss: 0.2739 - val_accuracy: 0.8889 - val_loss: 0.2658
Epoch 5/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 811ms/step - accuracy: 0.8922 - loss: 0.2325 - val_accuracy: 0.8148 - val_loss: 0.4023
Epoch 6/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 893ms/step - 