In [None]:

import tensorflow as tf

gpu_devices = tf.config.experimental.list_physical_devices('GPU')

if gpu_devices:
    tf.config.experimental.set_visible_devices(gpu_devices[1], 'GPU')
    print(gpu_devices[1])
    print('Success')



In [2]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
# Path to your dataset
data_dir = '/kaggle/input/bone_marrow_cell_dataset'

# List to store image paths and labels
image_paths = []
labels = []

# Loop through class directories

for class_name in os.listdir(data_dir):
    class_dir = os.path.join(data_dir, class_name)
    
    if os.path.isdir(class_dir):
        for sub_dir_name in os.listdir(class_dir):
            sub_dir_path = os.path.join(class_dir, sub_dir_name)
            
            if os.path.isdir(sub_dir_path):
                for img_name in tqdm(os.listdir(sub_dir_path), desc=f"Processing {class_name}/{sub_dir_name}"):
                    img_path = os.path.join(sub_dir_path, img_name)
                    if os.path.isfile(img_path):
                        image_paths.append(img_path)
                        labels.append(class_name)
            else:
                img_path = os.path.join(class_dir, sub_dir_name)
                if os.path.isfile(img_path):
                    image_paths.append(img_path)
                    labels.append(class_name)
    else:
        img_path = class_dir  # For classes without subdirectories
        if os.path.isfile(img_path):
            image_paths.append(img_path)
            labels.append(class_name)

Processing NGS/3001-4000: 100%|██████████| 1000/1000 [00:00<00:00, 3487.87it/s]
Processing NGS/19001-20000: 100%|██████████| 1000/1000 [00:00<00:00, 3342.97it/s]
Processing NGS/0001-1000: 100%|██████████| 1000/1000 [00:00<00:00, 3576.92it/s]
Processing NGS/4001-5000: 100%|██████████| 1000/1000 [00:00<00:00, 3546.87it/s]
Processing NGS/7001-8000: 100%|██████████| 1000/1000 [00:00<00:00, 3464.46it/s]
Processing NGS/26001-27000: 100%|██████████| 1000/1000 [00:00<00:00, 3834.40it/s]
Processing NGS/13001-14000: 100%|██████████| 1000/1000 [00:00<00:00, 3706.16it/s]
Processing NGS/29001-29424: 100%|██████████| 424/424 [00:00<00:00, 94239.04it/s]
Processing NGS/12001-13000: 100%|██████████| 1000/1000 [00:00<00:00, 3772.61it/s]
Processing NGS/25001-26000: 100%|██████████| 1000/1000 [00:00<00:00, 3755.55it/s]
Processing NGS/27001-28000: 100%|██████████| 1000/1000 [00:00<00:00, 3582.33it/s]
Processing NGS/28001-29000: 100%|██████████| 1000/1000 [00:00<00:00, 3582.99it/s]
Processing NGS/14001-1500

In [4]:
# Splitting the data into train, test, and validation sets
X_train, X_temp, y_train, y_temp = train_test_split(image_paths, labels, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Now X_train, y_train, X_val, y_val, X_test, y_test are the organized data for training, validation, and testing
# Combine all labels from train, validation, and test sets
combined_labels = y_train + y_val + y_test

# Print all unique classes
unique_classes = set(combined_labels)
print("Unique classes:", unique_classes)

Unique classes: {'NIF', 'EBO', 'HAC', 'BLA', 'NGS', 'ART', 'EOS', 'ABE', 'OTH', 'MON', 'KSC', 'PMO', 'LYT', 'MMZ', 'FGC', 'NGB', 'BAS', 'PEB', 'LYI', 'PLM', 'MYB'}


In [5]:
from sklearn.preprocessing import LabelEncoder

# Convert string labels to numerical labels using LabelEncoder
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)
y_test_encoded = label_encoder.transform(y_test)

# Calculate the number of classes directly from the labels
num_classes = len(set(combined_labels))

# Convert numerical labels to one-hot encoded vectors
y_train_onehot = to_categorical(y_train_encoded, num_classes)
y_val_onehot = to_categorical(y_val_encoded, num_classes)
y_test_onehot = to_categorical(y_test_encoded, num_classes)

num_classes


21

In [6]:
# Change the input shape to match InceptionV3's input shape
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(250, 250, 3))

# Add custom layers on top of the base model
x = base_model.output
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [7]:
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [8]:
model.summary()

In [9]:
# Preprocess images using ImageDataGenerator
datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,  
    rescale=1. / 255
)


batch_size = 32

# Define a custom generator to load and preprocess images on-the-fly
def custom_generator(image_paths, labels, batch_size):
    num_samples = len(image_paths)
    while True:
        indices = np.random.choice(num_samples, size=batch_size, replace=False)
        batch_images = []
        batch_labels = []
        for idx in indices:
            img_path = image_paths[idx]
            label = labels[idx]
            try:
                img = load_img(img_path, target_size=(250, 250))  # Resize images to VGG-16 input size
                img_array = img_to_array(img)
                batch_images.append(img_array)
                batch_labels.append(label)
            except Exception as e:
                print(f"Error loading image {img_path}: {e}")
        yield np.array(batch_images), to_categorical(label_encoder.transform(batch_labels), num_classes)

In [None]:

filepath = 'InceptionV3_weights.hdf5'
callbacks_list = [EarlyStopping(monitor='val_accuracy', patience=10, verbose=1),
                  ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max'),
                  ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=5, verbose=1)]

In [10]:
# Create custom generators for training, validation, and test sets
train_generator = custom_generator(X_train, y_train, batch_size)
val_generator = custom_generator(X_val, y_val, batch_size)
test_generator = custom_generator(X_test, y_test, batch_size)


In [None]:
epochs = 5

# Train the model using the custom generator
model.fit(
    custom_generator(X_train, y_train, batch_size),
    steps_per_epoch=len(X_train) // batch_size,
    epochs=epochs,
    validation_data=custom_generator(X_val, y_val, batch_size),
    validation_steps=len(X_val) // batch_size,
    verbose=1,
#     callbacks=callbacks_list
)

Epoch 1/5


I0000 00:00:1718023934.033720     115 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1718023934.147360     115 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m3305/3748[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m2:12[0m 298ms/step - accuracy: 0.3760 - loss: 2.1852Error loading image /kaggle/input/bone_marrow_cell_dataset/MYB/5001-6000/MYB_05527.jpg: broken data stream when reading image file
[1m3748/3748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 312ms/step - accuracy: 0.3889 - loss: 2.1302

W0000 00:00:1718025108.471288     115 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m3748/3748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1482s[0m 368ms/step - accuracy: 0.3890 - loss: 2.1301 - val_accuracy: 0.5674 - val_loss: 1.3669
Epoch 2/5
[1m3572/3748[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m45s[0m 259ms/step - accuracy: 0.5541 - loss: 1.4744Error loading image /kaggle/input/bone_marrow_cell_dataset/MYB/5001-6000/MYB_05527.jpg: broken data stream when reading image file
[1m3748/3748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1097s[0m 293ms/step - accuracy: 0.5548 - loss: 1.4717 - val_accuracy: 0.6321 - val_loss: 1.1631
Epoch 3/5
[1m   4/3748[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m16:12[0m 260ms/step - accuracy: 0.6738 - loss: 1.1925

In [None]:
model.save('InceptionV3_Model.h5')

In [None]:
model_final = load_model('InceptionV3_weights.hdf5')

In [None]:
# Evaluate the model on the test set using the custom generator
test_loss, test_accuracy = model_final.evaluate(
    test_generator,
    steps=len(X_test) // batch_size,
    verbose=1,
)
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')



In [None]:

# Determine the size of the test dataset
test_size = len(X_test)

print("Total number of samples in the test dataset:", test_size)


In [None]:

from math import ceil
from sklearn.metrics import classification_report

# Determine the size of the test dataset
test_size = len(X_test)

# Calculate the number of steps based on batch size
batch_size = 32  # Adjust according to your generator's batch size
test_steps = ceil(test_size / batch_size)

# Initialize empty lists to store true and predicted labels
y_true = []
y_pred = []

# Iterate through the test generator to obtain predictions batch-wise
for i in range(test_steps):
    x_batch, y_batch = next(test_generator)
    y_true.extend(np.argmax(y_batch, axis=1))
    y_pred.extend(np.argmax(model_final.predict(x_batch), axis=1))



In [None]:

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

# Get the list of all possible classes
all_classes = sorted(set(y_true + y_pred + y_train_encoded.tolist()))

# Calculate confusion matrix with all classes
conf_matrix = confusion_matrix(y_true, y_pred, labels=all_classes)

# Plot confusion matrix
plt.figure(figsize=(11.7, 8.27), dpi=200)
plt.imshow(conf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()

tick_marks = np.arange(len(all_classes))

for i in range(conf_matrix.shape[0]):
    for j in range(conf_matrix.shape[1]):
        plt.text(j, i, conf_matrix[i, j],
                 ha="center", va="center",
                 color="white" if conf_matrix[i, j] > conf_matrix.max() / 2 else "black")

plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.xticks(tick_marks, unique_classes, rotation=45)
plt.yticks(tick_marks, unique_classes)
plt.savefig('InceptionV3.png')
plt.show()

# Calculate classification report
class_report = classification_report(y_true, y_pred,target_names=unique_classes)

# Print classification report
print("Classification Report:")
print(class_report)
