<a href="https://colab.research.google.com/github/Madhuram2901/GDGC_MACHINE_LEARNING_MADHURAM/blob/main/Bone_Marrow_Cell_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Importing Dataset**

In [None]:
import kagglehub

# Kaggle handle (owner/dataset-slug) of the bone-marrow cell image dataset.
DATASET_HANDLE = "andrewmvd/bone-marrow-cell-classification"

# Fetch the latest published version; `path` is the value returned by
# kagglehub and is what the later cells pass to the image generators.
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

# **Importing Dependencies**

In [None]:
!pip install tensorflow matplotlib seaborn scikit-learn

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.applications import ResNet50
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix, roc_curve
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd


# **Data Preprocessing**

In [None]:
import os

# Image geometry and batch size shared by every generator in this cell.
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# `flow_from_directory` treats each immediate sub-folder of the directory it
# is given as one class. If the download path only contains a single wrapper
# folder, Keras would see exactly one class (the training log of
# accuracy 1.0 / loss 0.0 from the first steps is consistent with that).
# Descend through such single-folder wrappers until we reach a directory that
# holds several sub-folders (assumed to be the per-class folders).
data_dir = path
while True:
    subdirs = [
        entry for entry in sorted(os.listdir(data_dir))
        if os.path.isdir(os.path.join(data_dir, entry))
    ]
    if len(subdirs) != 1:
        break
    data_dir = os.path.join(data_dir, subdirs[0])

# Scale pixel values to [0, 1] and reserve 20% of the images for validation.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_generator = datagen.flow_from_directory(
    data_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

val_generator = datagen.flow_from_directory(
    data_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'
)

# Evaluation generator.
# * shuffle=False: `test_generator.classes` is in directory order, so the
#   generator must yield images in that same order for predictions and
#   labels to line up.
# * subset='validation': evaluate on the held-out 20% instead of on every
#   image (which would include the training data). ImageDataGenerator only
#   offers a two-way split, so this is the same split used for validation.
test_generator = datagen.flow_from_directory(
    data_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# Fail early if the directory layout still yields a degenerate problem.
assert train_generator.num_classes > 1, (
    f"Expected several class folders under {data_dir!r}, "
    f"found {train_generator.num_classes}"
)


# **Custom CNN Model**

In [None]:
from PIL import Image, ImageFile

# Tell Pillow to load image files that are cut short rather than raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Small baseline network: two conv/pool stages followed by a dense head
# with dropout and one softmax unit per class.
custom_cnn = Sequential()
custom_cnn.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
custom_cnn.add(MaxPooling2D(2, 2))
custom_cnn.add(Conv2D(64, (3, 3), activation='relu'))
custom_cnn.add(MaxPooling2D(2, 2))
custom_cnn.add(Flatten())
custom_cnn.add(Dense(128, activation='relu'))
custom_cnn.add(Dropout(0.5))
custom_cnn.add(Dense(train_generator.num_classes, activation='softmax'))

custom_cnn.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs, reporting validation metrics after each one.
custom_cnn.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10,
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10


  return self.fn(y_true, y_pred, **self._fn_kwargs)
  self._warn_if_super_not_called()


[1m  39/4285[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:53:41[0m 3s/step - accuracy: 1.0000 - loss: 0.0000e+00

# **Pre-Trained Model ResNet50**

In [None]:
# NOTE: this ResNet50 transfer-learning cell is entirely commented out, so
# `pretrained_cnn` is never defined. The lines in the evaluation cells that
# reference `pretrained_cnn` / `pretrained_predictions` are commented out too.
# The sketch below builds an ImageNet-initialised ResNet50 without its top,
# flattens its output and adds a Dense(128) + softmax head.
#resnet_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

#x = Flatten()(resnet_model.output)
#x = Dense(128, activation='relu')(x)
#output = Dense(train_generator.num_classes, activation='softmax')(x)

#pretrained_cnn = Model(inputs=resnet_model.input, outputs=output)
#pretrained_cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
#pretrained_cnn.fit(train_generator, validation_data=val_generator, epochs=10)


# **Model Evaluation**

## **Get Predictions**

In [None]:
# Rewind the iterator so prediction starts at the first batch.
test_generator.reset()

# Predicted class index per image (argmax over the softmax outputs).
custom_predictions = np.argmax(custom_cnn.predict(test_generator), axis=-1)

#pretrained_predictions = np.argmax(pretrained_cnn.predict(test_generator), axis=-1)

# Ground-truth class indices. The later cells read `true_labels` (plural),
# so that is the name defined here.
# NOTE: `.classes` is in directory order; it lines up with the predictions
# only when the generator yields images without shuffling.
true_labels = test_generator.classes
true_label = true_labels  # original singular name kept as an alias

## **Accuracy for both models**

In [None]:
# Accuracy = fraction of images whose predicted class index equals the
# ground-truth index. (Indexing `custom_predictions[1]` would only print the
# predicted class of the second image, not an accuracy.)
custom_accuracy = np.mean(custom_predictions == test_generator.classes)
print(f"Custom CNN Accuracy: {custom_accuracy:.2f}")
#print(f"Pre-trained CNN Accuracy: {np.mean(pretrained_predictions == test_generator.classes):.2f}")

## **F1 Score**

In [None]:
from sklearn.metrics import f1_score

# Weighted F1 of the custom CNN: per-class F1 averaged with class support as
# weights. Uses the names that exist at this point in a top-to-bottom run
# (`custom_predictions` and the generator's ground-truth class indices).
f1 = f1_score(test_generator.classes, custom_predictions, average='weighted')
print(f"Weighted F1-Score: {f1:.2f}")


## **Confusion Matrix**

In [None]:
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes. The ground truth is
# read straight from the generator so this cell does not depend on a
# variable that is only assigned further down the notebook.
print("Custom CNN Confusion Matrix")
print(confusion_matrix(test_generator.classes, custom_predictions))

#print("Pre-trained CNN Confusion Matrix")
#print(confusion_matrix(test_generator.classes, pretrained_predictions))

## **Complete Report**

In [None]:
from sklearn.metrics import classification_report

true_labels = test_generator.classes

# Class names ordered by the integer index Keras assigned to each folder,
# so position i in the list is the name of class index i.
class_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)

# For Custom Model
# `labels` is pinned to every class index so that `target_names` always has
# a matching length, even if some class never occurs in the labels or
# predictions.
customcnn_report = classification_report(
    true_labels,
    custom_predictions,
    labels=list(range(len(class_names))),
    target_names=class_names,
)
print("Classification Report - Custom CNN")
print(customcnn_report)

# For Pre-built Model(ResNet50)

#resnet_report = classification_report(true_labels, pretrained_predictions, target_names=class_names)
#print("Classification Report - Pretrained (ResNet50 Model)")
#print(resnet_report)

# **Data Visualization**

## **ROC Curve**

In [None]:
# Import additional libraries
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize

# Binarize true labels for ROC curve
n_classes = len(class_names)
true_labels_binarized = label_binarize(true_labels, classes=range(n_classes))

# Get predicted probabilities
prob_predictions = custom_cnn.predict(test_generator)

# Plot ROC Curve
plt.figure(figsize=(8, 6))
for i in range(n_classes):
    fpr, tpr, _ = roc_curve(true_labels_binarized[:, i], prob_predictions[:, i])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f'Class {class_names[i]} (AUC = {roc_auc:.2f})')

plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line
plt.title("ROC Curve - Custom CNN")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc="lower right")
plt.grid()
plt.show()

# For Pre-Built Model ROC Curve

# Binarize true labels for ROC curve
#n_classes = len(class_names)
#true_labels_binarized = label_binarize(true_labels, classes=range(n_classes))

# Get predicted probabilities
#prob_predictions = pretrained_cnn.predict(test_generator)

# Plot ROC Curve
#plt.figure(figsize=(8, 6))
#for i in range(n_classes):
    #fpr, tpr, _ = roc_curve(true_labels_binarized[:, i], prob_predictions[:, i])
    #roc_auc = auc(fpr, tpr)
    #plt.plot(fpr, tpr, label=f'Class {class_names[i]} (AUC = {roc_auc:.2f})')

#plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line
#plt.title("ROC Curve - ResNet50 Model(Prebuilt)")
#plt.xlabel("False Positive Rate")
#plt.ylabel("True Positive Rate")
#plt.legend(loc="lower right")
#plt.grid()
#plt.show()



## **Confusion Matrix Plot**

In [None]:
true_labels = test_generator.classes

# Class names ordered by class index; used for the tick labels below.
class_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)

# Predict from the first batch so the outputs follow the generator's order.
test_generator.reset()
predictions = np.argmax(custom_cnn.predict(test_generator), axis=-1)

# Pin `labels` so the matrix always has one row/column per class and
# therefore matches the tick labels.
cm = confusion_matrix(true_labels, predictions, labels=np.arange(len(class_names)))

plt.figure(figsize=(8, 6))
plt.imshow(cm, cmap=plt.cm.Blues)
plt.title("Confusion Matrix - Custom CNN")
plt.colorbar()

plt.xticks(np.arange(len(class_names)), class_names, rotation=45)
plt.yticks(np.arange(len(class_names)), class_names)

# Write the count into each cell; use white text on dark cells and black
# text on light cells so the numbers stay readable on the Blues colormap.
threshold = cm.max() / 2 if cm.size else 0
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        plt.text(
            j, i, cm[i, j],
            ha='center', va='center',
            color='white' if cm[i, j] > threshold else 'black',
        )

plt.ylabel("True Labels")
plt.xlabel("Predicted Labels")
plt.tight_layout()
plt.show()
