<a href="https://colab.research.google.com/github/Chendie-yiran/Machine_learning/blob/main/CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install medmnist

Collecting medmnist
  Downloading medmnist-3.0.2-py3-none-any.whl.metadata (14 kB)
Collecting fire (from medmnist)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading medmnist-3.0.2-py3-none-any.whl (25 kB)
Building wheels for collected packages: fire
  Building wheel for fire (setup.py) ... [?25l[?25hdone
  Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=d2195d505d8fea15b2283be516e293aa378010cb62550da8434af7fd01ac7f1b
  Stored in directory: /root/.cache/pip/wheels/19/39/2f/2d3cadc408a8804103f1c34ddd4b9f6a93497b11fa96fe738e
Successfully built fire
Installing collected packages: fire, medmnist
Successfully installed fire-0.7.0 medmnist-3.0.2


In [16]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score
from medmnist import BloodMNIST

# pre-processing
def get_data(data):
    X = []
    y = []

    for i in range(len(data)):
        x, label = data[i]
        x = np.array(x)
        X.append(x)
        y.append(label[0])

    # Convert X and y to numpy arrays before returning
    X = np.array(X)
    y = np.array(y)

    # Normalize X to range [0, 1]
    X = X / 255.0

    return X, y

# results obtained
def get_result(pred, actual, name):
    accuracy = accuracy_score(actual, pred)
    confusion = confusion_matrix(actual, pred)
    precision = precision_score(actual, pred, average='weighted')  # Weighted for multi-class
    recall = recall_score(actual, pred, average='weighted')
    print(name, " Accuracy: ", accuracy)
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(name, " Confusion Matrix: \n", confusion)

# obtain the dataset
training_data = BloodMNIST(split='train', download=True, as_rgb=False)
validation_data = BloodMNIST(split='val', download=True, as_rgb=False)
test_data = BloodMNIST(split='test', download=True, as_rgb=False)

# obtain the data
X_train, y_train = get_data(training_data)
X_val, y_val = get_data(validation_data)
X_test, y_test = get_data(test_data)

print("Training data shape:", X_train.shape)
print("Validation data shape:", X_val.shape)
print("Test data shape:", X_test.shape)

print("Training labels shape:", y_train.shape)
print("Validation labels shape:", y_val.shape)
print("Test labels shape:", y_test.shape)

# label convert into one-hot encoded vector
num_classes = len(np.unique(y_train))
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_val = tf.keras.utils.to_categorical(y_val, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

# CNN model
def build_cnn_model(input_shape, num_classes):
    model = Sequential([
        # first layer
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),

        # second layer
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),

        # fully connected layer
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')  # output layer
    ])
    return model

# struct the model
input_shape = (28, 28, 3)
model = build_cnn_model(input_shape, num_classes)


model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# train the model
history=model.fit(X_train, y_train,
          validation_data=(X_val, y_val),
          epochs=15,
          batch_size=16)
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

print(f"Final Training Accuracy: {train_accuracy[-1]:.4f}")
print(f"Final Validation Accuracy: {val_accuracy[-1]:.4f}")

# analysis the model
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy:.4f}")

# predict
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# evaluate the result
get_result(y_pred_classes, y_test_classes, "CNN Model")


Using downloaded and verified file: /root/.medmnist/bloodmnist.npz
Using downloaded and verified file: /root/.medmnist/bloodmnist.npz
Using downloaded and verified file: /root/.medmnist/bloodmnist.npz
Training data shape: (11959, 28, 28, 3)
Validation data shape: (1712, 28, 28, 3)
Test data shape: (3421, 28, 28, 3)
Training labels shape: (11959,)
Validation labels shape: (1712,)
Test labels shape: (3421,)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/15
[1m748/748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 21ms/step - accuracy: 0.4714 - loss: 1.3900 - val_accuracy: 0.7290 - val_loss: 0.7332
Epoch 2/15
[1m748/748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 21ms/step - accuracy: 0.7251 - loss: 0.7538 - val_accuracy: 0.7932 - val_loss: 0.5198
Epoch 3/15
[1m748/748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 21ms/step - accuracy: 0.7839 - loss: 0.5946 - val_accuracy: 0.8271 - val_loss: 0.4556
Epoch 4/15
[1m748/748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 20ms/step - accuracy: 0.8077 - loss: 0.5307 - val_accuracy: 0.8598 - val_loss: 0.4023
Epoch 5/15
[1m748/748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 20ms/step - accuracy: 0.8264 - loss: 0.4877 - val_accuracy: 0.8680 - val_loss: 0.3770
Epoch 6/15
[1m748/748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 22ms/step - accuracy: 0.8471 - loss: 0.4428 - val_accuracy: 0.8820 - val_loss: 0.3549
Epoch 7/15
[1m7