# Welcome to the second part of the ML image analysis and classification workshop!

In this notebook you will implement a very basic CNN model that tries to classify the images into three classes: normal, viral pneumonia and COVID.

You will use Keras to implement the CNN, because Keras is generally easier to understand than PyTorch.

In [1]:
import cv2
import numpy as np
from pathlib import Path

In [2]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

def read_dataset(path: Path, labels = {"normal": 0, "viral pneumonia": 1, "covid": 2}) -> tuple:
    """
        Read every image below ``path`` and return its labels and pixel data.

        The class of an image is taken from the name of the directory that
        directly contains it, lower-cased and looked up in ``labels``.

        Args:
            path: Root directory, searched recursively for ``*.jpeg``,
                ``*.jpg`` and ``*.png`` files.
            labels: Mapping from lower-case class (directory) name to integer
                class index. The default dict is only read, never modified.

        Returns:
            A ``(labels, samples)`` tuple of numpy arrays: the integer class
            index of every image, and the images as returned by
            ``cv2.imread`` divided by 255.

        Raises:
            KeyError: If an image lives in a directory whose name is not a
                key of ``labels``.
            ValueError: If ``cv2.imread`` cannot decode one of the files.
    """
    # rglob walks the tree recursively, once per supported extension. Each
    # result is sorted because the traversal order depends on the filesystem,
    # which would otherwise make any seeded shuffling/splitting done by the
    # caller differ from machine to machine.
    image_paths = []
    for pattern in ('*.jpeg', '*.jpg', '*.png'):
        image_paths.extend(sorted(path.rglob(pattern)))

    # one list for labels, one for samples
    set_labels = []
    set_samples = []
    for image_path in image_paths:
        # The parent directory name is the class name
        class_name = image_path.parent.name.lower()
        if class_name not in labels:
            raise KeyError(
                f"Unknown class directory '{image_path.parent.name}' for image {image_path}; "
                f"expected one of {sorted(labels)}"
            )
        image = cv2.imread(str(image_path))
        # cv2.imread signals failure by returning None instead of raising
        if image is None:
            raise ValueError(f"Could not read image {image_path}")
        set_samples.append(image)
        set_labels.append(labels[class_name])
    # normalize and return
    return np.asarray(set_labels), np.asarray(set_samples)/255

# Load the training images, then hold out 20% of them for validation.
train_labels, train_samples = read_dataset(Path("../Covid19-dataset/processed/train"))
# Stratify on the labels so that every class keeps the same proportion in the
# training and validation parts; with only a few hundred images a purely
# random split can leave a class under-represented in the validation set.
train_samples, val_samples, train_labels, val_labels = train_test_split(
    train_samples,
    train_labels,
    test_size=0.2,
    random_state=42,
    stratify=train_labels,
)
# The test set lives in its own directory and is never used for training.
test_labels, test_samples = read_dataset(Path("../Covid19-dataset/processed/test"))

# Sanity check: number of labels and sample tensor shape for each split
print(train_labels.shape, train_samples.shape)
print(val_labels.shape, val_samples.shape)
print(test_labels.shape, test_samples.shape)

(200,) (200, 340, 410, 3)
(51,) (51, 340, 410, 3)
(66,) (66, 340, 410, 3)


In [3]:
# One-hot encode labels
# The number of classes is passed explicitly: to_categorical otherwise infers
# the width from the largest label present, so a split that happens to lack
# the highest class index would get a narrower encoding than the other splits.
NUM_CLASSES = 3
y_train = to_categorical(train_labels, num_classes=NUM_CLASSES)
y_val = to_categorical(val_labels, num_classes=NUM_CLASSES)
y_test = to_categorical(test_labels, num_classes=NUM_CLASSES)


In [11]:
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers

# Basic model for the NN: two convolution/pooling stages followed by a small
# fully connected classifier.
model = models.Sequential()
# Conv net
# input_shape is (height, width, channels) of a single image
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(340, 410, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(32, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.25))
# Classifier
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
# One output unit per class; softmax turns the outputs into probabilities
model.add(layers.Dense(3, activation='softmax'))
model.summary()
#Compile model
# `learning_rate` replaces the deprecated `lr` argument, which newer Keras
# releases no longer accept.
# The metric is named 'acc' so that it is logged as 'acc' / 'val_acc'.
model.compile(loss='categorical_crossentropy',
        optimizer=optimizers.RMSprop(learning_rate=1e-4),
        metrics=['acc'])

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 338, 408, 32)      896       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 169, 204, 32)      0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 167, 202, 32)      9248      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 83, 101, 32)       0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 83, 101, 32)       0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 268256)            0         
_________________________________________________________________
dense_6 (Dense)              (None, 128)              

In [12]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint

# The checkpoint is written below 'models/'; create the directory up front so
# that a fresh checkout can run this cell without manual setup.
Path('models').mkdir(parents=True, exist_ok=True)

callbacks_list = [
    # Stop when the validation loss has not improved for 5 epochs. Training
    # accuracy keeps rising even while the model overfits, so it is not a
    # useful stopping signal; val_loss also matches the checkpoint criterion.
    EarlyStopping(
    monitor='val_loss',
    patience=5,
    ),
    # Keep only the weights of the epoch with the lowest validation loss.
    ModelCheckpoint(
    filepath='models/my_CNN_model.h5',
    monitor='val_loss',
    save_best_only=True,
    )
]

# Train model
model.fit(train_samples, y_train, 
          epochs=10, 
          batch_size=64,
          callbacks=callbacks_list,
          validation_data=(val_samples, y_val))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x17b45ae89d0>

In [17]:
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report


labels = {"normal": 0, "viral pneumonia": 1, "covid": 2}
# Order the class names by their integer index instead of relying on the
# insertion order of the dict, so report rows always match the right class.
class_names = sorted(labels, key=labels.get)
class_ids = [labels[name] for name in class_names]

# Evaluate the checkpoint with the best validation loss, not the final epoch
best_model = load_model('models/my_CNN_model.h5')
predictions = best_model.predict(test_samples)
# Convert class probabilities / one-hot vectors back to class indices
y_pred = np.argmax(predictions, axis=1)
y_test_ = np.argmax(y_test, axis=1)
# Passing `labels` keeps the report well-defined even if some class never
# occurs among the true or predicted test labels.
print(classification_report(y_test_, y_pred, labels=class_ids, target_names=class_names))

                 precision    recall  f1-score   support

         normal       0.43      0.65      0.52        20
viral pneumonia       0.59      0.50      0.54        20
          covid       1.00      0.73      0.84        26

       accuracy                           0.64        66
      macro avg       0.67      0.63      0.63        66
   weighted avg       0.70      0.64      0.65        66

