In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing
import os #local files
import zipfile #unzip files
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
#for dirname, _, filenames in os.walk('/kaggle/input'):
#for filename in filenames:
#print(os.path.join(dirname, filename))

In [2]:
# Base seed value for this notebook's reproducibility settings.
seed_value = 12321
# Export it as PYTHONHASHSEED (environment values must be strings).
# NOTE(review): hash randomization is normally fixed when the interpreter starts,
# so this probably only affects child processes - confirm it has the intended effect.
os.environ['PYTHONHASHSEED'] = f"{seed_value}"

In [3]:
# Seed every random number generator the notebook relies on.
import random
from numpy.random import seed

seed(1)  # NumPy global RNG
tf.random.set_seed(2)  # TensorFlow global RNG
# The augmentation step picks its rotation angle with `random.choice`, so the
# standard-library RNG must be seeded as well for runs to be repeatable.
random.seed(3)

In [4]:
import nibabel as nib
from scipy import ndimage
def read_nifti_file(filepath):
    """Load a NIfTI file and return its voxel data.

    Args:
        filepath: Path of the file passed to ``nib.load``.

    Returns:
        The array returned by the loaded image's ``get_fdata()``.
    """
    return nib.load(filepath).get_fdata()

In [5]:
def normalize(volume, lower=-1000, upper=400):
    """Clip a volume to an intensity window and rescale it to [0, 1].

    Args:
        volume: Array of raw voxel intensities. It is left unmodified.
        lower: Bottom of the window; smaller values are clipped to it.
        upper: Top of the window; larger values are clipped to it.

    Returns:
        A new ``float32`` array in which ``lower`` maps to 0.0 and ``upper``
        maps to 1.0.
    """
    # np.clip returns a fresh array, so the caller's data is never overwritten.
    clipped = np.clip(volume, lower, upper)
    scaled = (clipped - lower) / (upper - lower)
    return scaled.astype("float32")

In [6]:
def resize_volume(img, desired_depth=64, desired_width=128, desired_height=128):
    """Rotate a volume by 90 degrees and resize it along all three axes.

    Args:
        img: 3D array. Axis 0 is treated as width, axis 1 as height and the
            last axis as depth.
        desired_depth: Target size of the last axis.
        desired_width: Target size of axis 0.
        desired_height: Target size of axis 1.

    Returns:
        The rotated volume, resampled to
        ``(desired_width, desired_height, desired_depth)``.
    """
    current_width, current_height = img.shape[0], img.shape[1]
    current_depth = img.shape[-1]
    # Zoom factor per axis = target size / current size.
    width_factor = desired_width / current_width
    height_factor = desired_height / current_height
    depth_factor = desired_depth / current_depth
    # reshape=False keeps the array shape, so the factors computed above stay valid.
    img = ndimage.rotate(img, 90, reshape=False)
    # order=1 selects linear interpolation.
    img = ndimage.zoom(img, (width_factor, height_factor, depth_factor), order=1)
    return img

In [7]:
def process_scan(path):
    """Load one scan from disk and return it normalized and resized.

    Chains ``read_nifti_file`` -> ``normalize`` -> ``resize_volume``.

    Args:
        path: Location of the scan file.

    Returns:
        The volume returned by ``resize_volume``.
    """
    return resize_volume(normalize(read_nifti_file(path)))

In [8]:
# Install the Kaggle command-line client quietly (-q); the cells below use it
# to download the datasets.
! pip install -q kaggle

In [9]:
from google.colab import files

# Prompt for kaggle.json (the Kaggle API token).
# upload() returns a dict holding the raw file contents. Left as a bare last
# expression it is echoed into the cell output and saved with the notebook,
# API key included, so bind it to a name and drop it straight away.
uploaded_token = files.upload()
del uploaded_token

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"<REDACTED>","key":"<REDACTED>"}'}

In [10]:
! mkdir ~/.kaggle

! cp kaggle.json ~/.kaggle/

In [11]:
 # Restrict the API token file to owner read/write only (mode 600).
 ! chmod 600 ~/.kaggle/kaggle.json

In [12]:
# Download the andrewmvd/covid19-ct-scans dataset archive with the Kaggle CLI.
! kaggle datasets download -d andrewmvd/covid19-ct-scans

Downloading covid19-ct-scans.zip to /content
100% 1.03G/1.03G [00:08<00:00, 136MB/s]
100% 1.03G/1.03G [00:08<00:00, 131MB/s]


In [13]:
# Download the mathurinache/mosmeddata-chest-ct-scans-with-covid19 dataset archive
# with the Kaggle CLI.
! kaggle datasets download -d mathurinache/mosmeddata-chest-ct-scans-with-covid19

Downloading mosmeddata-chest-ct-scans-with-covid19.zip to /content
100% 11.1G/11.1G [04:35<00:00, 35.1MB/s]
100% 11.1G/11.1G [04:35<00:00, 43.3MB/s]


In [14]:
import zipfile
# Unpack the covid19-ct-scans archive into the current working directory.
with zipfile.ZipFile("/content/covid19-ct-scans.zip", mode="r") as archive:
    archive.extractall(path=".")

In [15]:
import zipfile
# Unpack the MosMedData archive into the current working directory.
with zipfile.ZipFile("/content/mosmeddata-chest-ct-scans-with-covid19.zip", mode="r") as archive:
    archive.extractall(path=".")

In [16]:
import glob
# Root folder of the extracted MosMedData studies; the sub-folders CT-0 ... CT-4
# hold the individual scans.
MOSMED_STUDIES_DIR = "/content/MosMedData Chest CT Scans with COVID-19 Related Findings COVID19_1110 1.0/studies"


def _list_scan_paths(directory, limit=None):
    """Return the full paths of the entries in `directory`, sorted by name.

    Args:
        directory: Folder to list.
        limit: If given, keep only the first `limit` paths after sorting.

    Returns:
        A new list of joined paths.
    """
    paths = sorted(os.path.join(directory, name) for name in os.listdir(directory))
    return paths if limit is None else paths[:limit]


# Normal class: the first 102 studies of CT-0.
normal_ct_scan_paths = _list_scan_paths(os.path.join(MOSMED_STUDIES_DIR, "CT-0"), limit=102)

# Abnormal class. Every listing is sorted: os.listdir returns entries in arbitrary
# order, and the train/validation split further down slices these lists by index,
# so an unsorted listing would make the split differ from run to run.
abnormal_ct_scan_paths = _list_scan_paths("/content/ct_scans")
abnormal_ct_scan_paths += _list_scan_paths(os.path.join(MOSMED_STUDIES_DIR, "CT-1"), limit=20)
abnormal_ct_scan_paths += _list_scan_paths(os.path.join(MOSMED_STUDIES_DIR, "CT-2"), limit=30)
# The CT-3 limit was 28 in an earlier revision.
abnormal_ct_scan_paths += _list_scan_paths(os.path.join(MOSMED_STUDIES_DIR, "CT-3"), limit=30)
# Two CT-4 studies are added explicitly by name.
abnormal_ct_scan_paths += [
    os.path.join(MOSMED_STUDIES_DIR, "CT-4", "study_1109.nii"),
    os.path.join(MOSMED_STUDIES_DIR, "CT-4", "study_1110.nii"),
]

In [17]:
# Run every scan through the load -> normalize -> resize pipeline and gather
# each class into a single array.
abnormal_scans = np.array(list(map(process_scan, abnormal_ct_scan_paths)))
normal_scans = np.array(list(map(process_scan, normal_ct_scan_paths)))

In [18]:
# One label per scan: 1 for abnormal, 0 for normal.
abnormal_labels = np.array([1] * len(abnormal_scans))
normal_labels = np.array([0] * len(normal_scans))

In [19]:
# Split data in the ratio 70-30 for training and validation.
# Both classes are cut at the same index: the first 71 scans of each class go
# to training and the remainder to validation.
train_part = slice(None, 71)
val_part = slice(71, None)
x_train = np.concatenate((abnormal_scans[train_part], normal_scans[train_part]), axis=0)
y_train = np.concatenate((abnormal_labels[train_part], normal_labels[train_part]), axis=0)
x_val = np.concatenate((abnormal_scans[val_part], normal_scans[val_part]), axis=0)
y_val = np.concatenate((abnormal_labels[val_part], normal_labels[val_part]), axis=0)

In [20]:
def rotate(volume):
    """Rotate a volume by a randomly chosen small angle (data augmentation).

    Args:
        volume: Volume tensor; it is passed to NumPy code through
            ``tf.numpy_function``.

    Returns:
        A ``tf.float32`` tensor holding the rotated volume with values kept
        inside [0, 1].
    """
    # Imported here so the function works no matter which cell imports `random`
    # (or whether any does) before the dataset is first iterated.
    import random

    def scipy_rotate(volume):
        # Candidate rotation angles, in degrees.
        angles = [-20, -10, -5, 5, 10, 20]
        # Pick one of them at random.
        angle = random.choice(angles)
        # reshape=False keeps the volume's shape unchanged.
        rotated = ndimage.rotate(volume, angle, reshape=False)
        # Keep the values inside [0, 1] after interpolation.
        return np.clip(rotated, 0, 1)

    # Wrap the NumPy/SciPy code so it can run inside a tf.data pipeline.
    augmented_volume = tf.numpy_function(scipy_rotate, [volume], tf.float32)
    return augmented_volume


def train_preprocessing(volume, label):
    """Augment a training volume by rotation and append a channel axis.

    Args:
        volume: Training volume.
        label: Its label, returned untouched.

    Returns:
        ``(volume, label)`` with the volume rotated and expanded at axis 3.
    """
    rotated = rotate(volume)
    return tf.expand_dims(rotated, axis=3), label

def validation_preprocessing(volume, label):
    """Append a channel axis to a validation volume (no augmentation).

    Args:
        volume: Validation volume.
        label: Its label, returned untouched.

    Returns:
        ``(volume, label)`` with the volume expanded at axis 3.
    """
    with_channel = tf.expand_dims(volume, axis=3)
    return with_channel, label

In [21]:
import tensorflow as tf
# Batch size shared by both pipelines.
batch_size = 2

# Wrap the in-memory arrays as tf.data sources.
train_loader = tf.data.Dataset.from_tensor_slices((x_train, y_train))
validation_loader = tf.data.Dataset.from_tensor_slices((x_val, y_val))

# Training pipeline: shuffle over the whole set, augment on the fly, batch, prefetch.
train_dataset = train_loader.shuffle(len(x_train))
train_dataset = train_dataset.map(train_preprocessing)
train_dataset = train_dataset.batch(batch_size).prefetch(2)

# Validation pipeline: same steps, but the mapping only adds the channel axis.
validation_dataset = validation_loader.shuffle(len(x_val))
validation_dataset = validation_dataset.map(validation_preprocessing)
validation_dataset = validation_dataset.batch(batch_size).prefetch(2)


In [22]:
import matplotlib.pyplot as plt
import random

In [23]:
import tensorflow as tf
from tensorflow import keras
def get_model(width=128, height=128, depth=64):
    """Build the 3D convolutional network used for binary classification.

    Four Conv3D -> MaxPool3D -> BatchNormalization stages (64, 64, 128 and 256
    filters) feed a global average pooling layer, a 512-unit dense layer with
    dropout, and a single sigmoid output unit.

    Args:
        width: Size of the first spatial axis of the input.
        height: Size of the second spatial axis of the input.
        depth: Size of the third spatial axis of the input.

    Returns:
        An uncompiled ``keras.Model`` named "3dcnn" taking inputs of shape
        ``(width, height, depth, 1)``.
    """
    inputs = keras.Input((width, height, depth, 1))

    # Convolutional feature extractor: one stage per filter count.
    x = inputs
    for n_filters in (64, 64, 128, 256):
        x = keras.layers.Conv3D(filters=n_filters, kernel_size=3, activation="relu")(x)
        x = keras.layers.MaxPool3D(pool_size=2)(x)
        x = keras.layers.BatchNormalization()(x)

    # Classification head.
    x = keras.layers.GlobalAveragePooling3D()(x)
    x = keras.layers.Dense(units=512, activation="relu")(x)
    x = keras.layers.Dropout(0.3)(x)
    outputs = keras.layers.Dense(units=1, activation="sigmoid")(x)

    return keras.Model(inputs, outputs, name="3dcnn")

# Instantiate the network for 128 x 128 x 64 volumes and print its layer summary.
model = get_model(depth=64, height=128, width=128)
model.summary()

Model: "3dcnn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 128, 128, 64, 1)] 0         
_________________________________________________________________
conv3d (Conv3D)              (None, 126, 126, 62, 64)  1792      
_________________________________________________________________
max_pooling3d (MaxPooling3D) (None, 63, 63, 31, 64)    0         
_________________________________________________________________
batch_normalization (BatchNo (None, 63, 63, 31, 64)    256       
_________________________________________________________________
conv3d_1 (Conv3D)            (None, 61, 61, 29, 64)    110656    
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 30, 30, 14, 64)    0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 30, 30, 14, 64)    256   

In [None]:
# Learning-rate schedule: exponential decay applied in discrete steps.
# NOTE(review): the training log shows 71 steps per epoch, so 100 epochs is
# about 7,100 steps - far below decay_steps=100000. With staircase=True the
# rate would then stay at its initial value; confirm this is intended.
initial_learning_rate = 0.0001
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True,
)

# Compile for binary classification with Adam on the schedule above.
optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["acc"])

# Callbacks: keep the best checkpoint on disk, and stop once validation
# accuracy has not improved for 15 epochs.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    "3d_image_classification.h5", save_best_only=True
)
early_stopping_cb = keras.callbacks.EarlyStopping(monitor="val_acc", patience=15)
callbacks = [checkpoint_cb, early_stopping_cb]

# Train, validating after every epoch.
epochs = 100
model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=epochs,
    shuffle=True,
    verbose=2,
    callbacks=callbacks,
)

Epoch 1/100
71/71 - 1759s - loss: 0.6729 - acc: 0.6268 - val_loss: 1.0098 - val_acc: 0.5000
Epoch 2/100
71/71 - 1746s - loss: 0.6652 - acc: 0.5775 - val_loss: 0.6810 - val_acc: 0.5968
Epoch 3/100
71/71 - 1728s - loss: 0.6262 - acc: 0.6268 - val_loss: 1.7293 - val_acc: 0.5000
Epoch 4/100
71/71 - 1727s - loss: 0.6031 - acc: 0.6127 - val_loss: 0.7492 - val_acc: 0.5000
Epoch 5/100
71/71 - 1722s - loss: 0.5561 - acc: 0.6761 - val_loss: 0.6732 - val_acc: 0.5968
Epoch 6/100
71/71 - 1718s - loss: 0.5821 - acc: 0.6690 - val_loss: 1.1541 - val_acc: 0.5000
Epoch 7/100
71/71 - 1718s - loss: 0.5730 - acc: 0.6690 - val_loss: 0.6233 - val_acc: 0.6290
Epoch 8/100
71/71 - 1717s - loss: 0.5769 - acc: 0.6479 - val_loss: 0.6393 - val_acc: 0.5806
Epoch 9/100
71/71 - 1719s - loss: 0.5689 - acc: 0.6901 - val_loss: 0.5826 - val_acc: 0.7419
Epoch 10/100
71/71 - 1719s - loss: 0.6110 - acc: 0.6620 - val_loss: 0.5588 - val_acc: 0.6613
Epoch 11/100
71/71 - 1719s - loss: 0.5865 - acc: 0.6901 - val_loss: 1.1568 - va

In [None]:
# Save the model to ./mymodel.h5.
# NOTE(review): the recorded output of this cell is a NameError, so `model`
# presumably did not exist when it was last run - re-run the cells above first.
model.save('./mymodel.h5')

NameError: ignored

In [None]:
# Save only the model weights; the final prediction cell reloads this file.
model.save_weights('poids-v4.h5')

In [None]:
import matplotlib.pyplot as plt
# Plot training and validation curves for accuracy and loss, side by side.
fig, ax = plt.subplots(1, 2, figsize=(20, 3))
ax = ax.ravel()

for axis, metric in zip(ax, ["acc", "loss"]):
    curves = model.history.history
    axis.plot(curves[metric])
    axis.plot(curves["val_" + metric])
    axis.set_title("Model {}".format(metric))
    axis.set_xlabel("epochs")
    axis.set_ylabel(metric)
    axis.legend(["train", "val"])


In [None]:
# Load best weights.
# The file name must match the one the ModelCheckpoint callback writes
# ("3d_image_classification.h5"); a different spelling fails with file-not-found.
model.load_weights("3d_image_classification.h5")

# Predict on the first validation volume; expand_dims adds the batch axis.
prediction = model.predict(np.expand_dims(x_val[0], axis=0))[0]
# The sigmoid output is the "abnormal" score; its complement is the "normal" score.
scores = [1 - prediction[0], prediction[0]]

class_names = ["normal", "abnormal"]
for score, name in zip(scores, class_names):
    print(
        "This model is %.2f percent confident that CT scan is %s"
        % ((100 * score), name)
    )

In [None]:
# Predict on a separate test scan using the saved weights.
model.load_weights("./poids-v4.h5")

# Preprocess the scan the same way as the training data.
xnew = np.array(process_scan('../input/testingset/o19890323_233654IM051RoutineTHORAXFANONYMIZEDs002a001.nii'))
print(xnew.shape)

# Add the leading batch axis expected by model.predict.
XnewX = xnew[np.newaxis, ...]
print(XnewX.shape)

prediction = model.predict(XnewX)[0]
abnormal_score = prediction[0]
scores = [1 - abnormal_score, abnormal_score]
class_names = ["normal", "abnormal"]
for name, score in zip(class_names, scores):
    message = "This model is %.2f percent confident that CT scan is %s" % ((100 * score), name)
    print(message)