In [1]:
import os
import cv2
import glob
import cv2 as cv
import numpy as np
import pandas as pd
from tqdm import tqdm
import pydicom

import warnings
warnings.filterwarnings('ignore')

In [2]:
train_df = pd.read_csv("/kaggle/input/rsna-breast-cancer-detection/train.csv")
test_df = pd.read_csv("/kaggle/input/rsna-breast-cancer-detection/test.csv")

train_dir = '/kaggle/input/rnsa-train-data-with-roi-224x224/train_images_processed_cv2_dicomsdl_512/'
test_dir = '/kaggle/input/rsna-breast-cancer-detection/test_images/'

In [3]:
train_df.head()

Unnamed: 0,site_id,patient_id,image_id,laterality,view,age,cancer,biopsy,invasive,BIRADS,implant,density,machine_id,difficult_negative_case
0,2,10006,462822612,L,CC,61.0,0,0,0,,0,,29,False
1,2,10006,1459541791,L,MLO,61.0,0,0,0,,0,,29,False
2,2,10006,1864590858,R,MLO,61.0,0,0,0,,0,,29,False
3,2,10006,1874946579,R,CC,61.0,0,0,0,,0,,29,False
4,2,10011,220375232,L,CC,55.0,0,0,0,0.0,0,,21,True


In [4]:
train = train_df[['patient_id', 'image_id', 'cancer']]
train['image_path'] = train.apply(lambda x: train_dir + str(x.patient_id) + '/' + str(x.image_id) + '.png', axis=1)
train['cancer'] = train['cancer'].apply(lambda x: str(x))
train.head()

Unnamed: 0,patient_id,image_id,cancer,image_path
0,10006,462822612,0,/kaggle/input/rnsa-train-data-with-roi-224x224...
1,10006,1459541791,0,/kaggle/input/rnsa-train-data-with-roi-224x224...
2,10006,1864590858,0,/kaggle/input/rnsa-train-data-with-roi-224x224...
3,10006,1874946579,0,/kaggle/input/rnsa-train-data-with-roi-224x224...
4,10011,220375232,0,/kaggle/input/rnsa-train-data-with-roi-224x224...


In [5]:
train_pos = train[train.cancer == '1']
train_neg = train[train.cancer == '0']
train_pos.shape, train_neg.shape

((1158, 4), (53548, 4))

In [6]:
from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(train, test_size = 0.2)
train_data.shape, test_data.shape

((43764, 4), (10942, 4))

In [7]:
import math
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, Callback
from tensorflow.keras.layers import GlobalAveragePooling2D, BatchNormalization, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Activation

In [8]:
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    train_data,
    directory=None,
    x_col='image_path',
    y_col='cancer',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='binary')

validation_generator = test_datagen.flow_from_dataframe(
    test_data,
    directory=None,
    x_col='image_path',
    y_col='cancer',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='binary')


Found 43764 validated image filenames belonging to 2 classes.
Found 10942 validated image filenames belonging to 2 classes.


In [9]:
def pF1Score(y_true, y_pred):
    true_positives = tf.keras.backend.sum(tf.keras.backend.round(tf.keras.backend.clip(y_true * y_pred, 0, 1)))
    predicted_positives = tf.keras.backend.sum(tf.keras.backend.round(tf.keras.backend.clip(y_pred, 0, 1)))
    possible_positives = tf.keras.backend.sum(tf.keras.backend.round(tf.keras.backend.clip(y_true, 0, 1)))
    precision = true_positives / (predicted_positives + tf.keras.backend.epsilon())
    recall = true_positives / (possible_positives + tf.keras.backend.epsilon())
    f1 = 2 * (precision * recall) / (precision + recall + tf.keras.backend.epsilon())
    return f1

callback = EarlyStopping(monitor='val_loss', patience=3)

In [10]:
def model_EfficientNetB0(path, lr = 0.001, dr_rate = 0.15):
    model = EfficientNetB0(include_top=False, weights=path)
    model.trainable = False

    # Rebuild top
    x = GlobalAveragePooling2D()(model.output)
    x = BatchNormalization()(x)
    x = Dropout(dr_rate)(x)
    dense_1 = Dense(64, activation="relu")(x)
    dense_2 = Dense(32, activation="relu")(dense_1)
    outputs = Dense(1, activation="sigmoid")(dense_2)

    # Compile
    model = Model(model.inputs, outputs, name="EfficientNet")
    optimizer = Adam(learning_rate=lr)
    model.compile( optimizer=optimizer, loss="binary_crossentropy", metrics=['accuracy', pF1Score])
    return model

In [11]:
efficientB0 = model_EfficientNetB0('/kaggle/input/efficentnet-b0b5-tensorflow-24-notop/efficientnet-b0_tf24_imagenet_1000_notop.h5')
# efficientB0.summary()

In [14]:
history = efficientB0.fit(
    train_generator,
    steps_per_epoch= len(train_data)//128,
    epochs = 10,
    batch_size=16,
    validation_data=validation_generator,
    validation_steps = len(test_data)//128,
    verbose = 1,
    callbacks=[callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


In [15]:
efficientB0.save('EfficientNet-1.h5')