In [1]:
import ast
import os
import random

import numpy as np
from tqdm import tqdm
import pandas as pd
import tensorflow as tf
import cv2
import pydicom
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import ast

In [2]:
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")

# Show all columns of wide frames, and never wrap a frame's repr across lines.
for _option_name, _option_value in (
    ("display.max_columns", None),
    ("display.expand_frame_repr", False),
):
    pd.set_option(_option_name, _option_value)

In [3]:
# Load the processed label table stored one directory above the notebook.
_labels_csv_path = os.path.join("..", "processed_label_data.csv")
raw_df = pd.read_csv(_labels_csv_path)

In [4]:
def _parse_class_ids(value):
    """Convert a stringified Python literal from the CSV into the real object.

    Values that are not strings (e.g. lists produced by a previous run of this
    cell) are returned unchanged, so re-running the cell is a no-op instead of
    raising ``ValueError`` from ``ast.literal_eval``.
    """
    return ast.literal_eval(value) if isinstance(value, str) else value


# "class_ids" is read from the CSV as text; turn each entry back into a Python object.
raw_df["class_ids"] = raw_df["class_ids"].apply(_parse_class_ids)

In [5]:
# Reproducible subsets: N_PER_CLASS rows whose class_ids contain class 0
# (ae_filtered_df) and N_PER_CLASS rows whose class_ids contain class 14
# (nf_filtered_df).  A fixed random_state makes "Restart & Run All" pick the
# same rows every time; without it each run sampled different images.
N_PER_CLASS = 500
SAMPLE_SEED = 47

ae_filtered_df = raw_df[raw_df["class_ids"].apply(lambda ids: 0 in ids)].sample(
    N_PER_CLASS, random_state=SAMPLE_SEED)
nf_filtered_df = raw_df[raw_df["class_ids"].apply(lambda ids: 14 in ids)].sample(
    N_PER_CLASS, random_state=SAMPLE_SEED)

In [6]:
from keras.layers import Conv2D, Dropout, MaxPool2D, Dense, Input, GlobalAveragePooling2D, BatchNormalization, Add, ReLU
from keras.models import Model
from keras.optimizers import RMSprop
from keras.utils import plot_model
def build_model(inputs, kernel_size, n_classes):
    """Assemble the CNN classifier on top of ``inputs`` and return it as a Keras ``Model``.

    The feature extractor consists of three Conv2D -> BatchNormalization ->
    ReLU -> MaxPool2D stages with 16, 32 and 64 filters; a ``residual_block``
    follows each of the first two stages.  The features are globally
    average-pooled and passed through Dense(512) -> Dropout(0.25) ->
    Dense(256) -> Dropout(0.25) -> Dense(128) before a sigmoid output layer
    with ``n_classes`` units.

    Args:
        inputs: Keras input tensor the network is built on.
        kernel_size: kernel size used by every convolution (also forwarded to
            ``residual_block``).
        n_classes: number of units in the sigmoid output layer.

    Returns:
        ``Model`` mapping ``inputs`` to the sigmoid outputs.
    """

    def conv_stage(tensor, n_filters):
        # Conv -> BN -> ReLU followed by 2x2 max-pooling.
        tensor = Conv2D(filters=n_filters, kernel_size=kernel_size, padding="same")(tensor)
        tensor = BatchNormalization()(tensor)
        tensor = ReLU()(tensor)
        return MaxPool2D(pool_size=(2, 2))(tensor)

    # Convolutional trunk (layers are created in the same order as before, so
    # auto-generated layer names are unchanged).
    features = conv_stage(inputs, 16)
    features = residual_block(features, 16, kernel_size)
    features = conv_stage(features, 32)
    features = residual_block(features, 32, kernel_size)
    features = conv_stage(features, 64)

    # Classification head: only the first two dense layers are followed by dropout.
    head = GlobalAveragePooling2D()(features)
    for units, drop_rate in ((512, 0.25), (256, 0.25), (128, None)):
        head = Dense(units, activation="relu")(head)
        if drop_rate is not None:
            head = Dropout(drop_rate)(head)
    outputs = Dense(n_classes, activation="sigmoid")(head)

    return Model(inputs=inputs, outputs=outputs)


def residual_block(x, filters, kernel_size, strides = 1):
    """Residual block with a projection shortcut; the output has ``2 * filters`` channels.

    Main path: Conv2D(filters) -> BN -> ReLU -> Conv2D(2*filters) -> BN.
    Shortcut:  Conv2D(2*filters) -> BN.
    Both paths are summed and passed through a final ReLU.

    Args:
        x: input tensor.
        filters: filters of the first main-path convolution; the second
            main-path convolution and the shortcut use ``2 * filters``.
        kernel_size: kernel size of all three convolutions.
        strides: spatial down-sampling factor of the block.  It is applied once
            on each path (first main-path convolution and shortcut).

    Returns:
        Output tensor of the block.
    """
    # Projection shortcut: matches the main path's channel count and stride.
    shortcut = Conv2D(filters=2*filters, kernel_size=kernel_size, strides=strides, padding="same")(x)
    shortcut = BatchNormalization()(shortcut)

    x = Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, padding="same")(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    # Stride 1 here: the original applied `strides` to this convolution as well,
    # so for strides > 1 the main path was down-sampled twice while the shortcut
    # was down-sampled once, and Add() failed on mismatched shapes.
    x = Conv2D(filters=2*filters, kernel_size=kernel_size, strides=1, padding="same")(x)
    x = BatchNormalization()(x)

    x = Add()([shortcut, x])
    x = ReLU()(x)

    return x

# Build the network for 600x600 three-channel inputs with a single sigmoid output.
input_tensor = Input(shape=(600, 600, 3))
model = build_model(input_tensor, kernel_size=8, n_classes=1)
model.summary()

# Create the optimizer, export the architecture diagram, then compile for
# binary classification.
optimizer = RMSprop(learning_rate=0.001)
plot_model(model, to_file="model.png")
model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 600, 600, 3)]        0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 600, 600, 16)         3088      ['input_1[0][0]']             
                                                                                                  
 batch_normalization (Batch  (None, 600, 600, 16)         64        ['conv2d[0][0]']              
 Normalization)                                                                                   
                                                                                                  
 re_lu (ReLU)                (None, 600, 600, 16)         0         ['batch_normalization[0][0

In [7]:
from keras.callbacks import Callback, ModelCheckpoint, ReduceLROnPlateau
from sklearn.model_selection import StratifiedShuffleSplit


class MaxAccuracy(Callback):
    """Keras callback that tracks and prints the best train/validation accuracy so far.

    The running maxima are stored on the instance, so reusing one instance
    across several ``fit`` calls reports the best value over all of them.
    """

    def __init__(self):
        super().__init__()
        self.max_train_accuracy = 0.0
        self.max_val_accuracy = 0.0

    def on_epoch_end(self, epoch, logs=None):
        """Update the running maxima from ``logs`` and print them.

        A metric that is absent from ``logs`` (e.g. ``val_accuracy`` when
        training without validation data, or ``logs`` being ``None``) is
        skipped instead of raising ``KeyError``/``TypeError``.
        """
        logs = logs or {}
        train_accuracy = logs.get('accuracy')
        val_accuracy = logs.get('val_accuracy')
        if train_accuracy is not None and train_accuracy > self.max_train_accuracy:
            self.max_train_accuracy = train_accuracy
        if val_accuracy is not None and val_accuracy > self.max_val_accuracy:
            self.max_val_accuracy = val_accuracy
        print(
            f" Max Train Accuracy: {self.max_train_accuracy:.4f}, Max Validation Accuracy: {self.max_val_accuracy:.4f}")

# def custom_data_generator(x, y, batch_size):
#     num_samples = len(x)
#     steps_per_epoch = num_samples // batch_size
#
#     for i in range(steps_per_epoch):
#         start_idx = i * batch_size
#         end_idx = (i + 1) * batch_size
#
#         batch_filenames = x[start_idx:end_idx]
#         batch_labels = y[start_idx:end_idx]
#
#         batch_images = []
#         for filename in batch_filenames:
#             img = cv2.imread(os.path.join("..", "dataLake", filename+".png"))
#             img = img/255
#             batch_images.append(img)
#
#         yield np.array(batch_images), np.array(batch_labels)


def train_model(model, x, y, n_splits=2, batch_size=32, test_size=0.2, random_state=47, n_epochs=20,
                min_learning_rate=0.0000001, lr_decay_factor=0.8):
    """Train ``model`` on several stratified shuffle splits of the image ids in ``x``.

    For every split the PNG files ``../dataLake/<image_id>.png`` of the train
    and validation ids are loaded into memory, scaled by 1/255 and passed to
    ``model.fit``.  The best model of each split (by ``val_accuracy``) is saved
    to ``model_<split index>.h5``.

    Args:
        model: Keras model to fit; this function does not compile it.
        x: sequence of image ids (file names without the ``.png`` extension).
        y: labels aligned with ``x``; also used for stratification.
        n_splits: number of stratified shuffle splits.
        batch_size: batch size passed to ``model.fit``.  It used to be ignored
            in favour of a hard-coded 32; the default is now 32 so callers that
            relied on the default train exactly as before.
        test_size: fraction of samples held out for validation in each split.
        random_state: seed of the splitter.
        n_epochs: epochs per split.
        min_learning_rate: lower bound for ``ReduceLROnPlateau``.
        lr_decay_factor: factor by which ``ReduceLROnPlateau`` scales the
            learning rate.

    Returns:
        List with the object returned by ``model.fit`` for each split.

    Raises:
        FileNotFoundError: if an image file is missing or cannot be decoded.
    """
    # Accept plain lists as well: the split indices below need array indexing.
    x = np.asarray(x)
    y = np.asarray(y)

    splitter = StratifiedShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=random_state)
    base_path = os.path.join("..", "dataLake")

    def load_images(image_ids):
        # cv2.imread returns None instead of raising when a file is missing or
        # unreadable; fail early with the offending path.
        images = []
        for image_id in image_ids:
            image_path = os.path.join(base_path, image_id + ".png")
            image = cv2.imread(image_path)
            if image is None:
                raise FileNotFoundError(f"Could not read image: {image_path}")
            images.append(image)
        # float32 instead of the implicit float64 halves the memory footprint.
        return np.asarray(images, dtype=np.float32) / 255

    histories = []
    # A single instance is shared by all splits, so the printed maxima are
    # global across splits.
    max_accuracy = MaxAccuracy()

    # NOTE(review): the same `model` keeps training from split to split.
    # Shuffle splits are not disjoint, so the validation set of a later split
    # can contain samples the model was already trained on in an earlier one.
    for split_id, (train_ids, test_ids) in enumerate(splitter.split(x, y)):
        # `mode` was misspelled as `model`, so "max" never reached the callback.
        checkpoint = ModelCheckpoint("model_" + str(split_id) + ".h5", monitor="val_accuracy",
                                     save_best_only=True, mode="max")
        lr_schedule = ReduceLROnPlateau(monitor="val_loss", factor=lr_decay_factor,
                                        patience=2, min_lr=min_learning_rate)

        img_train = load_images(x[train_ids])
        img_test = load_images(x[test_ids])
        y_train, y_test = y[train_ids], y[test_ids]

        histories.append(model.fit(img_train, y_train, batch_size=batch_size,
                                   epochs=n_epochs, validation_data=(img_test, y_test),
                                   callbacks=[lr_schedule, checkpoint, max_accuracy]))

    return histories

# Image ids of both subsets: ae_filtered_df first, nf_filtered_df second.
x1 = ae_filtered_df["image_id"].to_list()
x2 = nf_filtered_df["image_id"].to_list()

# Label 1 for ids from ae_filtered_df, 0 for ids from nf_filtered_df.  The
# counts are taken from the lists themselves rather than a hard-coded 500, so
# labels stay aligned with the ids if the sample sizes ever change.
y = np.asarray([1] * len(x1) + [0] * len(x2))

x1.extend(x2)
x = np.asarray(x1)

# Keep the per-split results instead of discarding the return value.
histories = train_model(model, x, y)

Epoch 1/20

KeyboardInterrupt: 

In [None]:
# Show the label row(s) recorded for one specific image id.
raw_df.loc[raw_df["image_id"] == "469bd3f41e12a8f03a8ef0ce191743ce"]