In [1]:
import os
import time
import uuid
import cv2
import json
import random
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from matplotlib import pyplot as plt
from sklearn.model_selection import *
from tensorflow.keras.applications.resnet_v2 import ResNet152V2
from tensorflow.keras.applications import *
from tensorflow.keras.models import Model
from tensorflow.keras.layers import *

In [2]:
# Limit GPU Memmory growth:
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
# # A function to annotate and extract labels
# image_size = 540

# def load_data(image_dir, annotation_dir):
# #     images = []
# #     labels = []
#     data = []

#     for image_path in tqdm(os.listdir(image_dir)):
#         if image_path.endswith('.jpg'):
#             image = cv2.imread(os.path.join(image_dir, image_path), cv2.IMREAD_GRAYSCALE)
#             # Normalize to [0, 1]
#             image = cv2.resize(image,(image_size, image_size))
#             image = image.astype('float32') / 255.0  
# #             images.append(image)

#             annotation_path = os.path.join(annotation_dir, os.path.splitext(image_path)[0] + '.json')
#             if os.path.exists(annotation_path):
#                 with open(annotation_path, 'r') as f:
#                     annotation = json.load(f)
#                 # Extract bounding box coordinates and class label from the annotation
#                 bbox = annotation['shapes'][0]['points']
#                 label = 1  # Tumor class label
#             else:
#                 bbox = []
#                 # Normal class label
#                 label = 0  
# #             labels.append((bbox, label))
#             data.append({'image':image, 'bbox':bbox, 'label':label})
            

#     return data
# #     return images, labels

In [3]:
# rewrite in order to prevent any possible errors
image_size = 540

def load_data(image_dir, annotation_dir):
    data = []

    for image_path in tqdm(os.listdir(image_dir)):
        if image_path.endswith('.jpg'):
            image = cv2.imread(os.path.join(image_dir, image_path), cv2.IMREAD_GRAYSCALE)
            image = cv2.resize(image, (540, 540))
            image = image.astype('float32') / 255.0

            annotation_path = os.path.join(annotation_dir, os.path.splitext(image_path)[0] + '.json')
            if os.path.exists(annotation_path):
                with open(annotation_path, 'r') as f:
                    annotation = json.load(f)
                
                bbox = annotation['shapes'][0]['points']
                label = 1  
                
            else:
                bbox = [[-1, -1], [-1, -1]]
                label = 0

            data.append({'image': image, 'bbox': bbox, 'label': label})

    return data

In [4]:
data = load_data("Data\\Train_Data\\Images", "Data\\Train_Data\\Labels")
# images, labels = load_data("Data\\Train_Data\\Images", "Data\\Train_Data\\Labels")

100%|███████████████████████████████████████████████████████████████████████████| 13832/13832 [00:44<00:00, 311.07it/s]


# Train Test Validation Split

In [5]:
train, test_data_init = train_test_split(data, test_size=0.3, stratify=[d['label'] for d in data])

In [6]:
test, val = train_test_split(test_data_init, test_size=0.5, stratify=[d['label'] for d in test_data_init])

In [7]:
len(train), len(test), len(val)

(9682, 2075, 2075)

In [None]:
#double checking the distribution of labels
# z = 0
# o = 1
# for idx in range(len(val)):
#     if val[idx]['label']==0:
#         z+=1
#     else:
#         o+=1
        
# o = o*100/len(val) 

# print(f"The percentage of ones is : {o}\n")

In [None]:
# train_labels = []
# train_images = []

# for idx in tqdm(range(len(train))):
#     temp_var_1 = train[idx]['bbox']
#     temp_var_2 = train[idx]['label']
    
#     train_labels.append((temp_var_2, temp_var_1))
#     train_images.append(train[idx]['image'])
    
#     del temp_var_1, temp_var_2

In [8]:
# Use this instead
train_images = np.array([d['image'] for d in train])
train_labels_bbox = np.array([d['bbox'] for d in train])
train_labels_class = np.array([d['label'] for d in train])

In [None]:
# test_labels = []
# test_images = []

# for idx in tqdm(range(len(test))):
#     temp_var_1 = test[idx]['bbox']
#     temp_var_2 = test[idx]['label']
    
#     test_labels.append((temp_var_2, temp_var_1))
#     test_images.append(test[idx]['image'])
    
#     del temp_var_1, temp_var_2

In [9]:
test_images = np.array([d['image'] for d in test])
test_labels_bbox = np.array([d['bbox'] for d in test])
test_labels_class = np.array([d['label'] for d in test])

In [None]:
# val_labels = []
# val_images = []

# for idx in tqdm(range(len(val))):
#     temp_var_1 = val[idx]['bbox']
#     temp_var_2 = val[idx]['label']
    
#     val_labels.append((temp_var_2, temp_var_1))
#     val_images.append(val[idx]['image'])
    
#     del temp_var_1, temp_var_2

In [10]:
val_images = np.array([d['image'] for d in val])
val_labels_bbox = np.array([d['bbox'] for d in val])
val_labels_class = np.array([d['label'] for d in val])

The data succesfully distributed into 3 sections: 1.Train, 2.Test, 3.Validation and on each one of these, the first element is image array and the label element is a tuple of bouding box and label

In [11]:
import gc

del data, test_data_init, train, test, val
gc.collect()

0

# Beware! Error Zone

In [None]:
"""Create TensorFlow datasets for images, bboxes, and labels"""
# train_images = tf.data.Dataset.from_tensor_slices(train_images)
# train_labels = tf.data.Dataset.from_tensor_slices(train_labels)
# # Zip the datasets to create the final dataset
# train_ds = tf.data.Dataset.zip((train_images, train_labels))
# train_ds = train_ds.shuffle(10000)
# train_ds = train_ds.batch(8)
# train_ds = train_ds.prefetch(4)
# # Clean your path
# del train_images, train_labels
# gc.collect()

In [None]:
"""Create TensorFlow datasets for images, bboxes, and labels"""
# test_images = tf.data.Dataset.from_tensor_slices(test_images)
# test_labels = tf.data.Dataset.from_tensor_slices(test_labels)
# # Zip the datasets to create the final dataset
# test_ds = tf.data.Dataset.zip((test_images, test_labels))
# test_ds = test_ds.shuffle(2100)
# test_ds = test_ds.batch(8)
# test_ds = test_ds.prefetch(4)
# # Clean your path
# del test_images, test_labels
# gc.collect()

In [None]:
"""Create TensorFlow datasets for images, bboxes, and labels"""
# val_images = tf.data.Dataset.from_tensor_slices(val_images)
# val_labels = tf.data.Dataset.from_tensor_slices(val_labels)
# # Zip the datasets to create the final dataset
# val_ds = tf.data.Dataset.zip((val_images, val_labels))
# val_ds = val_ds.shuffle(2100)
# val_ds = val_ds.batch(8)
# val_ds = val_ds.prefetch(4)
# # Clean your path
# del val_images, val_labels
# gc.collect()

InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

# Solved !

In this modified code, we define a DataGenerator class that takes in the train_images and train_labels as the training data, and a batch_size of 8. We then instantiate the generator object train_gen and pass it to the model.fit() method.

The number of steps per epoch is calculated by dividing the length of the train_images by the batch_size. This value is then passed as the steps_per_epoch argument to model.fit().

By using the DataGenerator class instead of creating a tf.data.Dataset, we're able to load the data in batches and avoid memory issues that could occur if we tried to load the entire dataset into memory at once.

In [12]:
from tensorflow.keras.utils import Sequence

# class DataGenerator:
#     def __init__(self, images, labels, batch_size):
#         self.images, self.labels = images, labels
#         self.batch_size = batch_size
#         self.indices = np.arange(len(images))

#     def __len__(self):
#         return int(np.ceil(len(self.images) / float(self.batch_size)))

#     def __getitem__(self, idx):
#         indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
#         batch_images = [self.images[i] for i in indices]
#         batch_labels = [self.labels[i] for i in indices]
# #         batch_labels = [l[1] for l in batch_labels]
# #         batch_boxes = [b[0] for b in batch_labels]
        
        
#         return np.array(batch_images), np.array(batch_labels)


class DataGenerator(Sequence):
    def __init__(self, images, labels_bbox, labels_class, batch_size):
        self.images = images
        self.labels_bbox = labels_bbox
        self.labels_class = labels_class
        self.batch_size = batch_size
        self.indices = np.arange(len(images))

    def __len__(self):
        return int(np.ceil(len(self.images) / float(self.batch_size)))

    def __getitem__(self, idx):
        indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_images = self.images[indices]
        batch_labels_bbox = self.labels_bbox[indices]
        batch_labels_class = self.labels_class[indices]
        
        # A bit of headache
        batch_images = np.expand_dims(batch_images, axis=-1)
#         # Another headache
#         batch_labels_bbox = np.expand_dims(batch_labels_bbox, axis=-1)
#         batch_labels_class = np.expand_dims(batch_labels_class, axis=-1)
    
        return batch_images, {'bb_output': batch_labels_bbox, 'label_output': batch_labels_class}
    

train_gen = DataGenerator(train_images, train_labels_bbox, train_labels_class, batch_size=16)
test_gen = DataGenerator(test_images, test_labels_bbox, test_labels_class, batch_size=16)
val_gen = DataGenerator(val_images, val_labels_bbox, val_labels_class, batch_size=16)

In [None]:
# train_gen = DataGenerator(train_images, train_labels, batch_size=16)

In [None]:
# test_gen = DataGenerator(test_images, test_labels, batch_size=16)

In [None]:
# val_gen = DataGenerator(val_images, val_labels, batch_size=16)

In [None]:
# Now that we created our generators we dont need these items
# del train_images, train_labels, test_images, test_labels, val_images, val_labels
# gc.collect()

# Modeling

In [13]:
def create_model(): 
    input_layer = Input(shape=(540,540,3))
    res_net = ResNet152V2(weights='imagenet', include_top=False)(input_layer)

    # Bounding box model
    f2 = GlobalMaxPooling2D()(res_net)
    regress1 = Dense(128, activation='relu')(f2)
    regress2 = Dense(4, activation='sigmoid', name='bb_output')(regress1)
    
    # Classification Model  
    f1 = GlobalMaxPooling2D()(res_net)
    class1 = Dense(128, activation='relu')(f1)
    class2 = Dense(1, activation='sigmoid', name='label_output')(class1)
    
    model = Model(inputs=input_layer, outputs=[regress2, class2])
    return model

In [14]:
tumor_detector = create_model()

# Testing The Reliability of Model

In [None]:
# import keras.utils as image
# from keras.applications.efficientnet import preprocess_input

# root = "C:\\Users\\Eurus\\Desktop\\Train_Data\\Tumor\\Tumor-img-00016-00005.jpg"

# # Read it from path
# img = image.load_img(root, target_size=(540, 540))
# # Prep it 
# img_data = image.img_to_array(img)
# img_data = np.expand_dims(img_data, axis=0)
# img_data = preprocess_input(img_data)
# # Extract the features
# features = tumor_detector.predict(img_data)
# features

In [None]:
# del features, img, img_data, root

In [15]:
from sklearn.metrics import f1_score
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# A custom F1-Score function
def f1_metric(y_true, y_pred):
    y_pred = tf.round(y_pred)
    tp = tf.reduce_sum(y_true * y_pred)
    fp = tf.reduce_sum(tf.clip_by_value(y_pred - y_true, 0, 1))
    fn = tf.reduce_sum(tf.clip_by_value(y_true - y_pred, 0, 1))
    precision = tp / (tp + fp + tf.keras.backend.epsilon())
    recall = tp / (tp + fn + tf.keras.backend.epsilon())
    f1 = 2 * precision * recall / (precision + recall + tf.keras.backend.epsilon())
    return f1

In [16]:
def localization_loss(y_true, y_pred):
    delta_coord = tf.reduce_sum(tf.square(y_true[:,:2] - y_pred[:,:2]))
    
    h_true = y_true[:,3] - y_true[:,1]
    w_true = y_true[:,2] - y_true[:,0]
    
    h_pred = y_pred[:,3] - y_pred[:,1]
    w_pred = y_pred[:,2] - y_pred[:,0]
    
    delta_size = tf.reduce_sum(tf.square(w_true - w_pred) + tf.square(h_true-h_pred))
    
    return delta_coord + delta_size

In [17]:
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import Accuracy, Precision, Recall


batches_per_epoches = len(train_gen)
lr_decay = (1./0.75 -1)/batches_per_epoches

opt = tf.keras.optimizers.SGD(learning_rate=0.001, decay=lr_decay)
classloss = tf.keras.losses.BinaryCrossentropy()

# losses = {
#     'bb_output': localization_loss,
#     'label_output': BinaryCrossentropy()
# }

# metrics = {
#     'bb_output': [Accuracy()],
#     'label_output': [Accuracy(), Precision(), Recall()]
# }

In [None]:
# tumor_detector.compile(optimizer=opt, loss=losses, metrics=metrics)

In [18]:
tumor_detector.compile(
    optimizer=opt,
    loss={'bb_output': localization_loss, 'label_output': 'binary_crossentropy'},
    metrics={'bb_output': Accuracy(), 'label_output': [Accuracy(), f1_metric]}
)

In [19]:
checkpoint = ModelCheckpoint('model_weights_v1.h5', save_best_only=True, save_weights_only=True)
lr_scheduler = ReduceLROnPlateau(factor=0.2, patience=3)

In [20]:
history = tumor_detector.fit(
    train_gen,
    validation_data=val_gen,
    epochs=50,
    callbacks=[checkpoint, lr_scheduler]
)

Epoch 1/50


ValueError: in user code:

    File "C:\Users\Eurus\Desktop\Desktop Files\IBNB-Paper\GPU-Lab-2023\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\Eurus\Desktop\Desktop Files\IBNB-Paper\GPU-Lab-2023\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Eurus\Desktop\Desktop Files\IBNB-Paper\GPU-Lab-2023\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\Eurus\Desktop\Desktop Files\IBNB-Paper\GPU-Lab-2023\lib\site-packages\keras\engine\training.py", line 998, in train_step
        return self.compute_metrics(x, y, y_pred, sample_weight)
    File "C:\Users\Eurus\Desktop\Desktop Files\IBNB-Paper\GPU-Lab-2023\lib\site-packages\keras\engine\training.py", line 1092, in compute_metrics
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "C:\Users\Eurus\Desktop\Desktop Files\IBNB-Paper\GPU-Lab-2023\lib\site-packages\keras\engine\compile_utils.py", line 605, in update_state
        metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "C:\Users\Eurus\Desktop\Desktop Files\IBNB-Paper\GPU-Lab-2023\lib\site-packages\keras\utils\metrics_utils.py", line 77, in decorated
        update_op = update_state_fn(*args, **kwargs)
    File "C:\Users\Eurus\Desktop\Desktop Files\IBNB-Paper\GPU-Lab-2023\lib\site-packages\keras\metrics\base_metric.py", line 143, in update_state_fn
        return ag_update_state(*args, **kwargs)
    File "C:\Users\Eurus\Desktop\Desktop Files\IBNB-Paper\GPU-Lab-2023\lib\site-packages\keras\metrics\base_metric.py", line 700, in update_state  **
        matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\Eurus\Desktop\Desktop Files\IBNB-Paper\GPU-Lab-2023\lib\site-packages\keras\metrics\metrics.py", line 3571, in accuracy  **
        y_true.shape.assert_is_compatible_with(y_pred.shape)

    ValueError: Shapes (None, None, None, None) and (None, 4) are incompatible


# Evaluate

In [None]:
test_loss, test_metrics = tumor_detector.evaluate_generator(test_gen)