
Importing the dataset from Drive


In [None]:
!unzip /content/drive/MyDrive/NEGATIVE.zip  -d /content
!unzip /content/drive/MyDrive/POSITIVE.zip  -d /content
!unzip /content/drive/MyDrive/POSITIVEGAN.zip  -d /content/content/Licenta/Processed_RSNA2/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/content/Licenta/Processed_RSNA2/NEGATIVE/29087_951624387.png  
  inflating: /content/content/Licenta/Processed_RSNA2/NEGATIVE/28959_407797154.png  
  inflating: /content/content/Licenta/Processed_RSNA2/NEGATIVE/46144_112792357.png  
  inflating: /content/content/Licenta/Processed_RSNA2/NEGATIVE/21304_1273578482.png  
  inflating: /content/content/Licenta/Processed_RSNA2/NEGATIVE/2516_992751859.png  
  inflating: /content/content/Licenta/Processed_RSNA2/NEGATIVE/2182_2067189673.png  
  inflating: /content/content/Licenta/Processed_RSNA2/NEGATIVE/6685_2127534398.png  
  inflating: /content/content/Licenta/Processed_RSNA2/NEGATIVE/28914_1080307280.png  
  inflating: /content/content/Licenta/Processed_RSNA2/NEGATIVE/43539_765637762.png  
  inflating: /content/content/Licenta/Processed_RSNA2/NEGATIVE/44626_1988104905.png  
  inflating: /content/content/Licenta/Processed_RSNA2/NEGATIVE/11107_1843457717.png

Using ResNet18 model

In [None]:
from tensorflow.keras.optimizers.legacy import Adam, SGD
from tensorflow.keras.metrics import Precision, Recall, AUC
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, Model
# Enable TensorFlow's JIT (XLA) compilation before any model is built.
tf.config.optimizer.set_jit(True)
def resnet_block(input_tensor, filters, kernel_size=3, stride=1, conv_shortcut=True, name=None):
    """Build one two-convolution residual block on top of ``input_tensor``.

    Main path: Conv -> BatchNorm -> ReLU -> Conv -> BatchNorm. Its output is
    added to the shortcut branch and passed through a final ReLU.

    Args:
        input_tensor: Keras tensor feeding the block. Batch normalisation is
            applied on axis 3, i.e. a channels-last layout is expected.
        filters: Number of filters of every convolution in the block.
        kernel_size: Kernel size of the two main-path convolutions.
        stride: Stride of the first main-path convolution and of the
            projection shortcut.
        conv_shortcut: If True, the shortcut is a 1x1 convolution followed by
            batch normalisation. If False, ``input_tensor`` is added as is, so
            its shape must already match the main-path output.
        name: Prefix for the layer names. If None, no explicit names are set
            and Keras generates unique ones.

    Returns:
        The output tensor of the block.
    """
    def layer_name(suffix):
        # Concatenating onto the default ``name=None`` used to raise a
        # TypeError; fall back to Keras' automatic naming instead.
        return None if name is None else name + suffix

    # Main path, first convolution (carries the stride).
    x = layers.Conv2D(filters, kernel_size, strides=stride, padding="same",
                      kernel_initializer="he_normal", name=layer_name("_conv1"))(input_tensor)
    x = layers.BatchNormalization(axis=3, name=layer_name("_bn1"))(x)
    x = layers.Activation('relu', name=layer_name("_relu1"))(x)

    # Main path, second convolution (stride 1, no activation before the add).
    x = layers.Conv2D(filters, kernel_size, padding="same",
                      kernel_initializer="he_normal", name=layer_name("_conv2"))(x)
    x = layers.BatchNormalization(axis=3, name=layer_name("_bn2"))(x)

    if conv_shortcut:
        # Projection shortcut: brings the input to the main path's
        # resolution and channel count.
        shortcut = layers.Conv2D(filters, 1, strides=stride,
                                 kernel_initializer="he_normal",
                                 name=layer_name("_shortcut"))(input_tensor)
        shortcut = layers.BatchNormalization(axis=3, name=layer_name("_shortcut_bn"))(shortcut)
    else:
        # Identity shortcut.
        shortcut = input_tensor

    x = layers.add([x, shortcut], name=layer_name("_add"))
    x = layers.Activation('relu', name=layer_name("_relu2"))(x)
    return x

def resnet18(input_shape=(256, 256, 1)):
    """Assemble the ResNet18-style binary classifier used in this notebook.

    Layout: a 7x7 stride-2 convolutional stem with 3x3 max pooling, eight
    residual blocks in four stages (64, 128, 256 and 512 filters; the first
    block of stages two to four has stride 2), global average pooling and a
    single sigmoid output unit. Every block is built with the projection
    (``conv_shortcut=True``) shortcut.

    Args:
        input_shape: Shape of one input image, channels last.

    Returns:
        A Keras ``Model`` named ``'resnet18'``.
    """
    inputs = layers.Input(shape=input_shape)

    # Stem: conv -> batch norm -> ReLU -> max pool.
    features = layers.Conv2D(
        64, 7, strides=2, padding='same',
        kernel_initializer='he_normal', name='conv1',
    )(inputs)
    features = layers.BatchNormalization(axis=3, name='bn_conv1')(features)
    features = layers.Activation('relu', name='relu_conv1')(features)
    features = layers.MaxPooling2D(
        (3, 3), strides=(2, 2), padding="same", name="pool1",
    )(features)

    # (filters, stride) of block1 .. block8, in order.
    block_specs = (
        (64, 1), (64, 1),
        (128, 2), (128, 1),
        (256, 2), (256, 1),
        (512, 2), (512, 1),
    )
    for number, (filters, stride) in enumerate(block_specs, start=1):
        features = resnet_block(features, filters, stride=stride,
                                conv_shortcut=True, name=f"block{number}")

    # Classification head: one sigmoid unit on the pooled features.
    pooled = layers.GlobalAveragePooling2D(name="avg_pool")(features)
    outputs = layers.Dense(1, activation='sigmoid', name='fc1')(pooled)

    return Model(inputs, outputs, name='resnet18')

def create_model():
  """Return a newly built ResNet18 for 256x256 single-channel images."""
  return resnet18(input_shape=(256, 256, 1))

Creating DataFrames

In [None]:
import pandas as pd
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split, KFold

data_dir = '/content/content/Licenta/Processed_RSNA2/'

# Fixed seed so the train/test partition is the same on every run.
SPLIT_SEED = 42

# Collect every PNG below data_dir. os.walk returns entries in an arbitrary,
# filesystem-dependent order, so the list is sorted to keep the seeded split
# reproducible.
image_paths = sorted(
    os.path.join(root, file)
    for root, _dirs, files in os.walk(data_dir)
    for file in files
    if file.endswith('.png')
)
if not image_paths:
    raise FileNotFoundError(f'No .png images found under {data_dir}')

# The class label is the name of the folder that directly contains the image.
labels = [os.path.basename(os.path.dirname(path)) for path in image_paths]

# Hold out 10% of the images as the test set. The split is stratified so both
# parts keep the same class ratio; the positive class is a small minority
# (see the confusion counts printed by the evaluation cell).
# NOTE(review): the split is per image. File names look like `<id>_<id>.png`;
# if the first id identifies a patient, images of one patient can end up on
# both sides of the split - confirm and consider a group-wise split.
# NOTE(review): POSITIVEGAN.zip is unpacked into this same tree, so presumably
# synthetic positives can land in the test set - confirm this is intended.
train_val_paths, test_paths, train_val_labels, test_labels = train_test_split(
    image_paths,
    labels,
    test_size=0.1,
    stratify=labels,
    random_state=SPLIT_SEED,
)

# DataFrames with the file paths and labels of the train+validation pool and
# of the held-out test set.
train_val_df = pd.DataFrame({
    'path': train_val_paths,
    'label': train_val_labels
})
test_df = pd.DataFrame({
    'path': test_paths,
    'label': test_labels
})

# Report the size of each partition (no augmentation has been applied yet;
# "Training set" is the pool that is later divided into CV folds).
print(f'Training set: {len(train_val_df)} images')
print(f'Test set: {len(test_df)} images')

Training set: 51909 images
Test set: 5768 images


In [None]:
# Training images are augmented on the fly; validation and test images are
# only rescaled by 1/255.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=25,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='nearest'
)

test_datagen = ImageDataGenerator(rescale=1./255)

# Define the batch size
batch_size = 64

# Fixed seed so the fold assignment is the same on every run.
CV_SEED = 42


def make_generator(datagen, frame, shuffle=True):
    """Create a binary-label batch iterator over the images listed in ``frame``.

    Args:
        datagen: ``ImageDataGenerator`` that loads and preprocesses the images.
        frame: DataFrame with a ``'path'`` and a ``'label'`` column.
        shuffle: Whether the iterator shuffles the sample order.

    Returns:
        The iterator returned by ``datagen.flow_from_dataframe``; it reads
        256x256 grayscale images in batches of ``batch_size``.
    """
    return datagen.flow_from_dataframe(
        frame,
        x_col='path',
        y_col='label',
        target_size=(256, 256),
        color_mode='grayscale',
        batch_size=batch_size,
        class_mode='binary',
        shuffle=shuffle,
    )


# The test set is the same for every fold, so its generator is created once.
# It is not shuffled so evaluation visits the images in a fixed order.
test_generator = make_generator(test_datagen, test_df, shuffle=False)

kf = KFold(n_splits=5, shuffle=True, random_state=CV_SEED)

# K-fold cross-validation loop: one freshly initialised model per fold.
for fold, (train_idx, val_idx) in enumerate(kf.split(train_val_df)):
    print(f'Fold {fold + 1}')

    # Release the Keras state left behind by the previous fold's model so
    # memory use does not grow with every fold.
    tf.keras.backend.clear_session()

    train_fold_df = train_val_df.iloc[train_idx].copy()
    val_fold_df = train_val_df.iloc[val_idx].copy()

    # Augmented training data, plain rescaled validation data.
    train_generator = make_generator(train_datagen, train_fold_df)
    val_generator = make_generator(test_datagen, val_fold_df)

    # NOTE: the checkpoint suffix is the 0-based fold index, while the .h5
    # file saved below uses the 1-based fold number.
    checkpoint_path = '/content/drive/MyDrive/Checkpoint' + str(fold)

    # Keeps the full model of the epoch with the lowest validation loss.
    checkpoint = ModelCheckpoint(checkpoint_path,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 save_weights_only=False,
                                 mode='auto',
                                 initial_value_threshold=None)

    early_stop = EarlyStopping(monitor='val_loss',
                               patience=10,
                               mode='min',
                               verbose=1)

    optimizer = Adam(learning_rate=0.0001)
    model = create_model()
    model.compile(optimizer=optimizer, loss='binary_crossentropy',
                  metrics=[Precision(name='precision'), Recall(name='recall'), AUC(name='auc')])

    # Training the model
    model.fit(train_generator, validation_data=val_generator, epochs=3,
              steps_per_epoch=len(train_generator),
              validation_steps=len(val_generator),
              callbacks=[checkpoint, early_stop])

    # After fit() `model` holds the weights of the LAST epoch, which is not
    # necessarily the best one (some folds finish on a non-improving epoch).
    # Reload the best-val_loss checkpoint so the saved fold model is the one
    # the checkpoint selected.
    if os.path.exists(checkpoint_path):
        model = tf.keras.models.load_model(checkpoint_path)

    # Save the model for this fold
    model.save(f'model_fold_{fold + 1}.h5')




Found 5768 validated image filenames belonging to 2 classes.
Fold 1
Found 41527 validated image filenames belonging to 2 classes.
Found 10382 validated image filenames belonging to 2 classes.
Epoch 1/3
Epoch 1: val_loss improved from inf to 1.45483, saving model to /content/drive/MyDrive/Checkpoint0




Epoch 2/3
Epoch 2: val_loss improved from 1.45483 to 0.14986, saving model to /content/drive/MyDrive/Checkpoint0




Epoch 3/3
Epoch 3: val_loss improved from 0.14986 to 0.12004, saving model to /content/drive/MyDrive/Checkpoint0




Fold 2
Found 41527 validated image filenames belonging to 2 classes.
Found 10382 validated image filenames belonging to 2 classes.
Epoch 1/3
Epoch 1: val_loss improved from inf to 0.20084, saving model to /content/drive/MyDrive/Checkpoint1




Epoch 2/3
Epoch 2: val_loss improved from 0.20084 to 0.11657, saving model to /content/drive/MyDrive/Checkpoint1




Epoch 3/3
Epoch 3: val_loss did not improve from 0.11657
Fold 3
Found 41527 validated image filenames belonging to 2 classes.
Found 10382 validated image filenames belonging to 2 classes.
Epoch 1/3
Epoch 1: val_loss improved from inf to 0.13409, saving model to /content/drive/MyDrive/Checkpoint2




Epoch 2/3
Epoch 2: val_loss improved from 0.13409 to 0.11189, saving model to /content/drive/MyDrive/Checkpoint2




Epoch 3/3
Epoch 3: val_loss did not improve from 0.11189
Fold 4
Found 41527 validated image filenames belonging to 2 classes.
Found 10382 validated image filenames belonging to 2 classes.
Epoch 1/3
Epoch 1: val_loss improved from inf to 0.11517, saving model to /content/drive/MyDrive/Checkpoint3




Epoch 2/3
Epoch 2: val_loss did not improve from 0.11517
Epoch 3/3
Epoch 3: val_loss improved from 0.11517 to 0.10193, saving model to /content/drive/MyDrive/Checkpoint3




Fold 5
Found 41528 validated image filenames belonging to 2 classes.
Found 10381 validated image filenames belonging to 2 classes.
Epoch 1/3
Epoch 1: val_loss improved from inf to 0.10649, saving model to /content/drive/MyDrive/Checkpoint4




Epoch 2/3
Epoch 2: val_loss did not improve from 0.10649
Epoch 3/3
Epoch 3: val_loss did not improve from 0.10649


In [None]:
# Copy the saved models of folds 2-5 from /content to the ModeleBest folder on Drive.
!cp /content/model_fold_2.h5 /content/drive/MyDrive/ModeleBest/
!cp /content/model_fold_3.h5 /content/drive/MyDrive/ModeleBest/
!cp /content/model_fold_4.h5 /content/drive/MyDrive/ModeleBest/
!cp /content/model_fold_5.h5 /content/drive/MyDrive/ModeleBest/

In [None]:
from tensorflow.keras.models import load_model
from tensorflow.keras.metrics import Precision, Recall, BinaryAccuracy, TruePositives, TrueNegatives, FalsePositives, FalseNegatives
from tqdm import tqdm
import numpy as np

# Streaming metrics, accumulated batch by batch over the test set.
pre = Precision()
re = Recall()
acc = BinaryAccuracy()
tp = TruePositives()
tn = TrueNegatives()
fp = FalsePositives()
fn = FalseNegatives()

# Load the five per-fold models that form the ensemble.
# NOTE(review): this rebinds `models`, which an earlier cell imported from
# tensorflow.keras; the name is kept so cells relying on this list keep working.
models = [load_model(f'model_fold_{i}.h5') for i in range(1, 6)]

# The Keras iterator never stops on its own when used in a `for` loop, and the
# previous `i > len(test_generator)` guard let two extra batches through, so
# 128 images were counted twice. Indexing batches 0 .. len-1 visits every test
# image exactly once.
n_batches = len(test_generator)
for i in tqdm(range(n_batches), total=n_batches):
    X, y = test_generator[i]

    # verbose=0 suppresses the progress bar predict() prints for every call.
    predictions = [model.predict(X, verbose=0) for model in models]

    # Majority vote: round each model's probability to 0/1, average the votes
    # across models, and round the average to get the final 0/1 prediction.
    # Flatten so the prediction has the same 1-D shape as the label vector.
    yhat = np.round(np.mean([np.round(pred) for pred in predictions], axis=0)).reshape(-1)

    pre.update_state(y, yhat)
    re.update_state(y, yhat)
    acc.update_state(y, yhat)
    tp.update_state(y, yhat)
    tn.update_state(y, yhat)
    fp.update_state(y, yhat)
    fn.update_state(y, yhat)

print("Precision:", pre.result().numpy())
print("Recall:", re.result().numpy())
print("Binary Accuracy:", acc.result().numpy())
print("True Positives:", tp.result().numpy())
print("True Negatives:", tn.result().numpy())
print("False Positives:", fp.result().numpy())
print("False Negatives:", fn.result().numpy())


  0%|          | 0/91 [00:00<?, ?it/s]



  1%|          | 1/91 [00:10<15:45, 10.50s/it]



  2%|▏         | 2/91 [00:11<07:23,  4.98s/it]



  3%|▎         | 3/91 [00:12<04:49,  3.29s/it]



  4%|▍         | 4/91 [00:14<03:31,  2.44s/it]



  5%|▌         | 5/91 [00:15<02:52,  2.00s/it]



  7%|▋         | 6/91 [00:16<02:32,  1.79s/it]



  8%|▊         | 7/91 [00:18<02:19,  1.66s/it]



  9%|▉         | 8/91 [00:19<02:03,  1.49s/it]



 10%|▉         | 9/91 [00:20<01:53,  1.38s/it]



 11%|█         | 10/91 [00:21<01:45,  1.30s/it]



 12%|█▏        | 11/91 [00:22<01:35,  1.19s/it]



 13%|█▎        | 12/91 [00:23<01:34,  1.19s/it]



 14%|█▍        | 13/91 [00:24<01:33,  1.20s/it]



 15%|█▌        | 14/91 [00:26<01:32,  1.21s/it]



 16%|█▋        | 15/91 [00:27<01:27,  1.15s/it]



 18%|█▊        | 16/91 [00:28<01:24,  1.12s/it]



 19%|█▊        | 17/91 [00:29<01:27,  1.18s/it]



 20%|█▉        | 18/91 [00:30<01:28,  1.22s/it]



 21%|██        | 19/91 [00:31<01:26,  1.21s/it]



 22%|██▏       | 20/91 [00:32<01:22,  1.16s/it]



 23%|██▎       | 21/91 [00:33<01:18,  1.12s/it]



 24%|██▍       | 22/91 [00:34<01:14,  1.07s/it]



 25%|██▌       | 23/91 [00:36<01:15,  1.12s/it]



 26%|██▋       | 24/91 [00:37<01:12,  1.08s/it]



 27%|██▋       | 25/91 [00:38<01:11,  1.09s/it]



 29%|██▊       | 26/91 [00:39<01:11,  1.10s/it]



 30%|██▉       | 27/91 [00:40<01:10,  1.11s/it]



 31%|███       | 28/91 [00:41<01:10,  1.13s/it]



 32%|███▏      | 29/91 [00:43<01:13,  1.19s/it]



 33%|███▎      | 30/91 [00:44<01:17,  1.27s/it]



 34%|███▍      | 31/91 [00:45<01:14,  1.25s/it]



 35%|███▌      | 32/91 [00:46<01:11,  1.22s/it]



 36%|███▋      | 33/91 [00:48<01:10,  1.22s/it]



 37%|███▋      | 34/91 [00:49<01:06,  1.18s/it]



 38%|███▊      | 35/91 [00:50<01:05,  1.16s/it]



 40%|███▉      | 36/91 [00:51<01:00,  1.10s/it]



 41%|████      | 37/91 [00:52<01:00,  1.11s/it]



 42%|████▏     | 38/91 [00:53<01:00,  1.14s/it]



 43%|████▎     | 39/91 [00:54<00:57,  1.10s/it]



 44%|████▍     | 40/91 [00:55<00:58,  1.14s/it]



 45%|████▌     | 41/91 [00:56<00:58,  1.16s/it]



 46%|████▌     | 42/91 [00:58<00:59,  1.22s/it]



 47%|████▋     | 43/91 [00:59<00:56,  1.19s/it]



 48%|████▊     | 44/91 [01:00<00:52,  1.11s/it]



 49%|████▉     | 45/91 [01:01<00:53,  1.15s/it]



 51%|█████     | 46/91 [01:02<00:53,  1.19s/it]



 52%|█████▏    | 47/91 [01:04<00:51,  1.17s/it]



 53%|█████▎    | 48/91 [01:05<00:49,  1.16s/it]



 54%|█████▍    | 49/91 [01:06<00:47,  1.14s/it]



 55%|█████▍    | 50/91 [01:07<00:44,  1.08s/it]



 56%|█████▌    | 51/91 [01:08<00:44,  1.11s/it]



 57%|█████▋    | 52/91 [01:09<00:45,  1.16s/it]



 58%|█████▊    | 53/91 [01:10<00:45,  1.21s/it]



 59%|█████▉    | 54/91 [01:12<00:44,  1.21s/it]



 60%|██████    | 55/91 [01:13<00:42,  1.19s/it]



 62%|██████▏   | 56/91 [01:14<00:39,  1.14s/it]



 63%|██████▎   | 57/91 [01:15<00:37,  1.12s/it]



 64%|██████▎   | 58/91 [01:16<00:35,  1.08s/it]



 65%|██████▍   | 59/91 [01:17<00:33,  1.06s/it]



 66%|██████▌   | 60/91 [01:18<00:33,  1.08s/it]



 67%|██████▋   | 61/91 [01:19<00:32,  1.09s/it]



 68%|██████▊   | 62/91 [01:20<00:33,  1.15s/it]



 69%|██████▉   | 63/91 [01:22<00:33,  1.19s/it]



 70%|███████   | 64/91 [01:23<00:32,  1.21s/it]



 71%|███████▏  | 65/91 [01:24<00:32,  1.25s/it]



 73%|███████▎  | 66/91 [01:25<00:30,  1.21s/it]



 74%|███████▎  | 67/91 [01:27<00:28,  1.19s/it]



 75%|███████▍  | 68/91 [01:28<00:26,  1.17s/it]



 76%|███████▌  | 69/91 [01:29<00:25,  1.16s/it]



 77%|███████▋  | 70/91 [01:30<00:23,  1.11s/it]



 78%|███████▊  | 71/91 [01:31<00:21,  1.06s/it]



 79%|███████▉  | 72/91 [01:32<00:20,  1.08s/it]



 80%|████████  | 73/91 [01:33<00:18,  1.05s/it]



 81%|████████▏ | 74/91 [01:34<00:18,  1.08s/it]



 82%|████████▏ | 75/91 [01:35<00:17,  1.12s/it]



 84%|████████▎ | 76/91 [01:37<00:18,  1.22s/it]



 85%|████████▍ | 77/91 [01:38<00:17,  1.23s/it]



 86%|████████▌ | 78/91 [01:39<00:15,  1.18s/it]



 87%|████████▋ | 79/91 [01:40<00:13,  1.11s/it]



 88%|████████▊ | 80/91 [01:41<00:11,  1.05s/it]



 89%|████████▉ | 81/91 [01:42<00:11,  1.11s/it]



 90%|█████████ | 82/91 [01:43<00:09,  1.09s/it]



 91%|█████████ | 83/91 [01:44<00:08,  1.11s/it]



 92%|█████████▏| 84/91 [01:46<00:08,  1.15s/it]



 93%|█████████▎| 85/91 [01:47<00:06,  1.11s/it]



 95%|█████████▍| 86/91 [01:48<00:05,  1.09s/it]



 96%|█████████▌| 87/91 [01:49<00:04,  1.20s/it]



 97%|█████████▋| 88/91 [01:50<00:03,  1.24s/it]



 98%|█████████▊| 89/91 [01:59<00:06,  3.41s/it]



 99%|█████████▉| 90/91 [02:00<00:02,  2.69s/it]



100%|██████████| 91/91 [02:01<00:00,  2.14s/it]



92it [02:02,  1.76s/it]                        



92it [02:03,  1.34s/it]

Precision: 0.995283
Recall: 0.55526316
Binary Accuracy: 0.9711669
True Positives: 211.0
True Negatives: 5515.0
False Positives: 1.0
False Negatives: 169.0





In [None]:
# Copy the saved fold-1 model from /content to the ModeleBest folder on Drive.
!cp /content/model_fold_1.h5 /content/drive/MyDrive/ModeleBest/
