In [18]:
import os
import pandas as pd
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import f1_score
#

In [19]:
data_path = "../Downloads/CXR8/Data_Entry_2017_v2020.csv"

df = pd.read_csv(data_path)

df['Finding Labels'] = df['Finding Labels'].str.split('|')
df['Is_Finding'] = df['Finding Labels'].apply(lambda x: 1 if x != ['No Finding'] else 0)

df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,y],Is_Finding
0,00000001_000.png,[Cardiomegaly],0,1,57,M,PA,2682,2749,0.143,0.143,1
1,00000001_001.png,"[Cardiomegaly, Emphysema]",1,1,58,M,PA,2894,2729,0.143,0.143,1
2,00000001_002.png,"[Cardiomegaly, Effusion]",2,1,58,M,PA,2500,2048,0.168,0.168,1
3,00000002_000.png,[No Finding],0,2,80,M,PA,2500,2048,0.171,0.171,0
4,00000003_001.png,[Hernia],0,3,74,F,PA,2500,2048,0.168,0.168,1


In [20]:
images_folder = "../Downloads/CXR8/images"

In [21]:
X_train_val, X_test, y_train_val, y_test = train_test_split(df['Image Index'], df["Is_Finding"], test_size=0.2, random_state = 42)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size = 0.2, random_state = 42)


In [22]:
def preprocess_image(image_path, img_size=(224, 224)):
    clean_path = ''.join(c for c in image_path if c.isprintable())

    img = load_img(clean_path, target_size=img_size)
    img_array = img_to_array(img)

    img_array = img_array / 255.0
    return img_array

def image_generator(image_indices, labels, batch_size=32, img_size=(224, 224), image_dir='images_folder'):
    num_samples = len(image_indices)
    while True:

        indices = np.random.permutation(num_samples)
        for i in range(0, num_samples, batch_size):
            batch_indices = indices[i:i+batch_size]
            batch_images = []
            batch_labels = []

            for idx in batch_indices:
                image_index = image_indices.iloc[idx]

                image_index = str(image_index).strip()

                image_path = os.path.join(image_dir, image_index)

                try:
                    img_array = preprocess_image(image_path, img_size)
                    batch_images.append(img_array)
                    #
                    batch_labels.append(labels.iloc[idx])
                except Exception as e:
                    #print(f"Error processing image {image_path}: {e}")
                    continue

            if batch_images:
                yield np.array(batch_images), np.array(batch_labels)

In [23]:
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

def create_model(input_shape=(224, 224, 3), num_classes = 1):
    base_model = DenseNet121(weights='imagenet', include_top=False, input_shape = input_shape)

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation='relu')(x)
    predictions = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layers:
        layer.trainable=False

    return model

In [25]:
model = create_model()

model.compile(
    optimizer= 'adam',
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.BinaryAccuracy(name='binary_acc'), tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall'),tf.keras.metrics.AUC(name='auc')]
)

train_generator = image_generator(X_train, y_train, batch_size=16, image_dir=images_folder)
val_generator = image_generator(X_val, y_val, batch_size=16, image_dir=images_folder)

history = model.fit(
    train_generator,
    steps_per_epoch=len(X_train)//16,
    epochs=5,
    validation_data=val_generator,
    validation_steps=len(X_val) // 16
)


Epoch 1/5
[1m4484/4484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3597s[0m 801ms/step - auc: 0.6819 - binary_acc: 0.6405 - loss: 0.6463 - precision: 0.6167 - recall: 0.5776 - val_auc: 0.7287 - val_binary_acc: 0.6657 - val_loss: 0.6211 - val_precision: 0.6090 - val_recall: 0.7436
Epoch 2/5
[1m4484/4484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2272s[0m 507ms/step - auc: 0.7201 - binary_acc: 0.6727 - loss: 0.6144 - precision: 0.6506 - recall: 0.6200 - val_auc: 0.7323 - val_binary_acc: 0.6770 - val_loss: 0.6115 - val_precision: 0.6407 - val_recall: 0.6620
Epoch 3/5
[1m4484/4484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2342s[0m 522ms/step - auc: 0.7230 - binary_acc: 0.6737 - loss: 0.6127 - precision: 0.6542 - recall: 0.6243 - val_auc: 0.7329 - val_binary_acc: 0.6668 - val_loss: 0.6189 - val_precision: 0.6091 - val_recall: 0.7498
Epoch 4/5
[1m4484/4484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2289s[0m 511ms/step - auc: 0.7284 - binary_acc: 0.6780 - loss: 0.6085 

In [29]:

# for layer in model.layers[0].layers[-20:]:
#     layer.trainable = True

for layer in model.layers[-20:]:
    layer.trainable = True


model.compile(
    optimizer= 'adam',
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.BinaryAccuracy(name='binary_acc'), tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall'),tf.keras.metrics.AUC(name='auc')]
)

history_fine_tune = model.fit(
    train_generator,
    steps_per_epoch=len(X_train)//16,
    epochs=5,
    validation_data=val_generator,
    validation_steps=len(X_val)//16
)


Epoch 1/5
[1m4484/4484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2278s[0m 507ms/step - auc: 0.7246 - binary_acc: 0.6765 - loss: 0.6110 - precision: 0.6579 - recall: 0.6138 - val_auc: 0.7499 - val_binary_acc: 0.6935 - val_loss: 0.5907 - val_precision: 0.6605 - val_recall: 0.6736
Epoch 2/5
[1m4484/4484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2295s[0m 512ms/step - auc: 0.7459 - binary_acc: 0.6923 - loss: 0.5937 - precision: 0.6715 - recall: 0.6451 - val_auc: 0.7454 - val_binary_acc: 0.6709 - val_loss: 0.6172 - val_precision: 0.6090 - val_recall: 0.7759
Epoch 3/5
[1m4484/4484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2307s[0m 514ms/step - auc: 0.7494 - binary_acc: 0.6942 - loss: 0.5910 - precision: 0.6733 - recall: 0.6504 - val_auc: 0.7539 - val_binary_acc: 0.6908 - val_loss: 0.5959 - val_precision: 0.6992 - val_recall: 0.5670
Epoch 4/5
[1m4484/4484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2295s[0m 512ms/step - auc: 0.7589 - binary_acc: 0.7026 - loss: 0.5830 

In [62]:
print(X_val)

21625     00005750_025.png
29832     00007757_001.png
109210    00029676_005.png
67081     00016577_012.png
51533     00013012_000.png
                ...       
58199     00014397_003.png
51489     00013003_017.png
49687     00012616_003.png
109405    00029763_004.png
66668     00016486_005.png
Name: Image Index, Length: 17940, dtype: object


In [None]:

test_images = []
test_labels = []


test_generator = image_generator(X_val, y_val, batch_size=1, image_dir=images_folder)

for _ in range(100):
    img_batch, label_batch = next(test_generator)
    test_images.append(img_batch[0])
    test_labels.append(label_batch[0]) 

test_images = np.array(test_images)
test_labels = np.array(test_labels)


y_pred = model.predict(test_images)
y_pred_classes = (y_pred > 0.75).astype(int)
f1 = f1_score(y_test, y_pred_classes)
print("F1:", f1)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 601ms/step
F1: 0.28125
