In [1]:
import os
import pandas as pd
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [2]:
data_path = "..\..\Data_Entry_2017_v2020.csv"

df = pd.read_csv(data_path)
df['Finding Labels'] = df['Finding Labels'].str.split('|')
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,y]
0,00000001_000.png,[Cardiomegaly],0,1,57,M,PA,2682,2749,0.143,0.143
1,00000001_001.png,"[Cardiomegaly, Emphysema]",1,1,58,M,PA,2894,2729,0.143,0.143
2,00000001_002.png,"[Cardiomegaly, Effusion]",2,1,58,M,PA,2500,2048,0.168,0.168
3,00000002_000.png,[No Finding],0,2,80,M,PA,2500,2048,0.171,0.171
4,00000003_001.png,[Hernia],0,3,74,F,PA,2500,2048,0.168,0.168


In [3]:
images_folder = "../../images/final_images/images"

In [4]:
findings = []

for labels in df['Finding Labels']:
    for label in labels:
        if label not in findings:
            findings.append(label)
            
findings

['Cardiomegaly',
 'Emphysema',
 'Effusion',
 'No Finding',
 'Hernia',
 'Infiltration',
 'Mass',
 'Nodule',
 'Atelectasis',
 'Pneumothorax',
 'Pleural_Thickening',
 'Pneumonia',
 'Fibrosis',
 'Edema',
 'Consolidation']

In [5]:
for finding in findings:
    df[finding] = df['Finding Labels'].apply(lambda x: 1 if finding in x else 0)
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,...,Infiltration,Mass,Nodule,Atelectasis,Pneumothorax,Pleural_Thickening,Pneumonia,Fibrosis,Edema,Consolidation
0,00000001_000.png,[Cardiomegaly],0,1,57,M,PA,2682,2749,0.143,...,0,0,0,0,0,0,0,0,0,0
1,00000001_001.png,"[Cardiomegaly, Emphysema]",1,1,58,M,PA,2894,2729,0.143,...,0,0,0,0,0,0,0,0,0,0
2,00000001_002.png,"[Cardiomegaly, Effusion]",2,1,58,M,PA,2500,2048,0.168,...,0,0,0,0,0,0,0,0,0,0
3,00000002_000.png,[No Finding],0,2,80,M,PA,2500,2048,0.171,...,0,0,0,0,0,0,0,0,0,0
4,00000003_001.png,[Hernia],0,3,74,F,PA,2500,2048,0.168,...,0,0,0,0,0,0,0,0,0,0


In [19]:
X_train_val, X_test, y_train_val, y_test = train_test_split(df['Image Index'], df[findings], test_size=0.2, random_state = 42)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size = 0.2, random_state = 42)

In [20]:
def preprocess_image(image_path, img_size=(224, 224)):
    clean_path = ''.join(c for c in image_path if c.isprintable())
    
    img = load_img(clean_path, target_size=img_size)
    img_array = img_to_array(img)

    img_array = img_array / 255.0
    return img_array

def image_generator(image_indices, labels, batch_size=32, img_size=(224, 224), image_dir='images_folder'):
    num_samples = len(image_indices)
    while True:
        
        indices = np.random.permutation(num_samples)
        for i in range(0, num_samples, batch_size):
            batch_indices = indices[i:i+batch_size]
            batch_images = []
            batch_labels = []
            
            for idx in batch_indices:
                image_index = image_indices.iloc[idx]
                
                image_index = str(image_index).strip()
                
                image_path = os.path.join(image_dir, image_index)
                
                try:
                    img_array = preprocess_image(image_path, img_size)
                    batch_images.append(img_array)
                    
                    batch_labels.append(labels.iloc[idx].values)
                except Exception as e:
                    print(f"Error processing image {image_path}: {e}")
                    continue
            
            if batch_images:
                yield np.array(batch_images), np.array(batch_labels)

In [32]:
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

def create_model(input_shape=(224, 224, 3), num_classes = len(findings)):
    base_model = DenseNet121(weights='imagenet', include_top=False, input_shape = input_shape)
    
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation='relu')(x)
    predictions = Dense(num_classes, activation='sigmoid')(x)
    
    model = Model(inputs=base_model.input, outputs=predictions)
    
    for layer in base_model.layers:
        layer.trainable=False
    
    return model

In [33]:
model = create_model()

model.compile(
    optimizer = 'adam',
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC()]
)

train_generator = image_generator(X_train, y_train, batch_size=16, image_dir=images_folder)
val_generator = image_generator(X_val, y_val, batch_size=16, image_dir=images_folder)

history = model.fit(
    train_generator,
    steps_per_epoch=len(X_train)//16,
    epochs=10,
    validation_data=val_generator,
    validation_steps=len(X_val) // 16
)

Epoch 1/10
[1m  21/4484[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:36:30[0m 2s/step - accuracy: 0.3488 - auc_1: 0.6244 - loss: 0.4116

KeyboardInterrupt: 

In [None]:
for layer in model.layers[0].layers[-20:]:
    layer.trainable = True

model.compile(
        optimizer=tf.keras.optimizers.Adam(1e-5),
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC()]
)

history_fine_tune = model.fit(
    train_generator,
    steps_per_epoch=len(X_train)//16,
    epochs=5,
    validation_data=val_generator,
    validation_steps=len(X_val)//16
)