In [1]:
import os

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import f1_score
from sklearn.model_selection import GroupShuffleSplit, train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array


  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [14]:
data_path = "../../Data_Entry_2017_v2020.csv"
df = pd.read_csv(data_path)

# Turn the pipe-delimited label string into a list of labels per image.
df['Finding Labels'] = df['Finding Labels'].str.split('|')

# Binary target: 0 only when the label list is exactly ['No Finding'], 1 otherwise.
df['Is_Finding'] = df['Finding Labels'].apply(lambda labels: int(labels != ['No Finding']))

# Keep only the rows flagged as having a finding and show them as the cell output.
is_finding = df['Is_Finding'].eq(1)
df1 = df.loc[is_finding]
df1

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,y],Is_Finding
0,00000001_000.png,[Cardiomegaly],0,1,57,M,PA,2682,2749,0.143000,0.143000,1
1,00000001_001.png,"[Cardiomegaly, Emphysema]",1,1,58,M,PA,2894,2729,0.143000,0.143000,1
2,00000001_002.png,"[Cardiomegaly, Effusion]",2,1,58,M,PA,2500,2048,0.168000,0.168000,1
4,00000003_001.png,[Hernia],0,3,74,F,PA,2500,2048,0.168000,0.168000,1
5,00000003_002.png,[Hernia],1,3,75,F,PA,2048,2500,0.168000,0.168000,1
...,...,...,...,...,...,...,...,...,...,...,...,...
112097,00030786_006.png,[Consolidation],7,30786,61,F,AP,3056,2544,0.139000,0.139000,1
112100,00030789_000.png,[Infiltration],0,30789,51,F,PA,2021,2021,0.194311,0.194311,1
112106,00030793_000.png,"[Mass, Nodule]",0,30793,57,F,PA,2021,2021,0.194311,0.194311,1
112108,00030795_000.png,[Pleural_Thickening],0,30795,52,F,PA,2021,2021,0.194311,0.194311,1


In [None]:
# Directory holding the image files; passed to image_generator as `image_dir`.
images_folder = 'images'

In [None]:
# Split by patient rather than by image. The dataset holds several images per
# 'Patient ID', so a plain row-level split can place images of the same patient
# in both the training and the evaluation sets and inflate the scores.
# Note: with GroupShuffleSplit, test_size is the fraction of *patients* held out,
# so the image-level proportions are only approximately 80/20.
outer_split = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_val_pos, test_pos = next(outer_split.split(df, groups=df['Patient ID']))
train_val_df = df.iloc[train_val_pos]
test_df = df.iloc[test_pos]

inner_split = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_pos, val_pos = next(inner_split.split(train_val_df, groups=train_val_df['Patient ID']))
train_df = train_val_df.iloc[train_pos]
val_df = train_val_df.iloc[val_pos]

# Sanity checks: no patient may appear in more than one partition.
assert set(train_val_df['Patient ID']).isdisjoint(test_df['Patient ID'])
assert set(train_df['Patient ID']).isdisjoint(val_df['Patient ID'])
assert len(train_df) + len(val_df) + len(test_df) == len(df)

X_train_val, y_train_val = train_val_df['Image Index'], train_val_df['Is_Finding']
X_test, y_test = test_df['Image Index'], test_df['Is_Finding']
X_train, y_train = train_df['Image Index'], train_df['Is_Finding']
X_val, y_val = val_df['Image Index'], val_df['Is_Finding']


In [None]:
def preprocess_image(image_path, img_size=(224, 224)):
    """Load one image from disk and prepare it as DenseNet input.

    Args:
        image_path: Path of the image file. Non-printable characters are
            removed from the path before it is opened.
        img_size: (height, width) the image is resized to while loading.

    Returns:
        Float array of shape ``img_size + (3,)`` (``load_img`` defaults to RGB),
        normalised with the DenseNet ImageNet preprocessing.

    Raises:
        Whatever ``load_img`` raises when the file is missing or unreadable.
    """
    # Drop stray control characters (e.g. from a badly parsed CSV cell).
    clean_path = ''.join(c for c in image_path if c.isprintable())

    img = load_img(clean_path, target_size=img_size)
    img_array = img_to_array(img)

    # The ImageNet-pretrained DenseNet weights expect inputs scaled to [0, 1]
    # and then normalised by the ImageNet channel mean/std; dividing by 255
    # alone leaves the inputs on a different scale than the one the frozen
    # backbone was trained with.
    return tf.keras.applications.densenet.preprocess_input(img_array)

def image_generator(image_indices, labels, batch_size=32, img_size=(224, 224), image_dir='images_folder'):
    """Yield shuffled ``(images, labels)`` batches forever.

    Every pass over the data uses a fresh random permutation. Images that fail
    to load are reported on stdout and skipped, so a batch may contain fewer
    than ``batch_size`` samples.

    Args:
        image_indices: Image file names, accessed positionally via ``.iloc``
            (e.g. a pandas Series).
        labels: Labels aligned position-by-position with ``image_indices``,
            also accessed via ``.iloc``.
        batch_size: Maximum number of samples per batch; must be positive.
        img_size: Target size forwarded to ``preprocess_image``.
        image_dir: Directory the file names are joined onto.
            NOTE(review): the default is the literal string 'images_folder',
            which looks like a quoted variable name; it is kept unchanged for
            compatibility — confirm and pass ``image_dir`` explicitly.

    Yields:
        Tuple ``(np.ndarray of images, np.ndarray of labels)``.

    Raises:
        ValueError: if there are no samples, if ``labels`` and
            ``image_indices`` differ in length, or if ``batch_size`` is not
            positive. Raised on the first ``next()`` call because this is a
            generator.
        RuntimeError: if a complete pass over the data loads no image at all.
    """
    num_samples = len(image_indices)
    if num_samples == 0:
        raise ValueError("image_generator received no images")
    if len(labels) != num_samples:
        raise ValueError(
            f"image_indices has {num_samples} entries but labels has {len(labels)}"
        )
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    while True:
        yielded_any = False
        indices = np.random.permutation(num_samples)
        for i in range(0, num_samples, batch_size):
            batch_indices = indices[i:i+batch_size]
            batch_images = []
            batch_labels = []

            for idx in batch_indices:
                image_index = str(image_indices.iloc[idx]).strip()
                image_path = os.path.join(image_dir, image_index)

                try:
                    img_array = preprocess_image(image_path, img_size)
                except Exception as e:
                    # Best effort: report the unreadable image and move on.
                    print(f"Error processing image {image_path}: {e}")
                    continue

                # Append image and label together so they stay aligned.
                batch_images.append(img_array)
                batch_labels.append(labels.iloc[idx])

            if batch_images:
                yielded_any = True
                yield np.array(batch_images), np.array(batch_labels)

        if not yielded_any:
            # Without this guard the loop would spin forever without ever
            # yielding, and the consumer (e.g. model.fit) would hang.
            raise RuntimeError(
                f"No image could be loaded from '{image_dir}'; check the directory and file names"
            )

In [None]:
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

def create_model(input_shape=(224, 224, 3), num_classes = 1):
    """Build a DenseNet121 transfer-learning classifier with a frozen backbone.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        num_classes: Number of sigmoid output units. The default of 1 gives a
            single-probability binary classifier.

    Returns:
        An uncompiled Keras ``Model``: the ImageNet-pretrained DenseNet121
        (without its classification top) followed by global average pooling,
        a 256-unit ReLU layer and a ``num_classes``-unit sigmoid output. All
        backbone layers are non-trainable; only the new head is trainable.
    """
    base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=input_shape)

    # Freeze the pretrained backbone so that only the new head is trained at first.
    for layer in base_model.layers:
        layer.trainable = False

    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(256, activation='relu')(x)
    # Honour num_classes instead of hard-coding a single output unit.
    predictions = Dense(num_classes, activation='sigmoid')(x)

    return Model(inputs=base_model.input, outputs=predictions)

In [None]:
# Training configuration, named once instead of repeated as magic numbers.
BATCH_SIZE = 16
EPOCHS_PER_STAGE = 5
N_UNFROZEN_BASE_LAYERS = 20      # backbone layers released for fine-tuning
FINE_TUNE_LEARNING_RATE = 1e-5   # low rate so fine-tuning does not wreck the pretrained weights
DECISION_THRESHOLD = 0.75        # probability above which an image counts as a finding


def _build_metrics():
    """Return fresh metric objects for one ``model.compile`` call."""
    return [
        tf.keras.metrics.BinaryAccuracy(name='binary_acc'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.AUC(name='auc'),
    ]


def _predict_in_order(model, image_indices, labels, image_dir, batch_size):
    """Predict every loadable image in its given order.

    Returns ``(y_true, y_prob)`` as 1-D arrays covering only the images that
    could be loaded, so labels and predictions stay aligned.
    """
    y_true = []
    y_prob = []
    num_samples = len(image_indices)
    for start in range(0, num_samples, batch_size):
        batch_images = []
        batch_labels = []
        for pos in range(start, min(start + batch_size, num_samples)):
            image_path = os.path.join(image_dir, str(image_indices.iloc[pos]).strip())
            try:
                img_array = preprocess_image(image_path)
            except Exception as e:
                print(f"Error processing image {image_path}: {e}")
                continue
            batch_images.append(img_array)
            batch_labels.append(labels.iloc[pos])
        if batch_images:
            y_prob.append(np.asarray(model.predict_on_batch(np.array(batch_images))).ravel())
            y_true.extend(batch_labels)
    if not y_prob:
        return np.array([]), np.array([])
    return np.array(y_true), np.concatenate(y_prob)


# Stage 1: train only the new classification head on top of the frozen backbone.
model = create_model()

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=_build_metrics(),
)

train_generator = image_generator(X_train, y_train, batch_size=BATCH_SIZE, image_dir=images_folder)
val_generator = image_generator(X_val, y_val, batch_size=BATCH_SIZE, image_dir=images_folder)

history = model.fit(
    train_generator,
    steps_per_epoch=len(X_train) // BATCH_SIZE,
    epochs=EPOCHS_PER_STAGE,
    validation_data=val_generator,
    validation_steps=len(X_val) // BATCH_SIZE,
)

# Stage 2: fine-tune the last backbone layers.
# create_model returns a flat functional model, so model.layers[0] is the input
# layer and has no `.layers` attribute. The backbone layers are exactly the
# ones still frozen at this point, so release the last few of those.
frozen_layers = [layer for layer in model.layers if not layer.trainable]
for layer in frozen_layers[-N_UNFROZEN_BASE_LAYERS:]:
    layer.trainable = True

# Recompile so the changed `trainable` flags take effect, with a low learning rate.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=FINE_TUNE_LEARNING_RATE),
    loss='binary_crossentropy',
    metrics=_build_metrics(),
)

history_fine_tune = model.fit(
    train_generator,
    steps_per_epoch=len(X_train) // BATCH_SIZE,
    epochs=EPOCHS_PER_STAGE,
    validation_data=val_generator,
    validation_steps=len(X_val) // BATCH_SIZE,
)

# Evaluation: X_val holds file names, not pixels, so load the images (in order,
# without shuffling) before predicting and keep the labels aligned with them.
y_val_true, y_pred = _predict_in_order(model, X_val, y_val, images_folder, BATCH_SIZE)
y_pred_classes = (y_pred > DECISION_THRESHOLD).astype(int)
f1 = f1_score(y_val_true, y_pred_classes)
print("F1:", f1)

NameError: name 'create_model' is not defined