In [3]:
# Chest X-Ray Diagnosis with Deep Learning - Starter Notebook (NIH Chest X-ray14)

import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Dataset locations: everything lives under a single root directory.
base_dir = './archive'
# CSV with one metadata row per image.
data_csv_path = os.path.join(base_dir, 'Data_Entry_2017.csv')
# The images are spread over twelve shard folders, images_001 .. images_012.
image_dirs = [
    os.path.join(base_dir, 'images_{:03d}'.format(shard))
    for shard in range(1, 13)
]

# Read the metadata CSV into a DataFrame and print its row count
# followed by the first five rows as a quick sanity check.
metadata = pd.read_csv(data_csv_path)
print("Total entries:", metadata.shape[0])
print(metadata.head(5))

# Build full image path
# Index every file found under the shard directories by its file name.
# os.walk is used instead of a flat os.listdir so that files are still found
# when they sit one level deeper (e.g. images_001/images/*.png); a flat listing
# would then only see the sub-directory name and no metadata row would match.
# NOTE(review): the nested layout is an assumption about some copies of the
# dataset -- confirm against the local download.
image_path_map = {}
for img_dir in image_dirs:
    if not os.path.isdir(img_dir):
        # A partially downloaded dataset should not abort the whole cell;
        # rows whose images are absent are dropped by the filter below.
        print("Warning: image directory not found, skipping:", img_dir)
        continue
    for root, _subdirs, fnames in os.walk(img_dir):
        for fname in fnames:
            image_path_map[fname] = os.path.join(root, fname)

# Rows whose 'Image Index' has no file on disk get a null 'full_path'.
metadata['full_path'] = metadata['Image Index'].map(image_path_map.get)

# Filter missing paths
# .copy() gives an independent frame, so later column assignments on
# `metadata` do not trigger pandas' SettingWithCopyWarning.
metadata = metadata[metadata['full_path'].notnull()].copy()
print("Valid image paths:", len(metadata))

# Preview label distribution
# Tally each distinct 'Finding Labels' string, most frequent first, and draw
# the 15 largest tallies as a horizontal bar chart.
label_counts = metadata['Finding Labels'].value_counts().sort_values(ascending=False)
top_labels = label_counts.iloc[:15]

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=top_labels.values, y=top_labels.index, ax=ax)
ax.set_title('Top 15 Diagnoses in Dataset')
ax.set_xlabel('Count')
fig.tight_layout()
plt.show()

# ---------------------------
# ImageDataGenerator Setup
# ---------------------------

# Convert multilabel strings to single label by choosing the first label (for now)
# Labels are '|'-separated; keep only the text before the first separator.
# The column is replaced through assign() rather than written in place:
# `metadata` is the result of a boolean filter, and in-place column assignment
# on such a frame raises pandas' SettingWithCopyWarning.
first_label = metadata['Finding Labels'].str.split('|', n=1).str[0]
metadata = metadata.assign(**{'Finding Labels': first_label})

# Initialize generator
# Pixel values are multiplied by 1/255 and 20% of the rows are reserved for
# the 'validation' subset.
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# Arguments shared by both subsets, kept in one place so the training and
# validation generators cannot drift apart.
flow_kwargs = dict(
    dataframe=metadata,
    x_col='full_path',
    y_col='Finding Labels',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

# Training generator
# Shuffled every epoch; the fixed seed makes the shuffle order reproducible.
train_gen = datagen.flow_from_dataframe(
    subset='training',
    shuffle=True,
    seed=42,
    **flow_kwargs
)

# Validation generator
# Not shuffled, so batch order matches val_gen.filenames / val_gen.classes and
# predictions can be compared against the labels row by row.
val_gen = datagen.flow_from_dataframe(
    subset='validation',
    shuffle=False,
    **flow_kwargs
)


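ModuleNotFoundError: No module named 'tensorflow.keras'
# Note: this error is raised by the `from tensorflow.keras.preprocessing.image import ImageDataGenerator`
# line, before any of the code above runs. It typically indicates that TensorFlow is missing or only
# partially installed in the active kernel's environment; reinstalling it there
# (e.g. `pip install --upgrade tensorflow`) and restarting the kernel is the usual remedy.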