In [1]:
import torch
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))

1
NVIDIA GeForce RTX 4090


In [14]:
import numpy as np

In [2]:
!pip install pandas scikit-learn tqdm

Collecting pandas
  Downloading pandas-2.2.3-cp313-cp313-win_amd64.whl.metadata (19 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp313-cp313-win_amd64.whl.metadata (15 kB)
Collecting tqdm
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.15.2-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading pandas-2.2.3-cp313-cp313-win_amd64.whl (11.5 MB)
   ---------------------------------------- 0.0/11.5 MB ? eta -:--:--
   ---------------- ----------------------- 4.7/11.5 MB

In [3]:
import os
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, f1_score
from tqdm import tqdm

In [4]:
# ✅ Set your root path
image_root = r"C:\Users\Student\Downloads\archive"  # main folder containing images_001 to images_012
image_folders = [f'images_{str(i).zfill(3)}' for i in range(1, 13)]

# ✅ Load the metadata CSV
df = pd.read_csv(os.path.join(image_root, 'Data_Entry_2017.csv'))

# ✅ Fix any stray spaces in filenames
df['Image Index'] = df['Image Index'].str.strip()

# ✅ Build a map of image names → full paths (inside images/ subfolder)
image_paths = {}
for folder in tqdm(image_folders, desc="Indexing image folders"):
    folder_path = os.path.join(image_root, folder, 'images')  # <-- includes subfolder
    if not os.path.exists(folder_path): continue
    for fname in os.listdir(folder_path):
        image_paths[fname] = os.path.join(folder_path, fname)

# ✅ Keep only rows where images exist
df = df[df['Image Index'].isin(image_paths)]
df['Path'] = df['Image Index'].map(image_paths)

# ✅ Create binary label columns for each disease
all_labels = sorted(set(l for s in df['Finding Labels'] for l in s.split('|')))
for label in tqdm(all_labels, desc="Generating label columns"):
    df[label] = df['Finding Labels'].apply(lambda x: 1 if label in x else 0)

# ✅ Check the result
print("✅ Total usable images:", len(df))

Indexing image folders: 100%|██████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 70.35it/s]
Generating label columns: 100%|████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 53.85it/s]

✅ Total usable images: 112120





In [5]:
from sklearn.model_selection import train_test_split

# 🔁 Split into 80% train, 10% val, 10% test
train_df, temp_df = train_test_split(df, test_size=0.2, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Optional: reset index for safety
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

# ✅ List of binary label columns
label_cols = all_labels


In [6]:
image_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

class ChestXrayDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.data = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        image = Image.open(row['Path']).convert("RGB")
        if self.transform:
            image = self.transform(image)
        labels = torch.tensor(row[label_cols].values.astype('float32'))
        return image, labels

In [7]:
train_loader = DataLoader(ChestXrayDataset(train_df, image_transforms), batch_size=32, shuffle=True)
val_loader = DataLoader(ChestXrayDataset(val_df, image_transforms), batch_size=32)
test_loader = DataLoader(ChestXrayDataset(test_df, image_transforms), batch_size=32)

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def get_chexnet(num_classes):
    model = models.densenet121(pretrained=True)
    in_features = model.classifier.in_features
    model.classifier = nn.Sequential(
        nn.Linear(in_features, num_classes),
        nn.Sigmoid()
    )
    return model.to(device)

model = get_chexnet(len(label_cols))


Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to C:\Users\install/.cache\torch\hub\checkpoints\densenet121-a639ec97.pth
100.0%


In [15]:
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

def train_model(model, train_loader, val_loader, epochs=10):
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
        for images, labels in loop:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            loop.set_postfix(loss=loss.item())
        print(f"Train Loss: {total_loss / len(train_loader):.4f}")
        validate_model(model, val_loader)

def validate_model(model, loader):
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Validating"):
            images = images.to(device)
            outputs = model(images)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(outputs.cpu().numpy())

    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    
    auc = roc_auc_score(y_true, y_pred, average="macro")
    f1 = f1_score(y_true, (y_pred > 0.5).astype(int), average="macro")
    print(f"Val AUC: {auc:.4f}, F1: {f1:.4f}")

In [16]:
train_model(model, train_loader, val_loader, epochs=5)

Epoch 1/5: 100%|███████████████████████████████████████████████████████| 2803/2803 [20:32<00:00,  2.27it/s, loss=0.208]


Train Loss: 0.1655


Validating: 100%|████████████████████████████████████████████████████████████████████| 351/351 [02:31<00:00,  2.31it/s]


Val AUC: 0.8347, F1: 0.1932


Epoch 2/5: 100%|███████████████████████████████████████████████████████| 2803/2803 [20:30<00:00,  2.28it/s, loss=0.115]


Train Loss: 0.1571


Validating: 100%|████████████████████████████████████████████████████████████████████| 351/351 [02:29<00:00,  2.35it/s]


Val AUC: 0.8314, F1: 0.2434


Epoch 3/5: 100%|███████████████████████████████████████████████████████| 2803/2803 [20:35<00:00,  2.27it/s, loss=0.132]


Train Loss: 0.1453


Validating: 100%|████████████████████████████████████████████████████████████████████| 351/351 [02:30<00:00,  2.34it/s]


Val AUC: 0.8273, F1: 0.2526


Epoch 4/5: 100%|███████████████████████████████████████████████████████| 2803/2803 [20:25<00:00,  2.29it/s, loss=0.133]


Train Loss: 0.1297


Validating: 100%|████████████████████████████████████████████████████████████████████| 351/351 [02:31<00:00,  2.32it/s]


Val AUC: 0.8175, F1: 0.2701


Epoch 5/5: 100%|███████████████████████████████████████████████████████| 2803/2803 [20:19<00:00,  2.30it/s, loss=0.136]


Train Loss: 0.1099


Validating: 100%|████████████████████████████████████████████████████████████████████| 351/351 [02:27<00:00,  2.38it/s]


Val AUC: 0.8108, F1: 0.2789


In [17]:
# ✅ Save final trained model
torch.save(model.state_dict(), 'chexnet_model.pth')
print("✅ Final model saved as chexnet_model.pth")

✅ Final model saved as chexnet_model.pth


In [25]:
def test_model(model, loader):
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(outputs.cpu().numpy())

    y_pred = np.array(y_pred)
    y_true = np.array(y_true)

    auc = roc_auc_score(y_true, y_pred, average="macro")
    f1 = f1_score(y_true, (y_pred > 0.5), average="macro")
    print(f"Test AUC: {auc:.4f}, F1: {f1:.4f}")

test_model(model, test_loader)

Test AUC: 0.8060, F1: 0.2740


In [28]:
import streamlit as st
import torch
import torchvision.transforms as transforms
from PIL import Image
from torchvision import models
import torch.nn as nn

# Define class labels (adjust these to match your project)
class_names = ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion',
               'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration', 'Mass',
               'Nodule', 'Pleural_Thickening', 'Pneumonia', 'Pneumothorax', 'No Finding']

# Load model
def load_model():
    model = models.resnet34(pretrained=False)
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, 512),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(512, len(class_names)),
        nn.Sigmoid()
    )
    model.load_state_dict(torch.load("resnet34_chestxray_model.pth", map_location=torch.device('cpu')))
    model.eval()
    return model

# Image preprocessing
def transform_image(image):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    return transform(image).unsqueeze(0)

# Predict function
def predict(image, model):
    image_tensor = transform_image(image)
    outputs = model(image_tensor)
    predictions = (outputs > 0.5).squeeze().detach().numpy()
    return [class_names[i] for i in range(len(predictions)) if predictions[i]]

# Streamlit UI
st.title("Chest X-ray Disease Predictor")

uploaded_file = st.file_uploader("Upload Chest X-ray Image", type=["jpg", "png", "jpeg"])
if uploaded_file is not None:
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption='Uploaded X-ray', use_column_width=True)

    model = load_model()
    with st.spinner('Predicting...'):
        prediction = predict(image, model)

    if prediction:
        st.success("Diseases Detected:")
        for disease in prediction:
            st.write(f"- {disease}")
    else:
        st.info("No disease detected with current threshold.")

2025-04-16 17:13:13.278 
  command:

    streamlit run C:\ProgramData\anaconda3\envs\pyenv\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [27]:
pip install streamlit

Collecting streamlit
  Downloading streamlit-1.44.1-py3-none-any.whl.metadata (8.9 kB)
Collecting altair<6,>=4.0 (from streamlit)
  Downloading altair-5.5.0-py3-none-any.whl.metadata (11 kB)
Collecting blinker<2,>=1.0.0 (from streamlit)
  Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting cachetools<6,>=4.0 (from streamlit)
  Downloading cachetools-5.5.2-py3-none-any.whl.metadata (5.4 kB)
Collecting click<9,>=7.0 (from streamlit)
  Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Collecting protobuf<6,>=3.20 (from streamlit)
  Downloading protobuf-5.29.4-cp310-abi3-win_amd64.whl.metadata (592 bytes)
Collecting pyarrow>=7.0 (from streamlit)
  Downloading pyarrow-19.0.1-cp313-cp313-win_amd64.whl.metadata (3.4 kB)
Collecting tenacity<10,>=8.1.0 (from streamlit)
  Downloading tenacity-9.1.2-py3-none-any.whl.metadata (1.2 kB)
Collecting toml<2,>=0.10.1 (from streamlit)
  Downloading toml-0.10.2-py2.py3-none-any.whl.metadata (7.1 kB)
Collecting watchdog<7,>=2.1