In [None]:
import torch
from diffusers import AutoPipelineForText2Image
from PIL import Image
import os

# Generate a small synthetic chest X-ray dataset with SD-Turbo: one image per
# prompt, saved as Medical_dataset/image_<n>.png (n starts at 1).

# Choose the device before loading so the weight dtype can match it: float16
# is meant for GPU inference, while on CPU half-precision kernels may be
# missing or very slow, so fall back to float32 there.
device = "cuda" if torch.cuda.is_available() else "cpu"
weight_dtype = torch.float16 if device == "cuda" else torch.float32

pipe = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/sd-turbo",
    torch_dtype=weight_dtype
)
pipe = pipe.to(device)

# One prompt per image; the position in this list determines the file index.
# NOTE(review): the last prompt is identical to the fifth one (cardiomegaly) —
# presumably intentional, confirm.
prompts = [
    "Chest X-ray, AP view, elderly female, clear lung fields, no infiltrates or opacities, normal mediastinum, clinical imaging style ",
    "Chest X-ray, PA view, adult, focal lobar consolidation in right lower lobe, bacterial pneumonia pattern, realistic diagnostic grayscale radiograph",
    "Chest X-ray, PA view, focal opacity in right middle lobe, radiodense lung lesion, grayscale diagnostic imaging",
    "Chest X-ray, PA view, blunting of costophrenic angle, pleural effusion on right side, mediastinal shift, grayscale radiography",
    "Chest X-ray, PA view, cardiomegaly with enlarged cardiac silhouette, normal lung fields, grayscale diagnostic imaging",
    "Chest X-ray, AP view, endotracheal tube and central venous catheter in position, diagnostic grayscale imaging",
    "Chest X-ray, PA view, motion artifact causing blurred ribs and diaphragm edges, clinical imaging artifact",
    "Chest X-ray, AP view, supine position, portable ICU radiograph",
    "Chest X-ray, portable AP imaging, ICU setting, low-resolution clinical radiograph, scanner domain variation",
    "Chest X-ray, PA view, cardiomegaly with enlarged cardiac silhouette, normal lung fields, grayscale diagnostic imaging"
]

output_dir = "Medical_dataset"
os.makedirs(output_dir, exist_ok=True)

# Seed the sampler so Restart & Run All regenerates the same images. A single
# generator is shared across calls, so repeated prompts still yield different
# images while the overall sequence stays deterministic.
SEED = 42
generator = torch.Generator(device="cpu").manual_seed(SEED)

for idx, prompt in enumerate(prompts):
    image = pipe(
        prompt=prompt,
        num_inference_steps=4,
        guidance_scale=0.0,
        generator=generator
    ).images[0]

    # File names are 1-based: image_1.png ... image_<len(prompts)>.png
    image.save(os.path.join(output_dir, f"image_{idx+1}.png"))

print("Synthetic dataset generated successfully!")


In [None]:
import os
from PIL import Image
import matplotlib.pyplot as plt

import re

# Preview every generated PNG, one small figure per image, in numeric order.

# Relative path: the same directory name the generation cell writes to, so the
# preview works from any working directory rather than only under /content.
folder = "Medical_dataset"


def _image_index(name):
    """Return the first integer embedded in `name`, or -1 when there is none."""
    match = re.search(r"\d+", name)
    return int(match.group()) if match else -1


# Sort by the numeric suffix so image_10.png comes after image_9.png instead of
# right after image_1.png (which is what plain lexicographic order gives).
png_files = sorted(
    (name for name in os.listdir(folder) if name.endswith(".png")),
    key=_image_index,
)

for file in png_files:
    # The context manager closes the file handle once the figure is rendered.
    with Image.open(os.path.join(folder, file)) as img:
        fig = plt.figure(figsize=(4,4))
        plt.imshow(img)
        plt.axis('off')
        plt.title(file)
        plt.show()
    # Release the figure so looping over many images does not accumulate memory.
    plt.close(fig)


In [None]:
pip install torchxrayvision


**Testing the synthetic dataset on a pretrained DenseNet model**

In [None]:
import torch
import torchvision.transforms as transforms
import torchxrayvision as xrv
from PIL import Image
import os
from sklearn.metrics import accuracy_score, classification_report

import re

# Evaluate the synthetic images with the pretrained torchxrayvision DenseNet
# and compare the top predicted pathology against the label implied by each
# generation prompt.
model = xrv.models.DenseNet(weights="all")
model.eval()

transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    # ToTensor yields values in [0, 1]; torchxrayvision models expect pixel
    # intensities scaled to roughly [-1024, 1024], so rescale accordingly.
    transforms.Lambda(lambda t: (2.0 * t - 1.0) * 1024.0),
])

# Relative path: the same directory name the generation cell writes to.
data_path = "Medical_dataset"

# Expected label for image_1.png ... image_10.png, in prompt order.
y_true = [
    "Normal",
    "Pneumonia",
    "Mass",
    "Effusion",
    "Cardiomegaly",
    "Normal",
    "Normal",
    "Normal",
    "Normal",
    "Cardiomegaly"
]


def _image_index(name):
    """Return the first integer embedded in `name`, or -1 when there is none."""
    match = re.search(r"\d+", name)
    return int(match.group()) if match else -1


# Sort numerically: plain sorted() is lexicographic and would place
# image_10.png between image_1.png and image_2.png, misaligning predictions
# with y_true. Non-PNG entries (e.g. notebook checkpoint folders) are skipped.
image_files = sorted(
    (name for name in os.listdir(data_path) if name.lower().endswith(".png")),
    key=_image_index,
)

if len(image_files) != len(y_true):
    raise ValueError(
        f"Found {len(image_files)} PNG files in {data_path!r} "
        f"but {len(y_true)} labels in y_true"
    )

pathologies = model.pathologies
y_pred = []

for file in image_files:
    with Image.open(os.path.join(data_path, file)) as raw:
        # Single-channel image -> tensor, plus a leading batch dimension.
        img = transform(raw.convert("L")).unsqueeze(0)

    with torch.no_grad():
        output = model(img)[0]

    # NOTE(review): the model is multi-label and has no "Normal" output, so
    # argmax always selects some pathology; "Normal" is only assigned below
    # when the top pathology is outside `mapping`. Consider thresholding the
    # per-pathology scores instead — confirm the intended evaluation protocol.
    idx = output.argmax().item()
    predicted_label = pathologies[idx]
    y_pred.append(predicted_label)


# Pathologies that have a counterpart in y_true; everything else is treated as "Normal".
mapping = {
    "Cardiomegaly": "Cardiomegaly",
    "Pneumonia": "Pneumonia",
    "Mass": "Mass",
    "Effusion": "Effusion",
}

y_pred_final = [mapping.get(lbl, "Normal") for lbl in y_pred]

acc = accuracy_score(y_true, y_pred_final)
print("Accuracy:", acc)

print(classification_report(y_true, y_pred_final))


In [None]:
# Echo the accuracy computed for the synthetic (Hugging Face-generated) images.
print(
    "Accuracy of the dataset that created through hugging face on a pretrained Model: ",
    acc,
)

**Testing the real dataset on the pretrained DenseNet model**

In [None]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!pip install kaggle


In [None]:
# Download the Kaggle dataset "paultimothymooney/chest-xray-pneumonia" into ./data.
# Presumably relies on the kaggle.json credentials copied to ~/.kaggle earlier in the notebook.
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia -p ./data


In [None]:
!unzip ./data/chest-xray-pneumonia.zip -d ./data/


In [None]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from sklearn.metrics import accuracy_score
import torchxrayvision as xrv

# Evaluate the pretrained torchxrayvision DenseNet on the Kaggle chest X-ray
# test split, reducing its top predicted pathology to NORMAL / PNEUMONIA.
model = xrv.models.DenseNet(weights="all")
model.eval()

transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    # ToTensor yields values in [0, 1]; torchxrayvision models expect pixel
    # intensities scaled to roughly [-1024, 1024], so rescale accordingly.
    transforms.Lambda(lambda t: (2.0 * t - 1.0) * 1024.0),
])

test_path = "./data/chest_xray/test/"
test_data = datasets.ImageFolder(test_path, transform=transform)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=16, shuffle=False)

# ImageFolder assigns an integer to each class sub-folder; invert that map so
# integer targets can be turned back into folder names.
idx_to_label = {v:k for k,v in test_data.class_to_idx.items()}

pathologies = model.pathologies

y_true = []
y_pred = []

for imgs, labels in test_loader:
    with torch.no_grad():
        outputs = model(imgs)

    # Index of the highest-scoring pathology for every image in the batch.
    # NOTE(review): the model is multi-label with no explicit "normal" output,
    # so argmax always picks some pathology; thresholding the Pneumonia score
    # may be a better decision rule — confirm the intended protocol.
    preds = outputs.argmax(1)

    for p in preds:
        # Convert the 0-dim tensor to a plain int before indexing the list.
        predicted_pathology = pathologies[int(p)]

        if predicted_pathology in ["Pneumonia", "Infiltration"]:
            y_pred.append("PNEUMONIA")
        else:
            y_pred.append("NORMAL")

    for l in labels:
        y_true.append(idx_to_label[int(l)].upper())


In [None]:
# Share of test images whose NORMAL/PNEUMONIA prediction matches the folder
# label, reported as a percentage.
acc = accuracy_score(y_true=y_true, y_pred=y_pred)
print("Test Accuracy:", 100 * acc)


In [None]:
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia -p ./data
!unzip ./data/chest-xray-pneumonia.zip -d ./data/


In [None]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from sklearn.metrics import accuracy_score
import torchxrayvision as xrv

# Evaluate the pretrained torchxrayvision DenseNet on the Kaggle chest X-ray
# test split and print the NORMAL / PNEUMONIA accuracy as a percentage.
model = xrv.models.DenseNet(weights="all")
model.eval()

transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.Grayscale(),
    transforms.ToTensor(),
    # ToTensor yields values in [0, 1]; torchxrayvision models expect pixel
    # intensities scaled to roughly [-1024, 1024], so rescale accordingly.
    transforms.Lambda(lambda t: (2.0 * t - 1.0) * 1024.0),
])

test_data = datasets.ImageFolder("./data/chest_xray/test/", transform=transform)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=16, shuffle=False)

# Invert ImageFolder's class-name -> index map to recover folder names.
idx_to_label = {v:k for k,v in test_data.class_to_idx.items()}
pathologies = model.pathologies

y_true, y_pred = [], []

for imgs, labels in test_loader:
    with torch.no_grad():
        outputs = model(imgs)

    # Index of the highest-scoring pathology for every image in the batch.
    # NOTE(review): the model is multi-label with no explicit "normal" output,
    # so argmax always picks some pathology; thresholding the Pneumonia score
    # may be a better decision rule — confirm the intended protocol.
    preds = outputs.argmax(1)

    for p in preds:
        pred = pathologies[int(p)]
        # Only these two pathologies count as a pneumonia prediction; every
        # other top pathology is reported as NORMAL.
        if pred in ["Pneumonia","Infiltration"]:
            y_pred.append("PNEUMONIA")
        else:
            y_pred.append("NORMAL")

    for l in labels:
        y_true.append(idx_to_label[int(l)].upper())

acc = accuracy_score(y_true, y_pred)
print("Accuracy on small real dataset:", acc*100)


**The accuracy is low because the dataset contains mostly pneumonia cases, and the pretrained DenseNet has no explicit "normal" category to compare against, so it misclassifies many images.**