In [1]:
from pathlib import Path
import yaml

In [2]:
# ! pip install torch torchvision torchaudio  
# ! pip install ultralytics  

In [3]:
# ! pip install ultralytics opencv-python pandas numpy albumentations torch torchvision matplotlib

In [2]:
# ! pip install yolov5

import pandas as pd

In [6]:
# import os
# import shutil

# from sklearn.model_selection import train_test_split

# # === 1. Load Metadata === #
# df = pd.read_csv(r"D:\Projects\Disease_Detection\Ground_Truth.csv")
# df['Finding Labels'] = df['Finding Labels'].fillna('No Finding')

# # === 2. Create Unique Class Map === #
# all_labels = set()
# for labels in df['Finding Labels']:
#     for label in labels.split('|'):
#         all_labels.add(label.strip())

# all_labels = sorted(list(all_labels))
# class_map = {label: idx for idx, label in enumerate(all_labels)}

# print("✅ Class Map:", class_map)

# # === 3. Setup Folder Structure === #
# for folder in ['images/train', 'images/val', 'labels/train', 'labels/val']:
#     os.makedirs(folder, exist_ok=True)

# # === 4. Filter Only Available Images === #
# image_folder = r"D:\Projects\Disease_Detection\xray_images"

# def get_valid_image_path(image_name):
#     for ext in ['.png', '.jpg', '.jpeg']:
#         candidate = os.path.join(image_folder, image_name.replace('.png', ext))
#         if os.path.exists(candidate):
#             return candidate
#     return None

# df['image_path'] = df['Image Index'].apply(get_valid_image_path)
# df = df[df['image_path'].notnull()]  # Keep only rows with valid images

# print(f"✅ Valid images found: {len(df)}")

# # === 5. Train/Val Split === #
# train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

# # === 6. Process Data === #
# def process(df_split, split_name):
#     for _, row in df_split.iterrows():
#         img_name = row['Image Index']
#         img_path = row['image_path']
#         labels = row['Finding Labels'].split('|')

#         # Create YOLO format label
#         label_lines = []
#         for l in labels:
#             l = l.strip()
#             if l != "No Finding":  # Optional: skip images with no findings
#                 label_id = class_map[l]
#                 label_lines.append(f"{label_id} 0.5 0.5 1.0 1.0")  # whole image bbox

#         if len(label_lines) == 0:
#             continue  # Skip if no valid labels

#         # Save label file
#         label_filename = img_name.replace('.png', '.txt').replace('.jpg', '.txt').replace('.jpeg', '.txt')
#         label_path = f"labels/{split_name}/{label_filename}"
#         with open(label_path, 'w') as f:
#             f.write("\n".join(label_lines))

#         # Copy image
#         dst_img_path = f"images/{split_name}/{img_name}"
#         shutil.copy(img_path, dst_img_path)

# # === 7. Run Processing === #
# process(train_df, 'train')
# process(val_df, 'val')

# # === 8. Generate data.yaml === #
# with open("data.yaml", "w") as f:
#     f.write("train: ./images/train\n")
#     f.write("val: ./images/val\n")
#     f.write(f"nc: {len(all_labels)}\n")
#     f.write("names: [" + ", ".join([f"'{c}'" for c in all_labels]) + "]\n")

# print("✅ All Done. YOLOv5 dataset is ready!")



In [3]:
import os
import shutil
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# --- Step 1: read the ground-truth CSV --------------------------------------
# Rows with a missing 'Finding Labels' value are treated as 'No Finding'.
df = pd.read_csv(
    r"D:\Projects\Disease_Detection\X-Ray_image-Dataset\Chest_XRay_Dataset\Ground_Truth.csv"
)
df['Finding Labels'] = df['Finding Labels'].fillna('No Finding')

# --- Step 2: map every distinct label name to an integer id -----------------
# A cell may hold several names separated by '|'; ids follow the alphabetical
# order of the stripped names.
# NOTE(review): 'No Finding' receives an id here although process() never
# writes it to a label file, so data.yaml declares one class with no samples.
all_labels = sorted({
    name.strip()
    for cell in df['Finding Labels']
    for name in cell.split('|')
})
class_map = dict(zip(all_labels, range(len(all_labels))))

print("✅ Class Map:", class_map)

# --- Step 3: create the output directory layout (relative to the CWD) -------
for out_dir in ('images/train', 'images/val', 'labels/train', 'labels/val'):
    os.makedirs(out_dir, exist_ok=True)

# --- Step 4: directory that holds the source X-ray images -------------------
image_folder = r"D:\Projects\Disease_Detection\X-Ray_image-Dataset\Chest_XRay_Dataset\xray_images"

def get_valid_image_path(image_name, folder=None):
    """Locate the image file that belongs to ``image_name``.

    The name is tried exactly as given first; after that its extension is
    swapped for ``.png``, ``.jpg`` and ``.jpeg`` in turn. Only the real
    extension is swapped (via ``os.path.splitext``), so a ``.png`` elsewhere
    in the name is left alone and upper-case extensions are handled too.

    Args:
        image_name: File name as listed in the metadata (e.g. ``"x.png"``).
            Anything that is not a non-empty string (such as NaN from a
            missing CSV cell) yields ``None``.
        folder: Directory to search. Defaults to the module-level
            ``image_folder``.

    Returns:
        The path of the first candidate that is an existing file, or ``None``
        when no candidate exists.
    """
    if not isinstance(image_name, str) or not image_name:
        return None
    if folder is None:
        folder = image_folder

    stem = os.path.splitext(image_name)[0]
    # Original name first, so an exact match always wins over a substituted one.
    candidate_names = [image_name] + [stem + ext for ext in ('.png', '.jpg', '.jpeg')]
    for name in candidate_names:
        candidate = os.path.join(folder, name)
        if os.path.isfile(candidate):
            return candidate
    return None

# Resolve each metadata row to a file on disk and drop the rows for which
# get_valid_image_path() returned None.
df = (
    df.assign(image_path=df['Image Index'].apply(get_valid_image_path))
      .dropna(subset=['image_path'])
)

print(f"✅ Valid images found: {len(df)}")

# --- Step 5: 80/20 train/validation split with a fixed seed -----------------
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

# === 6. Preprocessing Function === #
def preprocess_image(img_path, resize_dim=(640, 640)):
    """Load an image as grayscale and return an equalized, resized uint8 copy.

    Steps: read with ``cv2.IMREAD_GRAYSCALE``, apply global histogram
    equalization, resize to ``resize_dim`` (passed as ``cv2.resize``'s
    ``dsize``), min-max normalize to the 0..255 range in float32, and cast
    back to ``uint8``. CLAHE (``cv2.createCLAHE``) would be a possible
    alternative to the global equalization; it is not applied here.

    Args:
        img_path: Path of the image file to read.
        resize_dim: Target size handed to ``cv2.resize``.

    Returns:
        The processed image as a ``numpy.uint8`` array, or ``None`` when
        ``cv2.imread`` could not read the file.
    """
    gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if gray is not None:
        equalized = cv2.equalizeHist(gray)
        resized = cv2.resize(equalized, resize_dim)
        stretched = cv2.normalize(
            resized.astype('float32'), None, 0.0, 255.0, cv2.NORM_MINMAX
        )
        return stretched.astype(np.uint8)
    return None

# === 7. Process Data === #
def process(df_split, split_name):
    """Write preprocessed images and YOLO label files for one dataset split.

    For every row of ``df_split`` the '|'-separated 'Finding Labels' are turned
    into YOLO lines of the form ``"<class_id> 0.5 0.5 1.0 1.0"`` (a box covering
    the whole image). Rows whose only label is "No Finding", whose image cannot
    be read, or whose image cannot be written are skipped.

    Outputs go to ``images/<split_name>/<Image Index>`` and
    ``labels/<split_name>/<stem>.txt``, relative to the current working
    directory. Relies on the module-level ``class_map`` and
    ``preprocess_image``.

    Args:
        df_split: DataFrame with 'Image Index', 'image_path' and
            'Finding Labels' columns.
        split_name: Sub-directory name of the split (the notebook passes
            'train' and 'val').
    """
    for _, row in df_split.iterrows():
        img_name = row['Image Index']
        img_path = row['image_path']

        # Build the labels first: rows without a usable label are skipped
        # before the comparatively expensive image preprocessing runs.
        label_lines = []
        for raw_label in row['Finding Labels'].split('|'):
            label = raw_label.strip()
            if label != "No Finding":
                label_id = class_map[label]
                label_lines.append(f"{label_id} 0.5 0.5 1.0 1.0")  # whole image bbox
        if not label_lines:
            continue

        processed_img = preprocess_image(img_path)
        if processed_img is None:
            continue

        # Write the image before the label so a failed image write cannot
        # leave a label file that has no matching image.
        dst_img_path = f"images/{split_name}/{img_name}"
        if not cv2.imwrite(dst_img_path, processed_img):
            continue

        # Swap only the real extension for .txt (handles upper-case extensions
        # and names that contain '.png'/'.jpg' somewhere other than the end).
        label_filename = os.path.splitext(img_name)[0] + '.txt'
        label_path = f"labels/{split_name}/{label_filename}"
        with open(label_path, 'w') as f:
            f.write("\n".join(label_lines))

# --- Step 8: materialize both splits on disk --------------------------------
for split_frame, split_dir in ((train_df, 'train'), (val_df, 'val')):
    process(split_frame, split_dir)

# --- Step 9: write the dataset description consumed by YOLOv5 ---------------
# nc and names both come from all_labels, so they always agree with class_map.
quoted_names = ", ".join(f"'{c}'" for c in all_labels)
yaml_lines = [
    "train: ./images/train\n",
    "val: ./images/val\n",
    f"nc: {len(all_labels)}\n",
    "names: [" + quoted_names + "]\n",
]
with open("data.yaml", "w") as f:
    f.writelines(yaml_lines)

print("✅ All Done. YOLOv5 dataset with preprocessing is ready!")


✅ Class Map: {'Atelectasis': 0, 'Cardiomegaly': 1, 'Consolidation': 2, 'Edema': 3, 'Effusion': 4, 'Emphysema': 5, 'Fibrosis': 6, 'Hernia': 7, 'Infiltration': 8, 'Mass': 9, 'No Finding': 10, 'Nodule': 11, 'Pleural_Thickening': 12, 'Pneumonia': 13, 'Pneumothorax': 14}
✅ Valid images found: 3681
✅ All Done. YOLOv5 dataset with preprocessing is ready!


In [5]:
! git clone https://github.com/ultralytics/yolov5.git

fatal: destination path 'yolov5' already exists and is not an empty directory.


In [None]:
# import warnings
# warnings.filterwarnings("ignore")

# %cd yolov5

d:\Projects\Disease_Detection\yolov5\yolov5


In [None]:
# ! pip install yolov5

In [13]:
# ! pip install -r "D:\Projects\Disease_Detection\yolov5\requirements.txt"

In [None]:
# import os

# # Set your YOLOv5 folder path
# yolov5_path = r"D:\Projects\Disease_Detection\yolov5"

# # Run Git safe.directory command using Python
# os.system(f'git config --global --add safe.directory "{yolov5_path}"')

0

In [None]:
# import shutil

# # Define paths
# src_images = r'D:\Projects\Disease_Detection\images'
# src_labels = r'D:\Projects\Disease_Detection\labels'
# dst_images = r'D:\Projects\Disease_Detection\yolov5\data_yolo\images'
# dst_labels = r'D:\Projects\Disease_Detection\yolov5\data_yolo\labels'

# # Move images and labels
# shutil.copytree(src_images, dst_images, dirs_exist_ok=True)
# shutil.copytree(src_labels, dst_labels, dirs_exist_ok=True)

# print("✅ Image and label folders copied to yolov5 project.")

✅ Image and label folders copied to yolov5 project.


In [6]:
import os
os.getcwd()


'd:\\Projects\\Disease_Detection\\YOLO_Project'

In [None]:
# Launch YOLOv5 training through the shell: image size 512, batch size 8,
# 20 epochs, dataset described by YOLO_Project\data.yaml, starting from the
# yolov5s.pt checkpoint.
! python "D:\Projects\Disease_Detection\YOLO_Project\yolov5\train.py" --img 512 --batch 8 --epochs 20 --data "D:\Projects\Disease_Detection\YOLO_Project\data.yaml" --weights yolov5s.pt 

In [None]:
import os
import pandas as pd
import numpy as np
from utils.grad_cam import generate_gradcam
from torchvision import models, transforms
from PIL import Image
import cv2

# Generate pseudo bounding boxes: run Grad-CAM with an ImageNet-pretrained
# ResNet-50 on every image listed in the ground-truth CSV, threshold the
# heat-map, and store the bounding rectangle of each connected region.
model = models.resnet50(pretrained=True)
model.eval()  # inference mode, so BatchNorm uses its stored running statistics
target_layer = model.layer4[2].conv3

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

df = pd.read_csv(r"F:\Guvi Projects\Disease Detection and Diagnosis (Final Project)\Chest_XRay_Dataset\Ground_Truth.csv")
bbox_data = []

# Heat-map cut-off. NOTE(review): assumes generate_gradcam returns a 2-D numpy
# array scaled to [0, 1] - confirm against utils.grad_cam.
threshold = 0.5

for _, row in df.iterrows():
    image_path = os.path.join(r"F:\Guvi Projects\Disease Detection and Diagnosis (Final Project)\Chest_XRay_Dataset\xray_images", row["Image Index"])
    # ResNet-50's first convolution takes 3 channels; convert so grayscale or
    # RGBA files are accepted. The context manager closes the file handle.
    with Image.open(image_path) as image:
        image_tensor = transform(image.convert("RGB"))

    cam = generate_gradcam(image_tensor, model, target_layer)

    cam_bin = (cam > threshold).astype(np.uint8) * 255
    contours, _ = cv2.findContours(cam_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # NOTE(review): the boxes are in the coordinate frame of `cam`, presumably
    # not the original image resolution - confirm and rescale if needed.
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        bbox_data.append([row["Image Index"], x, y, x+w, y+h, row["Finding Labels"]])

pd.DataFrame(bbox_data, columns=["Image", "xmin", "ymin", "xmax", "ymax", "Labels"]).to_csv(r"F:\Guvi Projects\Disease Detection and Diagnosis (Final Project)\Datasets\pseudo_bboxes.csv", index=False)

In [None]:
import sys
sys.path.append(r'D:\Projects\Disease_Detection\yolov5')

In [25]:
from data_yolo.val import run as validate


In [None]:
import os
print(os.getcwd())

In [None]:
import os
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module=".*yaml.*")
print(os.getcwd())  # Check your current working directory
print(os.listdir())  # List all files in the directory

In [None]:
! git clone https://github.com/ultralytics/yolov5.git


In [None]:
! python --version

In [None]:
# ! pip install -r "D:\Projects\Disease_Detection\yolov5\requirements.txt"

In [None]:
# Disease class names; list position is the class id YOLOv5 associates with
# each name.
# NOTE(review): the class map printed by the dataset-preparation cell has 15
# alphabetically ordered entries (including 'Cardiomegaly' and 'No Finding'),
# while this list has 13 names in a different order. If the label files under
# data_yolo were produced by process(), their ids will not match - confirm.
disease_names = [
    "Atelectasis", "Effusion", "Mass", "Infiltration", "Pneumonia",
    "Nodule", "Pneumothorax", "Consolidation", "Edema", "Emphysema",
    "Fibrosis", "Pleural_Thickening", "Hernia",
]

data = {
    "train": "D:/Projects/Disease_Detection/yolov5/data_yolo/images/train/",
    "val": "D:/Projects/Disease_Detection/yolov5/data_yolo/images/val/",
    "nc": len(disease_names),  # number of disease classes, derived from the list
    "names": disease_names,
}

# Serialize the mapping in block style and store it next to the dataset.
file_path = "D:/Projects/Disease_Detection/yolov5/data_yolo/train_data.yaml"
yaml_text = yaml.dump(data, default_flow_style=False)
with open(file_path, "w") as f:
    f.write(yaml_text)

print(f"train_data.yaml created at {file_path}")

In [None]:
# Launch YOLOv5 training through the shell: image size 640, batch size 16,
# 20 epochs, starting from yolov5s.pt, on device 0.
# NOTE(review): --data is the relative name data.yaml, whereas the cell that
# builds `data` writes train_data.yaml under yolov5/data_yolo - confirm which
# dataset file is meant to be used here.
! python "D:\Projects\Disease_Detection\yolov5\train.py" --img 640 --batch 16 --epochs 20 --data data.yaml --weights yolov5s.pt --device 0

In [None]:
# Run detect.py on one training image at image size 640 with a confidence
# threshold of 0.5 and display the result (--view-img).
# NOTE(review): --weights is yolov5s.pt, the same file the training commands
# start from, not a checkpoint written by training - confirm this is intended.
! python detect.py --img 640 --conf 0.5 --source "D:\Projects\Disease_Detection\yolov5\data_yolo\images\train\00000251_000.png" --weights yolov5s.pt --view-img


In [None]:
! python detect.py --img 1024 --conf 0.5 --source r'D:\Projects\Disease_Detection\yolov5\data_yolo\images\train\00000250_014.png' --weights yolov5s.pt

# Moving on to Transfer Learning

In [None]:
# install required transfer learning packages

! pip install -r requirements.txt