# Project: CliniScan - Lung Abnormality Detection
# Dataset: VinDr-CXR (Chest X-rays)
# Model: YOLOv8
# Created by: Prarthana


# Model

In [None]:
import os, shutil
from tqdm import tqdm

# Merge the preprocessed training set, the augmented images and the
# validation set into the folders under 5_Model that are used for training.

# Paths
ORIG_TRAIN_IMG = "/content/drive/MyDrive/CliniScan/3_Preprocessing/images/train"
ORIG_TRAIN_LBL = "/content/drive/MyDrive/CliniScan/3_Preprocessing/labels/train"

AUG_TRAIN_IMG = "/content/drive/MyDrive/CliniScan/4_Augmentation/images"

VAL_IMG = "/content/drive/MyDrive/CliniScan/3_Preprocessing/images/val"
VAL_LBL = "/content/drive/MyDrive/CliniScan/3_Preprocessing/labels/val"

# Final output folders for training YOLO
FINAL_TRAIN_IMG = "/content/drive/MyDrive/CliniScan/5_Model/final_train_images"
FINAL_TRAIN_LBL = "/content/drive/MyDrive/CliniScan/5_Model/final_train_labels"
FINAL_VAL_IMG   = "/content/drive/MyDrive/CliniScan/5_Model/val_images"
FINAL_VAL_LBL   = "/content/drive/MyDrive/CliniScan/5_Model/val_labels"

# Prefix given to an augmented image whose file name is already used by an
# original training image, so the original is not overwritten.
AUG_PREFIX = "aug_"

for out_dir in (FINAL_TRAIN_IMG, FINAL_TRAIN_LBL, FINAL_VAL_IMG, FINAL_VAL_LBL):
    os.makedirs(out_dir, exist_ok=True)


def _copy_folder(src_dir, dst_dir):
    """Copy every entry of ``src_dir`` into ``dst_dir`` under the same name.

    Shows a tqdm progress bar and returns the list of copied file names.
    """
    names = os.listdir(src_dir)
    for name in tqdm(names):
        shutil.copy(os.path.join(src_dir, name), os.path.join(dst_dir, name))
    return names


# Copy original train images + labels
print("Copying ORIGINAL train images...")
original_names = set(_copy_folder(ORIG_TRAIN_IMG, FINAL_TRAIN_IMG))

print("Copying ORIGINAL train labels...")
_copy_folder(ORIG_TRAIN_LBL, FINAL_TRAIN_LBL)

# Copy augmented train images.
# Both sources land in FINAL_TRAIN_IMG, so an augmented file that has the same
# name as an original would replace it. Such files are stored under AUG_PREFIX
# instead. The clash test uses the original folder listing (not the
# destination) so re-running the cell does not create extra copies.
print("Copying AUGMENTED train images...")
renamed = 0
for name in tqdm(os.listdir(AUG_TRAIN_IMG)):
    target_name = name
    if name in original_names:
        target_name = AUG_PREFIX + name
        renamed += 1
        # Overwriting used to pair the augmented image with the original's
        # label file; keep that pairing by duplicating the label.
        # NOTE(review): this presumes the augmentation did not move the boxes
        # (no flip/rotate/crop). If the augmentation step wrote its own
        # labels, copy those here instead - confirm.
        label_name = os.path.splitext(name)[0] + ".txt"
        label_src = os.path.join(ORIG_TRAIN_LBL, label_name)
        if os.path.exists(label_src):
            shutil.copy(label_src, os.path.join(FINAL_TRAIN_LBL, AUG_PREFIX + label_name))
    shutil.copy(os.path.join(AUG_TRAIN_IMG, name), os.path.join(FINAL_TRAIN_IMG, target_name))

if renamed:
    print(f"{renamed} augmented images shared a name with an original; "
          f"stored with the '{AUG_PREFIX}' prefix.")

# Copy validation images + labels
print("Copying VAL images & labels...")
_copy_folder(VAL_IMG, FINAL_VAL_IMG)
_copy_folder(VAL_LBL, FINAL_VAL_LBL)

print("\nMerge complete!")


Copying ORIGINAL train images...


100%|██████████| 13500/13500 [03:10<00:00, 70.99it/s]


Copying ORIGINAL train labels...


100%|██████████| 13500/13500 [04:33<00:00, 49.33it/s]


Copying AUGMENTED train images...


100%|██████████| 500/500 [00:06<00:00, 73.87it/s]


Copying VAL images & labels...

Merge complete!


In [None]:
import os, shutil
from tqdm import tqdm

# Rearrange the merged dataset into <split>/images and <split>/labels
# folders underneath BASE.
BASE = "/content/drive/MyDrive/CliniScan/5_Model"

# Destination folders
TRAIN_IMG = BASE + "/train/images"
TRAIN_LBL = BASE + "/train/labels"
VAL_IMG   = BASE + "/val/images"
VAL_LBL   = BASE + "/val/labels"

# Folders the files currently live in
OLD_TRAIN_IMG = BASE + "/final_train_images"
OLD_TRAIN_LBL = BASE + "/final_train_labels"
OLD_VAL_IMG   = BASE + "/val_images"
OLD_VAL_LBL   = BASE + "/val_labels"

# Create all four destinations up front; existing folders are left alone.
for destination in (TRAIN_IMG, TRAIN_LBL, VAL_IMG, VAL_LBL):
    os.makedirs(destination, exist_ok=True)

# (source folder, destination folder, progress-bar caption), processed in order.
relocations = [
    (OLD_TRAIN_IMG, TRAIN_IMG, "Move train images"),
    (OLD_TRAIN_LBL, TRAIN_LBL, "Move train labels"),
    (OLD_VAL_IMG, VAL_IMG, "Move val images"),
    (OLD_VAL_LBL, VAL_LBL, "Move val labels"),
]

for source, destination, caption in relocations:
    for filename in tqdm(os.listdir(source), desc=caption):
        shutil.move(
            os.path.join(source, filename),
            os.path.join(destination, filename),
        )

print("✔ All files moved into YOLO-correct structure!")

Move train images: 100%|██████████| 13500/13500 [01:13<00:00, 183.93it/s]
Move train labels: 100%|██████████| 13500/13500 [01:08<00:00, 197.80it/s]
Move val images: 100%|██████████| 1500/1500 [00:05<00:00, 261.55it/s]
Move val labels: 100%|██████████| 1500/1500 [00:05<00:00, 292.85it/s]

✔ All files moved into YOLO-correct structure!





In [None]:
# Build dataset.yaml for training and write it next to the train/ and val/
# folders. The position of a name in CLASS_NAMES is its class id.
DATASET_ROOT = "/content/drive/MyDrive/CliniScan/5_Model"

CLASS_NAMES = [
    "Aortic enlargement",
    "Atelectasis",
    "Calcification",
    "Cardiomegaly",
    "Consolidation",
    "ILD",
    "Infiltration",
    "Lung Opacity",
    "No finding",
    "Nodule/Mass",
    "Other lesion",
    "Pleural effusion",
    "Pleural thickening",
    "Pneumothorax",
    "Pulmonary fibrosis",
]

# Header section; empty strings become the blank lines of the file.
header_lines = [
    "",
    "path: " + DATASET_ROOT,
    "",
    "train: train",
    "val: val",
    "",
    "nc: " + str(len(CLASS_NAMES)),
    "",
    "names:",
]
# One "  <id>: <name>" entry per class, ids counted from 0.
name_lines = ["  {}: {}".format(class_id, label) for class_id, label in enumerate(CLASS_NAMES)]

yaml_content = "\n".join(header_lines + name_lines) + "\n"

with open(DATASET_ROOT + "/dataset.yaml", "w") as yaml_file:
    yaml_file.write(yaml_content)

print("dataset.yaml created!")

dataset.yaml created!


In [None]:
# Install the Ultralytics package; the training cell imports YOLO from it.
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.233-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.233-py3-none-any.whl (1.1 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 20.1 MB/s eta 0:00:00
Downloading ultralytics_thop-2.0.18-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.233 ultralytics-thop-2.0.18


In [None]:
from ultralytics import YOLO

# Load the yolov8n.pt weights and train on the CliniScan dataset.
model = YOLO("yolov8n.pt")

# Training arguments, gathered in one place so they are easy to review.
train_settings = {
    "data": "/content/drive/MyDrive/CliniScan/5_Model/dataset.yaml",
    "epochs": 30,
    "imgsz": 512,
    "batch": 8,
    "device": 0,
    # Run output is written under <project>/<name>.
    "project": "/content/drive/MyDrive/CliniScan/5_Model",
    "name": "CliniScan_YOLO_Final",
}

# Kept as the last expression so the notebook displays the returned result.
model.train(**train_settings)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[34m[1mtrain: [0m/content/drive/MyDrive/CliniScan/5_Model/train/images/9ba0cb91053c58194976504994cd1a6c.jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates []
[34m[1mtrain: [0m/content/drive/MyDrive/CliniScan/5_Model/train/images/9bae129eba7ed9ee3489ec51e0bdb05d.jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates []
[34m[1mtrain: [0m/content/drive/MyDrive/CliniScan/5_Model/train/images/9bb56fa80a00fbb3e04564b733f0ea7b.jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates []
[34m[1mtrain: [0m/content/drive/MyDrive/CliniScan/5_Model/train/images/9bb5a1c72d2c871377e730d74d5b9425.jpg: ignoring corrupt image/label: non-normalized or out of bounds coordinates []
[34m[1mtrain: [0m/content/drive/MyDrive/CliniScan/5_Model/train/images/9bc665785728c6f1d2a0e54d88becd16.jpg: ignoring corrupt image/label: non-normalized or out of bounds coord

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([ 0,  1,  2,  3,  4,  5,  6,  7,  9, 10, 11, 12, 13, 14])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7ca22fed3740>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.0