In [1]:
pip install albumentations roboflow opencv-python numpy


Collecting roboflow
  Downloading roboflow-1.1.56-py3-none-any.whl.metadata (9.7 kB)
Collecting idna==3.7 (from roboflow)
  Downloading idna-3.7-py3-none-any.whl.metadata (9.9 kB)
Collecting opencv-python-headless>=4.9.0.80 (from albumentations)
  Downloading opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting python-dotenv (from roboflow)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting filetype (from roboflow)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading roboflow-1.1.56-py3-none-any.whl (83 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.8/83.8 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading idna-3.7-py3-none-any.whl (66 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.8/66.8 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86

In [2]:
# Attach Google Drive to the Colab filesystem so the dataset archive can be read.
from google.colab import drive

drive.mount(mountpoint="/content/drive")


Mounted at /content/drive


In [None]:
import zipfile
import os

# Location of the dataset archive on the mounted Drive and the directory it is
# unpacked into.
zip_path = "/content/drive/MyDrive/Cancer images only.v1i.yolov8.zip"
extract_to = "/content/data"

# Create the destination up front; a directory left over from a previous run is fine.
os.makedirs(extract_to, exist_ok=True)

# The context manager closes the archive handle even if extraction fails.
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=extract_to)

print("Dataset extracted successfully!")


✅ Dataset extracted successfully!


In [None]:
import os
import cv2
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Augmentation pipeline shared by augment_dataset(): random flips, a
# brightness/contrast jitter and a rotation of up to 15 degrees. Bounding boxes
# are supplied in "yolo" format and their class ids are passed through the
# "category_ids" label field.
transform = A.Compose(
    transforms=[
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.2),
        A.RandomBrightnessContrast(p=0.2),
        A.Rotate(limit=15, p=0.5),
    ],
    bbox_params=A.BboxParams(format="yolo", label_fields=["category_ids"]),
)

def augment_dataset(images_folder, labels_folder, output_folder, num_augmentations=3):
    """Write randomly augmented copies of a YOLO-format detection dataset.

    Every ``.jpg`` / ``.jpeg`` / ``.png`` file in ``images_folder`` (extension
    matched case-insensitively) is paired with ``<stem>.txt`` in
    ``labels_folder``. The module-level ``transform`` pipeline is applied
    ``num_augmentations`` times per image and each result is saved as
    ``<stem>_aug_<i>.jpg`` under ``output_folder/images`` with a matching
    ``<stem>_aug_<i>.txt`` under ``output_folder/labels``.

    Images that cannot be decoded, or that have no label file, are skipped
    with a printed message instead of aborting the whole run.

    Args:
        images_folder: Directory containing the source images.
        labels_folder: Directory containing one YOLO label file per image;
            each non-blank line is ``class x_center y_center width height``.
        output_folder: Destination root; ``images`` and ``labels``
            sub-directories are created inside it if needed.
        num_augmentations: Number of augmented variants written per image.

    Returns:
        None. Results are written to disk and a summary line is printed.
    """
    output_images = os.path.join(output_folder, "images")
    output_labels = os.path.join(output_folder, "labels")
    # makedirs also creates output_folder itself as an intermediate directory.
    os.makedirs(output_images, exist_ok=True)
    os.makedirs(output_labels, exist_ok=True)

    for image_name in os.listdir(images_folder):
        # Split only at the final dot: keeps the full stem for names such as
        # "139_jpeg.rf.<hash>.jpg", so different source files never collapse
        # onto the same output name, and maps ".jpeg" to ".txt" correctly.
        stem, extension = os.path.splitext(image_name)
        if extension.lower() not in (".jpg", ".png", ".jpeg"):
            continue

        image_path = os.path.join(images_folder, image_name)
        label_path = os.path.join(labels_folder, stem + ".txt")

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable/corrupt file by returning None.
            print(f"Skipping {image_name}: image could not be read")
            continue

        if not os.path.isfile(label_path):
            print(f"Skipping {image_name}: label file not found at {label_path}")
            continue

        with open(label_path, "r") as f:
            lines = f.readlines()

        bboxes = []
        category_ids = []
        for line in lines:
            parts = line.split()
            if not parts:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            # Class ids may have been stored as "0.0"; go through float first.
            category_ids.append(int(float(parts[0])))
            x_center, y_center, w, h = map(float, parts[1:5])
            bboxes.append([x_center, y_center, w, h])

        for i in range(num_augmentations):
            augmented = transform(image=image, bboxes=bboxes, category_ids=category_ids)

            # Keep every normalized coordinate inside [0, 1].
            clipped_bboxes = [
                [np.clip(value, 0, 1) for value in bbox]
                for bbox in augmented["bboxes"]
            ]

            new_stem = f"{stem}_aug_{i}"
            cv2.imwrite(os.path.join(output_images, new_stem + ".jpg"), augmented["image"])

            new_label_path = os.path.join(output_labels, new_stem + ".txt")
            with open(new_label_path, "w") as f:
                for bbox, category_id in zip(clipped_bboxes, augmented["category_ids"]):
                    # YOLO class ids must be integers; cast in case the
                    # pipeline hands the labels back as floats.
                    f.write(f"{int(category_id)} {bbox[0]} {bbox[1]} {bbox[2]} {bbox[3]}\n")

    print(f" Augmented dataset saved in {output_folder}")

# Training split to augment and the directory that receives the generated copies.
images_folder = "/content/data/train/images"
labels_folder = "/content/data/train/labels"
output_folder = "/content/augmentedfinal"

# Produce three augmented variants of every training image.
augment_dataset(
    images_folder=images_folder,
    labels_folder=labels_folder,
    output_folder=output_folder,
    num_augmentations=3,
)


In [14]:
# Recursively zip /content/augmented_output and hand the archive to the browser
# for download.
# NOTE(review): the augmentation call in this notebook writes to
# /content/augmentedfinal, not /content/augmented_output — confirm which folder
# is meant to be archived here.
!zip -r /content/augmented_output.zip /content/augmented_output
from google.colab import files
files.download("/content/augmented_output.zip")


  adding: content/augmented_output/ (stored 0%)
  adding: content/augmented_output/labels/ (stored 0%)
  adding: content/augmented_output/labels/139_jpeg_aug_1_aug_1.txt (deflated 39%)
  adding: content/augmented_output/labels/167_jpeg_aug_0_aug_1.txt (deflated 42%)
  adding: content/augmented_output/labels/403_jpeg_aug_2_aug_0.txt (deflated 28%)
  adding: content/augmented_output/labels/052_jpeg_aug_0_aug_0.txt (deflated 33%)
  adding: content/augmented_output/labels/060_jpeg_aug_2_aug_0.txt (deflated 41%)
  adding: content/augmented_output/labels/364_jpeg_aug_0_aug_1.txt (deflated 27%)
  adding: content/augmented_output/labels/093_jpeg_aug_0.txt (deflated 42%)
  adding: content/augmented_output/labels/016_jpeg_aug_1_aug_2.txt (deflated 47%)
  adding: content/augmented_output/labels/414_jpeg_aug_2.txt (deflated 28%)
  adding: content/augmented_output/labels/060_jpeg_aug_2.txt (deflated 41%)
  adding: content/augmented_output/labels/137_jpeg_aug_1_aug_1.txt (deflated 40%)
  adding: con

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [15]:
# Recursively zip /content/augmented_dataset and hand the archive to the browser
# for download.
# NOTE(review): the augmentation call in this notebook writes to
# /content/augmentedfinal, not /content/augmented_dataset — confirm which folder
# is meant to be archived here.
!zip -r /content/augmented_dataset.zip /content/augmented_dataset
from google.colab import files
files.download("/content/augmented_dataset.zip")


  adding: content/augmented_dataset/ (stored 0%)
  adding: content/augmented_dataset/labels/ (stored 0%)
  adding: content/augmented_dataset/labels/093_jpeg_aug_0.txt (deflated 40%)
  adding: content/augmented_dataset/labels/414_jpeg_aug_2.txt (deflated 26%)
  adding: content/augmented_dataset/labels/060_jpeg_aug_2.txt (deflated 41%)
  adding: content/augmented_dataset/labels/463_jpeg_aug_1.txt (deflated 28%)
  adding: content/augmented_dataset/labels/276_jpeg_aug_0.txt (deflated 28%)
  adding: content/augmented_dataset/labels/094_jpeg_aug_2.txt (deflated 33%)
  adding: content/augmented_dataset/labels/067_jpeg_aug_2.txt (deflated 25%)
  adding: content/augmented_dataset/labels/221_jpeg_aug_2.txt (deflated 41%)
  adding: content/augmented_dataset/labels/314_jpeg_aug_2.txt (deflated 28%)
  adding: content/augmented_dataset/labels/357_jpeg_aug_1.txt (deflated 27%)
  adding: content/augmented_dataset/labels/224_jpeg_aug_2.txt (deflated 41%)
  adding: content/augmented_dataset/labels/323_j

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>