# install library for image augmentation

In [None]:
!pip install albumentations



# import library

In [None]:
import os
import json
import cv2
import albumentations as A
from tqdm import tqdm

# check directory and mount drive

In [None]:
from google.colab import drive

# Check if the directory is already mounted
import os
# Google Drive must be mounted at the Colab mount point itself; its contents
# then appear under <mount point>/MyDrive. Mounting at the project sub-folder
# would instead expose the project at
# /content/drive/MyDrive/PMLD_new/MyDrive/PMLD_new and break every path used
# by the rest of the notebook.
# The MyDrive folder only exists once the drive is really mounted, so it is a
# more reliable signal than the bare /content/drive directory.
if not os.path.isdir('/content/drive/MyDrive'):
    # Mount the drive if it's not mounted
    drive.mount('/content/drive')
else:
    # The project folder is reachable at this path once the drive is mounted.
    print("Drive is already mounted at /content/drive/MyDrive/PMLD_new")

Drive is already mounted at /content/drive/MyDrive/PMLD_new


# path check

In [None]:
# Check that the dataset path exists and list its contents
!ls '/content/drive/MyDrive/PMLD_new/rock_dataset'

augmented  images  json  masks


# source path

In [None]:
# Folder holding the source images that will be augmented.
image_dir = '/content/drive/MyDrive/PMLD_new/rock_dataset/images'
# Folder (despite the "_path" name) holding the JSON annotation files.
json_path = '/content/drive/MyDrive/PMLD_new/rock_dataset/json'
# Destination folder for the augmented images and the merged annotation file.
output_dir = '/content/drive/MyDrive/PMLD_new/rock_dataset/augmented'
# Create the output folder up front; exist_ok avoids an error on re-runs.
os.makedirs(output_dir, exist_ok=True)

# images and json files mapping

In [None]:
import pandas as pd

# Collect the annotation files and the candidate image files.
json_files = [f for f in os.listdir(json_path) if f.endswith('.json')]
image_files = [f for f in os.listdir(image_dir) if f.endswith(('.png', '.jpg'))]

# Build the mapping table: each JSON file is paired with the image(s) whose
# base name equals the token after the last underscore of the JSON name
# (e.g. labels_segmen_0005.json -> 0005.png).
data = []
for j in json_files:
    # Strip only the real extension, then take the trailing id token.
    image_key = os.path.splitext(j)[0].split('_')[-1]
    # Compare whole base names: a prefix test would also pair id "001" with
    # "0010.png".
    matching_imgs = [i for i in image_files if os.path.splitext(i)[0] == image_key]
    data.append({
        'json_file': j,
        'image_files': matching_imgs,
        'status': 'MATCHED' if matching_imgs else 'NO MATCH'
    })

# Last expression of the cell so the notebook renders the table.
pd.DataFrame(data)

Unnamed: 0,json_file,image_files,status
0,labels_segmen_0005.json,[0005.png],MATCHED
1,labels_segmen_0002.json,[0002.png],MATCHED
2,labels_segmen_0001.json,[0001.png],MATCHED
3,labels_segmen_0004.json,[0004.png],MATCHED
4,labels_segmen_0006.json,[0006.png],MATCHED
5,labels_segmen_0008.json,[0008.png],MATCHED
6,labels_segmen_0009.json,[0009.png],MATCHED
7,labels_segmen_0007.json,[0007.png],MATCHED
8,labels_segmen_0010.json,[0010.png],MATCHED


# image augmentation pipeline

In [None]:
# Augmentation pipeline.
# Steps that move pixels around (bounding boxes are transformed with them).
_geometric_steps = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.2),
    A.RandomRotate90(p=0.3),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=15, p=0.5),
]

# Steps that only change pixel values.
_pixel_steps = [
    A.RandomBrightnessContrast(p=0.3),
    A.HueSaturationValue(p=0.3),
    A.RGBShift(p=0.3),
    A.GaussNoise(p=0.2),
    A.MotionBlur(p=0.1),
    A.CLAHE(p=0.1),
    A.ToGray(p=0.05),
]

# Boxes are passed in COCO format; their class labels travel in the
# 'category_ids' field so they stay aligned with the boxes.
_bbox_settings = A.BboxParams(format='coco', label_fields=['category_ids'])

transform = A.Compose(_geometric_steps + _pixel_steps, bbox_params=_bbox_settings)

# Accumulators for the COCO entries produced by the augmentation run.
augmented_images = []
augmented_annotations = []
ann_id = 10000  # Start from a large value to avoid clashing with original ids
img_id = 1000

# Number of augmented variants generated for every source image.
AUGMENTATIONS_PER_IMAGE = 50

# List the source images and the annotation files.
image_files = [f for f in os.listdir(image_dir) if f.endswith('.png')]
json_files = [f for f in os.listdir(json_path) if f.endswith('.json')]


def _find_json_for_image(img_basename, candidates):
    """Return the annotation file name for ``img_basename``, or None.

    An exact ``<basename>.json`` is preferred. Otherwise a file whose name
    ends with ``_<basename>.json`` is accepted, which covers names such as
    ``labels_segmen_0005.json`` for the image ``0005.png``.
    """
    exact_name = f"{img_basename}.json"
    if exact_name in candidates:
        return exact_name
    suffix = f"_{img_basename}.json"
    # Sorted so the choice is deterministic if several files share the suffix.
    for candidate in sorted(candidates):
        if candidate.endswith(suffix):
            return candidate
    return None


# Augment every image that has a matching annotation file.
for img_file in image_files:
    # Base name without the extension.
    img_basename = os.path.splitext(img_file)[0]

    json_file = _find_json_for_image(img_basename, json_files)
    if json_file is None:
        print(f"File JSON untuk gambar {img_file} tidak ditemukan!")
        continue

    image_path = os.path.join(image_dir, img_file)
    json_file_path = os.path.join(json_path, json_file)

    # cv2.imread returns None instead of raising when the file is unreadable.
    image = cv2.imread(image_path)
    if image is None:
        print(f"Gambar {img_file} tidak dapat dibaca, dilewati.")
        continue

    with open(json_file_path) as f:
        coco = json.load(f)

    if not coco.get('images'):
        print(f"File JSON {json_file} tidak memiliki entri gambar, dilewati.")
        continue

    # Annotations belonging to the first image entry of this file
    # (assumes one image per JSON file - TODO confirm).
    source_image_id = coco['images'][0]['id']
    anns = [a for a in coco.get('annotations', []) if a['image_id'] == source_image_id]
    bboxes = [a['bbox'] for a in anns]
    cat_ids = [a['category_id'] for a in anns]

    for i in range(AUGMENTATIONS_PER_IMAGE):
        transformed = transform(image=image, bboxes=bboxes, category_ids=cat_ids)
        aug_img = transformed['image']
        aug_bboxes = transformed['bboxes']
        aug_cats = transformed['category_ids']

        new_filename = f"{img_basename}_aug_{i}.png"
        # cv2.imwrite reports failure through its return value; do not record
        # annotations for an image that was never written.
        if not cv2.imwrite(os.path.join(output_dir, new_filename), aug_img):
            print(f"Gagal menyimpan {new_filename}, dilewati.")
            continue

        # Register the new image entry.
        new_img_id = img_id
        augmented_images.append({
            "id": new_img_id,
            "width": aug_img.shape[1],
            "height": aug_img.shape[0],
            "file_name": new_filename
        })

        # Register one annotation per surviving bounding box.
        for bbox, cat_id in zip(aug_bboxes, aug_cats):
            augmented_annotations.append({
                "id": ann_id,
                "image_id": new_img_id,
                "category_id": cat_id,
                "bbox": [float(round(x, 2)) for x in bbox],
                # COCO bbox is [x, y, width, height], so area = width * height.
                "area": float(round(bbox[2] * bbox[3], 2)),
                "iscrowd": 0,
                "segmentation": []  # Left empty: polygons are not re-computed after augmentation
            })
            ann_id += 1
        img_id += 1

# Merge every original annotation file with the augmented entries.
# Ids already taken by the augmented data; originals must not reuse them.
_used_image_ids = {img['id'] for img in augmented_images}
_used_ann_ids = {ann['id'] for ann in augmented_annotations}


def _claim_id(preferred, used):
    """Return ``preferred`` if it is free, else the next free integer above it.

    The returned id is recorded in ``used``. Assumes integer ids.
    """
    candidate = preferred
    while candidate in used:
        candidate += 1
    used.add(candidate)
    return candidate


original_coco = None  # First annotation file; supplies the "info" section
_original_images = []
_original_annotations = []
_categories_by_id = {}

# Sorted so the result does not depend on the directory listing order.
for _json_name in sorted(json_files):
    with open(os.path.join(json_path, _json_name)) as f:
        _coco = json.load(f)
    if original_coco is None:
        original_coco = _coco

    # Separate files may reuse the same ids, so colliding ids are remapped
    # and the annotations of this file are pointed at the remapped images.
    _image_id_map = {}
    for _img in _coco.get('images', []):
        _new_id = _claim_id(_img['id'], _used_image_ids)
        _image_id_map[_img['id']] = _new_id
        _original_images.append({**_img, 'id': _new_id})

    for _ann in _coco.get('annotations', []):
        _original_annotations.append({
            **_ann,
            'id': _claim_id(_ann['id'], _used_ann_ids),
            'image_id': _image_id_map.get(_ann['image_id'], _ann['image_id']),
        })

    # Keep the first definition seen for each category id
    # (assumes all files share one category scheme - TODO confirm).
    for _cat in _coco.get('categories', []):
        _categories_by_id.setdefault(_cat['id'], _cat)

if original_coco is None:
    raise FileNotFoundError(f"No JSON annotation files found in {json_path}")

# Combine originals and augmented data into one COCO document.
augmented_coco = {
    "info": original_coco.get('info', {}),
    "images": _original_images + augmented_images,
    "annotations": _original_annotations + augmented_annotations,
    "categories": list(_categories_by_id.values())
}

# Save the merged annotation file next to the augmented images.
with open(os.path.join(output_dir, 'augmented_annotations.json'), 'w') as f:
    json.dump(augmented_coco, f, indent=2)

print(f"Augmentasi selesai. File anotasi disimpan di {output_dir}/augmented_annotations.json")

Augmentasi selesai. File anotasi disimpan di /content/drive/MyDrive/PMLD_new/rock_dataset/augmented/augmented_annotations.json
