### Connect to Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# YOLO Format

### Load data

In [None]:
!unzip "/content/drive/MyDrive/001_projects/FSL/data/annotation_data/HCFA/pseudo_label_hcfa_500.zip"

In [None]:
!unzip "/content/project-4-at-2023-12-13-17-50-fd88f887_HCFA_500_YOLO.zip"

In [None]:
import os

os.listdir("/content/content/psudo_lable_hcfa_500/images").__len__()

In [None]:
!cp -r "/content/content/psudo_lable_hcfa_500/images" "/content/hcfa_augmented_human_annotated_500"

#### ADA

In [None]:
!unzip /content/drive/MyDrive/001_projects/FSL/FSL_annotations/CIGNA_DENTAL_ADA/Cigna_Dental.zip

In [None]:
!unzip /content/drive/MyDrive/001_projects/FSL/FSL_annotations/CIGNA_DENTAL_ADA/batch_1/project-8-at-2024-01-05-17-49-50518453_CIGNA_ADA_51_YOLO.zip

In [None]:
!unzip /content/drive/MyDrive/001_projects/FSL/FSL_annotations/CIGNA_DENTAL_ADA/batch_1/project-8-at-2024-01-05-17-50-50518453_CIGNA_ADA_51_COCO.zip

Archive:  /content/drive/MyDrive/001_projects/FSL/FSL_annotations/CIGNA_DENTAL_ADA/batch_1/project-8-at-2024-01-05-17-50-50518453_CIGNA_ADA_51_COCO.zip
  inflating: result.json             


### Check that every label has a corresponding image

In [None]:
import os

def _file_stems(folder):
    """Return the extension-less names of the regular files directly inside ``folder``."""
    stems = set()
    for entry in os.listdir(folder):
        if os.path.isfile(os.path.join(folder, entry)):
            stems.add(os.path.splitext(entry)[0])
    return stems


# Folders that hold the images and the label files being compared
images_folder = "/content/content/psudo_lable_hcfa_500/images"
# labels_folder = "/content/content/psudo_lable_hcfa_500/labels"
labels_folder = "/content/labels"

# Base names (no extension) found on each side
image_names = _file_stems(images_folder)
label_names = _file_stems(labels_folder)

# Labels whose base name does not match any image
labels_without_images = label_names - image_names

if not labels_without_images:
    print("All labels have corresponding images.")
else:
    print("Labels without corresponding images:")
    for label_name in labels_without_images:
        print(label_name)

All labels have corresponding images.


### Rename all the files in a folder

In [None]:
import os

def rename_files(folder_path, prefix, extension=".txt"):
    """Rename files in ``folder_path`` by dropping everything up to the first ``-``.

    For example ``ab12-scan-01.txt`` becomes ``scan-01.txt``. Only the first
    dash is treated as the separator, so dashes (and the extension) in the
    remaining name are preserved.

    Args:
        folder_path: Directory whose files are renamed in place.
        prefix: Unused; kept so existing calls keep working.
        extension: Only file names ending with this suffix are considered.

    Files without a dash, and files whose new name already exists, are left
    untouched and reported. Running the function again strips a further
    dash-separated segment from names that still contain one.
    """
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(extension):
            continue

        # Split on the first dash only so the rest of the name stays intact
        _, dash, new_filename = filename.partition("-")
        if not dash or not new_filename:
            print(f"Skipped {filename}: no '-' separated prefix to strip")
            continue

        old_path = os.path.join(folder_path, filename)
        new_path = os.path.join(folder_path, new_filename)

        # os.rename may silently replace an existing file, so refuse to clobber
        if os.path.exists(new_path):
            print(f"Skipped {filename}: {new_filename} already exists")
            continue

        try:
            os.rename(old_path, new_path)
        except OSError as e:
            print(e)
            continue
        print(f"Renamed {filename} to {new_filename}")
# Example usage
# Rename the .txt files in /content/labels (".txt" is rename_files' default extension).
folder_path = '/content/labels'
# NOTE(review): `prefix` is passed through, but rename_files never reads it.
prefix = 'new_prefix'

rename_files(folder_path, prefix)


### Move the images that have labels into a separate folder

In [None]:
import os
import shutil

def move_images(source_folder, destination_folder, labels_folder="/content/labels", extensions=(".jpg",)):
    """Move images that have a matching label file into ``destination_folder``.

    An image ``name.ext`` is moved when ``labels_folder/name.txt`` exists.
    Images without a label stay in ``source_folder`` and are reported.

    Args:
        source_folder: Directory that is scanned for images.
        destination_folder: Directory the labelled images are moved to;
            created if it does not exist.
        labels_folder: Directory holding the ``.txt`` label files. Defaults to
            the previously hard-coded ``/content/labels``.
        extensions: File-name suffix (or tuple of suffixes) that identifies an
            image. Matching is case-sensitive, as before.
    """
    # Ensure the destination folder exists
    os.makedirs(destination_folder, exist_ok=True)

    # Accept a single suffix as well as a collection of suffixes
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(extensions)

    for filename in sorted(os.listdir(source_folder)):
        if not filename.endswith(suffixes):
            continue

        txt_path = os.path.join(labels_folder, os.path.splitext(filename)[0] + '.txt')
        if not os.path.exists(txt_path):
            print(f"No .txt file found for {filename}")
            continue

        shutil.move(os.path.join(source_folder, filename), os.path.join(destination_folder, filename))
        print(f"Moved {filename} to {destination_folder}")

# Example usage
# Move the .jpg files from /content/Cigna_Dental that have a matching label file into /content/images.
source_folder = '/content/Cigna_Dental'
destination_folder = '/content/images'

move_images(source_folder, destination_folder)


In [None]:
len(os.listdir("/content/images"))

51

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Define paths
root_folder = "/content/hcfa_augmented_human_annotated_500"
images_folder = os.path.join(root_folder, "images")
labels_folder = os.path.join(root_folder, "labels")

# Create train and val folders (relative to the current working directory)
root_train_folder = os.path.join("data", "images", "train")
root_val_folder = os.path.join("data", "images", "valid")
root_train_labels_folder = os.path.join("data", "labels", "train")
root_val_labels_folder = os.path.join("data", "labels", "valid")

for split_folder in (root_train_folder, root_val_folder, root_train_labels_folder, root_val_labels_folder):
    os.makedirs(split_folder, exist_ok=True)


def _label_file_for(image_name):
    """Return the label file name that belongs to ``image_name``."""
    return os.path.splitext(image_name)[0] + ".txt"


def _copy_split(image_names, images_dst, labels_dst):
    """Copy each image and its label file into the given split folders."""
    for image in image_names:
        label_file = _label_file_for(image)
        shutil.copy(os.path.join(images_folder, image), os.path.join(images_dst, image))
        shutil.copy(os.path.join(labels_folder, label_file), os.path.join(labels_dst, label_file))


# Get list of image files. Sorted so the seeded split does not depend on the
# arbitrary order of os.listdir; images without a label are left out up front
# instead of failing halfway through the copy.
image_files = []
for f in sorted(os.listdir(images_folder)):
    if not os.path.isfile(os.path.join(images_folder, f)):
        continue
    if not os.path.isfile(os.path.join(labels_folder, _label_file_for(f))):
        print(f"Skipping {f}: no label file found")
        continue
    image_files.append(f)

# Split the dataset
train_images, val_images = train_test_split(image_files, test_size=0.01, random_state=42)

# Copy images and labels into the train and valid folders
_copy_split(train_images, root_train_folder, root_train_labels_folder)
_copy_split(val_images, root_val_folder, root_val_labels_folder)

In [None]:
!rm -rf "/content/data"

In [None]:
import os
os.listdir("/content/data/images/valid").__len__()

4

### Plot the existing annotations on Image

Sanity check on the annotated images

In [None]:
import cv2
import os

from google.colab.patches import cv2_imshow

# Define paths
images_folder = "/content/images"
labels_folder = "/content/labels"

# Number of annotated images to preview
max_previews = 5
shown = 0

# Iterate through each image and its corresponding label
for image_name in sorted(os.listdir(images_folder)):

    # Stop after the requested number of images has actually been displayed
    if shown == max_previews:
        break

    if not image_name.endswith((".jpg", ".png")):
        continue

    # Load the image; cv2.imread returns None instead of raising on failure
    image_path = os.path.join(images_folder, image_name)
    image = cv2.imread(image_path)
    if image is None:
        print(f"Could not read image: {image_path}")
        continue

    # Load corresponding label file
    label_name = os.path.splitext(image_name)[0] + ".txt"
    label_path = os.path.join(labels_folder, label_name)
    if not os.path.isfile(label_path):
        print(f"No label file found for {image_name}")
        continue

    # Read YOLO annotations from the label file
    with open(label_path, "r") as file:
        lines = file.readlines()

    # Parse YOLO annotations (assuming normalized coordinates)
    boxes = []
    for line in lines:
        fields = line.split()
        if not fields:
            # Blank line, e.g. a trailing newline at the end of the file
            continue
        class_id, x_center, y_center, width, height = map(float, fields)
        x, y, w, h = (
            int((x_center - width / 2) * image.shape[1]),
            int((y_center - height / 2) * image.shape[0]),
            int(width * image.shape[1]),
            int(height * image.shape[0]),
        )
        boxes.append((x, y, x + w, y + h))

    # Draw bounding boxes on the image
    for box in boxes:
        cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)

    # Display the result
    cv2_imshow(image)
    shown += 1


In [None]:
!rm -rf "/content/augmented_data"

In [None]:
import os
import cv2
import numpy as np
from albumentations import (
    Compose, HorizontalFlip, VerticalFlip, Rotate, RandomBrightnessContrast,
    RandomGamma, RandomResizedCrop, MotionBlur, ElasticTransform, GaussNoise, Blur,
    ColorJitter
)

def yolo_to_albumentations(yolo_box, image):
    """Convert a YOLO box to corner form, clipped to the unit square.

    Args:
        yolo_box: ``[center_x, center_y, width, height]``. The clipping to
            ``[0, 1]`` only makes sense for normalised values.
        image: Unused. Kept so existing calls keep working; the result does
            not depend on the image size.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` in the same units as the input.
    """
    center_x, center_y, width, height = yolo_box
    half_width = width / 2
    half_height = height / 2

    x_min = max(0, center_x - half_width)
    y_min = max(0, center_y - half_height)
    x_max = min(1, center_x + half_width)
    y_max = min(1, center_y + half_height)

    return [x_min, y_min, x_max, y_max]

def load_image_and_labels(image_path, label_path):
    """Load an image as RGB together with its label rows.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.
        label_path: Path of a whitespace-separated text file with five numeric
            columns per row (assumed YOLO: class, x_center, y_center, width,
            height).

    Returns:
        ``(image, labels)`` where ``image`` is the RGB-converted array and
        ``labels`` is a 2-D array with five columns (one row per label).

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
        ValueError: If the file exists but could not be decoded as an image.
    """
    # cv2.imread signals failure by returning None rather than raising
    image = cv2.imread(image_path)
    if image is None:
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f"Image not found: {image_path}")
        raise ValueError(f"Could not decode image: {image_path}")

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # reshape keeps a single-row file two-dimensional
    labels = np.loadtxt(label_path).reshape(-1, 5)

    return image, labels

# def save_image_and_labels(image, labels, output_image_folder, output_labels_folder, filename, index):
#     # Save the image
#     cv2.imwrite(os.path.join(output_image_folder, f"{filename}_{index}.jpg"), cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

#     # Save the augmented labels (assuming YOLO format)
#     np.savetxt(os.path.join(output_labels_folder, f"{filename}_{index}.txt"), labels, fmt='%.6f')

# def augment_and_save(image, labels, output_image_folder, output_labels_folder, filename, num_augmentations=10):
#     # Define augmentations
#     augmentations = Compose([
#         RandomBrightnessContrast(p=0.6),
#         RandomGamma(p=0.5),
#         Blur(blur_limit = 3, p=0.7),
#         ColorJitter(p=0.5)
#     ], bbox_params={'format': 'yolo', 'label_fields': ['category_id']})
#     # Apply augmentations and save the augmented images and labels
#     for i in range(num_augmentations):
#         # augmented = augmentations(image=image, bboxes=[yolo_to_albumentations(box, image) for box in labels[:, :4]], category_id=labels[:, 4])
#         augmented = augmentations(image=image, bboxes=labels[:, 1:5], category_id=labels[:, 0])
#         augmented_image = augmented['image']
#         # print(augmented_category_id)
#         augmented_labels = np.column_stack((augmented['category_id'].astype(int), augmented['bboxes']))
#         print("augmented_labels", augmented_labels)
#         save_image_and_labels(augmented_image, augmented_labels, output_image_folder, output_labels_folder, filename, i)

#     return augmented


def save_image_and_labels(image, labels, output_image_folder, output_labels_folder, filename, index):
    """Write one augmented sample as ``<filename>_<index>.jpg`` and ``<filename>_<index>.txt``.

    The image is converted from RGB to BGR before it is handed to
    ``cv2.imwrite``. The label rows are written with an integer first column
    followed by four floats with six decimals (assumed YOLO layout).
    """
    image_target = os.path.join(output_image_folder, f"{filename}_{index}.jpg")
    bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    cv2.imwrite(image_target, bgr_image)

    label_target = os.path.join(output_labels_folder, f"{filename}_{index}.txt")
    column_formats = ['%d', '%.6f', '%.6f', '%.6f', '%.6f']
    np.savetxt(label_target, labels, fmt=column_formats)

def augment_and_save(image, labels, output_image_folder, output_labels_folder, filename, num_augmentations=10):
    """Create ``num_augmentations`` photometric variants of an image and save them.

    Args:
        image: Image array passed to the augmentation pipeline.
        labels: 2-D array with one row per box: class id in column 0 and the
            YOLO box in columns 1-4.
        output_image_folder: Folder the augmented images are written to.
        output_labels_folder: Folder the augmented label files are written to.
        filename: Base name used for the outputs (``<filename>_<i>``).
        num_augmentations: Number of augmented copies to produce.

    Returns:
        The result dict of the last augmentation, or ``None`` when
        ``num_augmentations`` is 0.
    """
    # Define augmentations
    augmentations = Compose([
        RandomBrightnessContrast(p=0.6),
        RandomGamma(p=0.5),
        Blur(blur_limit=3, p=0.7),
        ColorJitter(p=0.5)
    ], bbox_params={'format': 'yolo', 'label_fields': ['category_id']})

    # Defined up front so the return below is valid even when the loop never runs
    augmented = None

    # Apply augmentations and save the augmented images and labels
    for i in range(num_augmentations):
        augmented = augmentations(image=image, bboxes=labels[:, 1:5], category_id=labels[:, 0])

        # Convert category_id values to integers
        int_category_id = [int(x) for x in augmented['category_id']]

        if int_category_id:
            # Stack the labels with the class id first, followed by the box
            augmented_labels = np.column_stack((int_category_id, augmented['bboxes']))
        else:
            # No boxes: keep the five-column layout so an empty label file is written
            augmented_labels = np.empty((0, 5))

        save_image_and_labels(augmented['image'], augmented_labels, output_image_folder, output_labels_folder, filename, i)

    return augmented


# Where the source images/labels live and where the augmented copies go
input_image_folder = "/content/images"
input_label_folder = "/content/labels"
output_image_folder = "/content/ada_batch_1_augmented_yolov8/images"
output_labels_folder = "/content/ada_batch_1_augmented_yolov8/labels"

# Make sure both output folders exist before writing
for folder in (output_image_folder, output_labels_folder):
    os.makedirs(folder, exist_ok=True)

# Augment every .jpg image together with its label file
for filename in os.listdir(input_image_folder):
    if not filename.endswith(".jpg"):  # Assuming images are in jpg format
        continue

    stem = os.path.splitext(filename)[0]
    image_path = os.path.join(input_image_folder, filename)
    label_path = os.path.join(input_label_folder, f"{stem}.txt")

    image, labels = load_image_and_labels(image_path, label_path)

    augmented = augment_and_save(
        image,
        labels,
        output_image_folder,
        output_labels_folder,
        stem,
        num_augmentations=5,
    )

In [None]:
import gc
gc.collect()

120

In [None]:
len(os.listdir("/content/hcfa_augmented_human_annotated_500_v1/images/valid"))

20

In [None]:
!zip -r "ada_batch_1_augmented_yolov8.zip" "./ada_batch_1_augmented_yolov8"

In [None]:
!mv "/content/ada_batch_1_augmented_yolov8.zip" "/content/drive/MyDrive/001_projects/FSL/FSL_annotations/CIGNA_DENTAL_ADA/batch_1"

In [None]:
!rm -rf "/content/drive/MyDrive/001_projects/FSL/data/augmented_data/augmented_updated_190.zip"

In [None]:
os.listdir("/content/data/images/train")

In [None]:
!cp -r "/content/hcfa_augmented_human_annotated_500_v1/images" "/content/HCFA_coco_human_annotated_augmented_500/dataset"

## Convert YOLO to COCO format
Code taken from another notebook.

In [None]:
notes_json_path = "/content/hcfa_augmented_human_annotated_500_v1/notes.json"

In [None]:
import json

# Open and read the JSON file
with open(notes_json_path, 'r') as json_file:
    json_data = json.load(json_file)

In [None]:
categories = json_data['categories']

In [None]:
categories

In [None]:
import json
import os
from PIL import Image

# Set the paths for the input and output directories
input_dir = '/content/hcfa_augmented_human_annotated_500_v1/images/valid'
input_label_dir = "/content/hcfa_augmented_human_annotated_500_v1/labels/valid"
output_dir = '/content/coco/dataset'

os.makedirs(output_dir, exist_ok = True)

In [None]:
# # Define the categories for the COCO dataset
# categories = [{"id": 0, "name": "bottle"}]

# Define the COCO dataset dictionary
coco_dataset = {
    "info": {},
    "licenses": [],
    "categories": categories,
    "images": [],
    "annotations": []
}

# Running counter: annotation ids have to be unique across the whole file,
# not just within one image.
annotation_id = 0

# Loop through the images in the input directory (sorted for stable image ids)
for image_id, image_file in enumerate(sorted(os.listdir(input_dir))):

    # Load the image and get its dimensions; the context manager closes the file
    image_path = os.path.join(input_dir, image_file)
    with Image.open(image_path) as image:
        width, height = image.size

    # Add the image to the COCO dataset
    image_dict = {
        "id": image_id,
        "width": width,
        "height": height,
        "file_name": image_file
    }
    coco_dataset["images"].append(image_dict)

    # Load the bounding box annotations for the image.
    # splitext keeps file names that contain extra dots intact.
    label_path = os.path.join(input_label_dir, os.path.splitext(image_file)[0] + ".txt")
    with open(label_path) as f:
        annotations = f.readlines()

    # Loop through the annotations and add them to the COCO dataset
    for ann in annotations:
        fields = ann.split()
        if not fields:
            # Blank line, e.g. a trailing newline
            continue

        # The class id is the first column of the YOLO row; it was previously
        # never read here and came from whatever an earlier cell left behind.
        class_id = int(float(fields[0]))
        x, y, w, h = map(float, fields[1:5])

        x_min, y_min = int((x - w / 2) * width), int((y - h / 2) * height)
        x_max, y_max = int((x + w / 2) * width), int((y + h / 2) * height)
        ann_dict = {
            "id": annotation_id,
            "image_id": image_id,
            "category_id": class_id,
            "bbox": [x_min, y_min, x_max - x_min, y_max - y_min],
            "area": (x_max - x_min) * (y_max - y_min),
            "iscrowd": 0
        }
        coco_dataset["annotations"].append(ann_dict)
        annotation_id += 1

# Save the COCO dataset to a JSON file
with open(os.path.join(output_dir, 'valid.json'), 'w') as f:
    json.dump(coco_dataset, f)

In [None]:
!unzip "/content/drive/MyDrive/001_projects/FSL/data/human_annotated/HCFA/HCFA_coco_human_annotated_augmented_500.zip"

In [None]:
import os
import json
from PIL import Image

def yolo_to_coco(yolo_images_folder, yolo_labels_folder, coco_annotations_path, notes_json_path= "/content/notes.json"):
    """Convert a folder of YOLO label files into one COCO-style JSON file.

    Args:
        yolo_images_folder: Folder with the images; each label ``name.txt`` is
            matched to ``name.jpg``.
        yolo_labels_folder: Folder with the YOLO ``.txt`` label files
            (``class x_center y_center width height``, normalised).
        coco_annotations_path: Path of the JSON file to write.
        notes_json_path: JSON file whose ``categories`` list is copied into
            the output.

    Boxes are written as absolute pixel ``[x_min, y_min, width, height]`` and
    ``area`` is in square pixels, matching the inline conversion cell in this
    notebook. Label files without a matching image are skipped and reported.
    Image ids start at 0 and annotation ids at 1.
    """
    with open(notes_json_path, 'r') as json_file:
        categories = json.load(json_file)['categories']

    coco_data = {
        "images": [],
        "annotations": [],
        "categories": categories,
    }

    # Sorted so ids are assigned in a stable order between runs
    yolo_label_files = sorted(f for f in os.listdir(yolo_labels_folder) if f.endswith(".txt"))

    for label_file in yolo_label_files:
        # splitext keeps names that contain extra dots intact
        image_name = os.path.splitext(label_file)[0]
        image_path = os.path.join(yolo_images_folder, f"{image_name}.jpg")
        if not os.path.isfile(image_path):
            print(f"Skipping {label_file}: no image found at {image_path}")
            continue

        with Image.open(image_path) as image:
            width, height = image.size

        # COCO image entry
        image_id = len(coco_data["images"])
        coco_data["images"].append({
            "id": image_id,
            "width": width,
            "height": height,
            # Backslash-separated name kept as-is: other cells split on "\\"
            "file_name": f"""images\\{image_name}.jpg""",
        })

        with open(os.path.join(yolo_labels_folder, label_file), 'r') as yolo_file:
            lines = yolo_file.readlines()

        for line in lines:
            values = line.split()
            if not values:
                # Blank line, e.g. a trailing newline
                continue

            class_id = int(float(values[0]))
            x_center, y_center, box_width, box_height = (float(v) for v in values[1:5])

            # Normalised centre format -> absolute top-left format
            abs_width = box_width * width
            abs_height = box_height * height
            x_min = (x_center - box_width / 2) * width
            y_min = (y_center - box_height / 2) * height

            # COCO annotation entry
            coco_data["annotations"].append({
                "id": len(coco_data["annotations"]) + 1,
                "image_id": image_id,
                "category_id": class_id,
                "bbox": [x_min, y_min, abs_width, abs_height],
                "area": abs_width * abs_height,
                "iscrowd": 0,
            })

    with open(coco_annotations_path, 'w') as coco_file:
        json.dump(coco_data, coco_file, indent=4)

# Example usage
# Writes /content/valid_v1.json from the augmented ADA batch; notes_json_path is left at its default.
yolo_to_coco('/content/ada_batch_1_augmented_yolov8/images', '/content/ada_batch_1_augmented_yolov8/labels', '/content/valid_v1.json')


In [None]:
!zip -r "HCFA_coco_human_annotated_augmented_500.zip" "./HCFA_coco_human_annotated_augmented_500/"


zip error: Nothing to do! (try: zip -r HCFA_coco_human_annotated_augmented_500.zip . -i ./HCFA_coco_human_annotated_augmented_500/)


In [None]:
!du -h /content/HCFA_coco_human_annotated_augmented_500.zip

1.7G	/content/HCFA_coco_human_annotated_augmented_500.zip


In [None]:
!cp -r "/content/HCFA_coco_human_annotated_augmented_500.zip" "/content/drive/MyDrive/001_projects/FSL/data/human_annotated/HCFA"

In [None]:
!cp -r "/content/HCFA_coco_human_annotated_augmented_500/dataset/train_v1.json" "/content/drive/MyDrive/001_projects/FSL/data/human_annotated/HCFA"

### Copy the images that have labels out of the full image folder

In [None]:
!unzip "/content/drive/MyDrive/001_projects/FSL/data/yolo_data/HCFA_data_yolo/For_Annotation_500.zip"

In [None]:
import os
import shutil

# Source of all images, the labels that decide which ones are needed, and the target folder
images_all_folder = "/content/For_Annotation_500"
labels_folder = "/content/HCFA_100_yolo/labels"
output_images_folder = "/content/images"  # This will be the new folder with images corresponding to labels

# Make sure the target folder is there
os.makedirs(output_images_folder, exist_ok=True)

# Base names (no extension) of every regular file in the labels folder
label_names = set()
for file in os.listdir(labels_folder):
    if os.path.isfile(os.path.join(labels_folder, file)):
        label_names.add(os.path.splitext(file)[0])

# Copy the .jpg that belongs to each label, when it exists
for label_name in label_names:
    image_path_all = os.path.join(images_all_folder, label_name + ".jpg")  # Adjust the extension as needed
    image_path_output = os.path.join(output_images_folder, label_name + ".jpg")

    if not os.path.isfile(image_path_all):
        continue

    shutil.copy(image_path_all, image_path_output)
    print(f"Copied: {label_name}.jpg")

print("Image copying complete.")


In [None]:
len(os.listdir("/content/images"))

100

In [None]:
!rm -rf "/content/data"

In [None]:
!cp -r "/content/images" "/content/HCFA_100_yolo"

In [None]:
!zip "/content/HCFA_100_yolo" ""

In [None]:
!zip -r "HCFA_100_yolo.zip" "/content/HCFA_100_yolo/"

# COCO Format

### Load Dataset

In [None]:
!zip -FF "/content/drive/MyDrive/001_projects/FSL/data/human_annotated/Dental_ADA/ADA_421_images.zip" --out "repaired.zip"
!unzip "repaired.zip"

### Keep only the images that appear in the annotations

In [None]:
import json
import os
import shutil


# Path to the result.json file
result_json_path = '/content/ADA_421_images/result.json'

# Path to the images folder
images_folder_path = '/content/ADA_421_images/images'

selected_images_folder = "/content/images"

os.makedirs(selected_images_folder, exist_ok=True)


def _image_basename(file_name):
    """Return the bare file name of a result.json entry such as 'images\\x.jpg'.

    Works for backslash-separated, slash-separated and plain names, so an
    entry without a folder prefix no longer raises IndexError.
    """
    return file_name.replace("\\", "/").split("/")[-1]


# Load the result.json file
with open(result_json_path, 'r') as json_file:
    result_data = json.load(json_file)

# Extract image filenames from the result.json file
image_filenames_in_result = [entry['file_name'] for entry in result_data['images']]

# Keep the entries whose image file exists in the images folder
existing_image_filenames = [
    filename for filename in image_filenames_in_result
    if os.path.exists(os.path.join(images_folder_path, _image_basename(filename)))
]

# Display or use the list of existing image filenames
print(existing_image_filenames)


# Copy the existing image files to the selected images folder
for file_name in existing_image_filenames:
    base_name = _image_basename(file_name)
    source_path = os.path.join(images_folder_path, base_name)
    destination_path = os.path.join(selected_images_folder, base_name)
    shutil.copy(source_path, destination_path)


['images\\2027C467D017_001.jpg', 'images\\2027C43GD010_001.jpg', 'images\\2027C43UD020_001.jpg', 'images\\2027C48FD045_001.jpg', 'images\\2027C47TD017_001.jpg', 'images\\2027C469D005_001.jpg', 'images\\2027C45KD007_001.jpg', 'images\\2027C43TD042_001.jpg', 'images\\2027C48HD020_001.jpg', 'images\\2027C43OD010_001.jpg', 'images\\2027C43GD043_001.jpg', 'images\\2027C43ID007_001.jpg', 'images\\2027C469D029_001.jpg', 'images\\2027C43RD013_001.jpg', 'images\\2027C43ED011_001.jpg', 'images\\2027C45KD008_001.jpg', 'images\\2027C46BD002_001.jpg', 'images\\2027C43QD006_001.jpg', 'images\\2027C46DD001_001.jpg', 'images\\2027C43FD002_001.jpg', 'images\\2027C43HD021_001.jpg', 'images\\2027C43GD011_001.jpg', 'images\\2027C43KD003_001.jpg', 'images\\2027C43HD003_001.jpg', 'images\\2027C462D025_001.jpg', 'images\\2027C44YD050_001.jpg', 'images\\2027C44XD047_001.jpg', 'images\\2027C45AD047_001.jpg', 'images\\2027C43FD006_001.jpg', 'images\\2027C469D018_001.jpg', 'images\\2027C43TD008_001.jpg', 'images

In [None]:
len(existing_image_filenames)

422

In [None]:
len(os.listdir("/content/ADA_421_images/images"))

2922

In [None]:
import json
import os
from sklearn.model_selection import train_test_split

# Path to the result.json file
result_json_path = '/content/ADA_421_images/result.json'

# Load the result.json file
with open(result_json_path, 'r') as json_file:
    result_data = json.load(json_file)

# Extract image filenames and annotations from the result.json file
images_info = result_data['images']
annotations_info = result_data['annotations']

# Extract image filenames from the result.json file
image_filenames_in_result = [entry['file_name'] for entry in images_info]

# Split the dataset into train and validation sets
train_images, valid_images = train_test_split(image_filenames_in_result, test_size=0.01, random_state=42)


def _coco_subset(file_names):
    """Return a COCO dict restricted to the images named in ``file_names``.

    Membership is checked against sets built once, instead of rebuilding a
    list of ids for every annotation.
    """
    wanted_names = set(file_names)
    subset_images = [entry for entry in images_info if entry['file_name'] in wanted_names]
    wanted_ids = {entry['id'] for entry in subset_images}
    return {
        'images': subset_images,
        'annotations': [annotation for annotation in annotations_info if annotation['image_id'] in wanted_ids],
        'categories': result_data['categories']
    }


# Create dictionaries for train and validation JSON files
train_json = _coco_subset(train_images)
valid_json = _coco_subset(valid_images)

# Kept as separate names for any later inspection
train_annotations = train_json['annotations']
valid_annotations = valid_json['annotations']

# Save train.json
with open('/content/train.json', 'w') as train_file:
    json.dump(train_json, train_file)

# Save valid.json
with open('/content/valid.json', 'w') as valid_file:
    json.dump(valid_json, valid_file)


In [None]:
import os
import cv2
import json
import albumentations as A
from pathlib import Path

# from albumentations.augmentations.bbox_utils import convert_bbox_from_albumentations, convert_bbox_to_albumentations


def convert_bbox_from_albumentations(bbox):
    """Normalise transform output into plain COCO ``[x_min, y_min, width, height]`` lists.

    The pipeline in this notebook is configured with ``format='coco'``, so the
    boxes it returns are already ``(x_min, y_min, width, height)``; they only
    need to be turned into JSON-friendly lists of floats.

    Args:
        bbox: Either a single box ``(x, y, width, height)`` or a sequence of
            such boxes (the transform returns a sequence).

    Returns:
        ``[x_min, y_min, width, height]`` for a single box, or a list of such
        lists when a sequence of boxes is passed. An empty sequence gives
        ``[]``.
    """
    if len(bbox) == 0:
        return []

    # A box holds scalars; a batch holds sequences. Scalars have no __len__.
    if hasattr(bbox[0], "__len__"):
        return [convert_bbox_from_albumentations(single) for single in bbox]

    x, y, width, height = bbox[:4]
    return [float(x), float(y), float(width), float(height)]

def load_annotations(annotations_file):
    """Read the JSON file at ``annotations_file`` and return the parsed object."""
    with open(annotations_file, 'r') as handle:
        return json.load(handle)

def save_annotations(annotations, output_file):
    """Serialise ``annotations`` as JSON (2-space indent) into ``output_file``."""
    with open(output_file, 'w') as sink:
        json.dump(annotations, sink, indent=2)

def augment_and_save_images(images_dir, output_dir, annotations_file, num_augmentations):
    """Write flipped copies of every annotated image plus a matching COCO file.

    Args:
        images_dir: Folder holding the source images.
        output_dir: Folder for the augmented images and ``train_augmented.json``;
            created if missing.
        annotations_file: COCO-style JSON with ``images``, ``annotations`` and
            ``categories``. ``file_name`` entries may carry a
            backslash-separated folder prefix, which is stripped.
        num_augmentations: Number of augmented copies per image.

    Every image in the annotation file is processed. Images that cannot be
    read are skipped and reported. Boxes are requested in COCO format, so the
    transform output is stored directly as ``[x_min, y_min, width, height]``.
    New image and annotation ids are assigned sequentially starting at 1.
    """
    os.makedirs(output_dir, exist_ok=True)

    annotations = load_annotations(annotations_file)
    augmented_annotations = {'info': "None", 'licenses': "None", 'categories': annotations['categories'], 'images': [], 'annotations': []}

    # Group the annotations by image once instead of rescanning the list per image
    anns_by_image = {}
    for ann in annotations['annotations']:
        anns_by_image.setdefault(ann['image_id'], []).append(ann)

    # The pipeline does not depend on the image, so build it a single time.
    # The label field carries each box's position in the image's annotation
    # list, so a transformed box is matched to its annotation even if the
    # transform drops boxes.
    transform = A.Compose([
        # Define your augmentations here
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        # Add more augmentations as needed
    ], bbox_params=A.BboxParams(format='coco', label_fields=['ann_indices']))

    for image_info in annotations['images']:
        image_id = image_info['id']
        image_path = os.path.join(images_dir, image_info['file_name'].split('\\')[-1])

        # cv2.imread returns None instead of raising when the file is unreadable
        image = cv2.imread(image_path)
        if image is None:
            print(f"Skipping unreadable image: {image_path}")
            continue

        image_anns = anns_by_image.get(image_id, [])
        bboxes = [ann['bbox'] for ann in image_anns]
        ann_indices = list(range(len(image_anns)))

        for aug_id in range(num_augmentations):
            augmented = transform(image=image, bboxes=bboxes, ann_indices=ann_indices)

            # Save augmented image
            augmented_file_name = f"{image_id}_aug{aug_id}.jpg"
            cv2.imwrite(os.path.join(output_dir, augmented_file_name), augmented['image'])

            # Id shared by the new image entry and all of its annotations
            new_image_id = len(augmented_annotations['images']) + 1

            # Update annotations for the augmented image
            for bbox, ann_index in zip(augmented['bboxes'], augmented['ann_indices']):
                new_ann = image_anns[int(ann_index)].copy()
                new_ann['id'] = len(augmented_annotations['annotations']) + 1
                new_ann['image_id'] = new_image_id
                new_ann['bbox'] = [float(value) for value in bbox[:4]]
                augmented_annotations['annotations'].append(new_ann)

            # Update image information
            augmented_image_info = image_info.copy()
            augmented_image_info['id'] = new_image_id
            augmented_image_info['file_name'] = augmented_file_name
            augmented_annotations['images'].append(augmented_image_info)

    # Save annotations to the output file
    output_annotations_file = os.path.join(output_dir, "train_augmented.json")
    save_annotations(augmented_annotations, output_annotations_file)


# Cell entry point: augment the images listed in /content/train.json.
if __name__ == "__main__":
    images_directory = "/content/images"  # folder the source images are read from
    output_directory = "/content/ada_augmented_data"  # augmented images and train_augmented.json are written here
    annotations_file_path = "/content/train.json"  # annotation JSON loaded by augment_and_save_images
    num_augmentations = 5  # Adjust as needed

    augment_and_save_images(images_directory, output_directory, annotations_file_path, num_augmentations)


image_path /content/images/2027C467D017_001.jpg
boxes---->>> [(1023.5215113367432, 2390.4549999999836, 265.34453401021017, 50.6899999999996), (1279.4609319795752, 1501.0699999999977, 169.74906802042528, 46.69999999999982), (1560.6000000000001, 1052.4799999999993, 193.9687128834919, 50.22000000000003), (686.3920161757742, 645.7158170665075, 123.5629838242254, 58.204182933491666), (108.35197732653667, 1498.2049999999927, 209.72627208169007, 45.84999999999968), (813.2700000000001, 1244.5349999999999, 62.539999999999736, 56.67000000000007), (1279.2209319795763, 1551.389999999996, 174.09209069390022, 47.559999999999945), (1456.6074433163128, 1499.4849999999988, 171.77906802042526, 45.930000000000064), (1276.4089546530502, 1450.5249999999992, 175.73906802042552, 47.50999999999999), (542.9649999999998, 901.4850000000002, 126.50999999999976, 50.469999999999914), (1775.6674257669833, 1051.1250000000005, 157.50257423301673, 51.08999999999992), (2087.385, 759.59, 178.10999999999967, 43.2400000000

ValueError: ignored

In [None]:
import albumentations.augmentations.bbox_utils

ModuleNotFoundError: ignored

In [None]:
load_annotations(annotations_file_path)

{'images': [{'width': 2602,
   'height': 3360,
   'id': 0,
   'file_name': 'images\\2027C467D017_001.jpg'},
  {'width': 2603,
   'height': 3348,
   'id': 1,
   'file_name': 'images\\2027C43GD010_001.jpg'},
  {'width': 2635,
   'height': 3391,
   'id': 2,
   'file_name': 'images\\2027C43UD020_001.jpg'},
  {'width': 2599,
   'height': 3350,
   'id': 3,
   'file_name': 'images\\2027C48FD045_001.jpg'},
  {'width': 2594,
   'height': 3358,
   'id': 4,
   'file_name': 'images\\2027C47TD017_001.jpg'},
  {'width': 2599,
   'height': 3357,
   'id': 5,
   'file_name': 'images\\2027C469D005_001.jpg'},
  {'width': 2604,
   'height': 3359,
   'id': 6,
   'file_name': 'images\\2027C45KD007_001.jpg'},
  {'width': 2596,
   'height': 3360,
   'id': 7,
   'file_name': 'images\\2027C43TD042_001.jpg'},
  {'width': 2599,
   'height': 3354,
   'id': 8,
   'file_name': 'images\\2027C48HD020_001.jpg'},
  {'width': 2595,
   'height': 3360,
   'id': 9,
   'file_name': 'images\\2027C43OD010_001.jpg'},
  {'width'