# **----------LIBRARIES IMPORT----------**

In [None]:
import os
import io
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
import json
import cv2
import requests
from PIL import Image
import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch
from torch.utils.data import Dataset, DataLoader
import tensorflow as tf

# **---------- IMAGE READING ----------**

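1. **Function Parameters**
    * `image_path` : path file gambar di disk (dipakai oleh `read_image_from_path`)
    * `target_size` : ukuran gambar output yang diinginkan dalam format `(width, height)`; jika `None`, ukuran asli dipertahankan
    * `url` : link (URL) gambar yang akan diunduh (dipakai oleh `read_image_from_url`)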

In [None]:
def read_image_from_path(image_path, target_size=None):
    """Load an image file from disk and return it in RGB channel order.

    Args:
        image_path: Path of the image file to read.
        target_size: Optional output size, forwarded to ``cv2.resize`` as
            its ``dsize`` argument, i.e. ``(width, height)``. When ``None``
            the image is returned at its original size.

    Returns:
        The decoded image as returned by OpenCV, converted to RGB and
        resized if ``target_size`` was given.

    Raises:
        ValueError: If ``cv2.imread`` could not load anything from
            ``image_path``.
    """
    bgr_image = cv2.imread(image_path)

    # cv2.imread signals failure by returning None instead of raising.
    if bgr_image is None:
        raise ValueError(f"Image not found at : {image_path}")

    # OpenCV decodes into BGR order; the rest of the pipeline wants RGB.
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

    if target_size is None:
        return rgb_image
    return cv2.resize(rgb_image, target_size, interpolation=cv2.INTER_LINEAR)

In [None]:
def read_image_from_url(url, target_size=None, timeout=10):
    """Download an image over HTTP and return it as an RGB array.

    Args:
        url: Address of the image to download.
        target_size: Optional output size, forwarded to ``cv2.resize`` as
            its ``dsize`` argument, i.e. ``(width, height)``. When ``None``
            the image is returned at its original size.
        timeout: Timeout in seconds handed to ``requests.get`` so an
            unresponsive server cannot block the call forever. Pass
            ``None`` to wait indefinitely.

    Returns:
        A 3-channel RGB ``numpy`` array, resized if ``target_size`` was
        given.

    Raises:
        ValueError: If the server answers with a status code other than 200.
        requests.exceptions.RequestException: If the request itself fails
            (connection error, timeout, ...).
    """
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        raise ValueError(f"Could not fetch image fron URL : {url}")

    # response.content holds the raw bytes; BytesIO makes them file-like so
    # PIL can decode them.
    with Image.open(io.BytesIO(response.content)) as pil_image:
        # PIL already decodes to RGB, so no BGR->RGB swap is needed here
        # (swapping would actually produce BGR). convert('RGB') also turns
        # grayscale, palette and RGBA images into a uniform 3-channel array.
        img = np.array(pil_image.convert('RGB'))

    if target_size is not None:
        img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR)
    return img

* `response.content` : berisi raw image bytes
* `io.BytesIO` : mengubah bytes menjadi file-like
* `Image.open()` : membaca image (PIL)

# **---------- IMAGE DISPLAY ----------**

In [None]:
def display_image(image, title=None, cols=1, rows=1, figsize=(10, 10)):
    """Show one or more images in a matplotlib grid.

    Args:
        image: Iterable of images; each element is passed to ``plt.imshow``.
        title: Optional titles. Either a sequence with one title per image
            or a single string that is used for every image. Images without
            a matching title are shown untitled.
        cols: Number of grid columns.
        rows: Number of grid rows. If ``rows * cols`` is too small to hold
            all images, the row count is increased so that every image fits.
        figsize: Figure size forwarded to ``plt.figure``.
    """
    images = list(image)

    # Normalise titles to a list so indexing never walks the characters of a
    # plain string and never runs past the end of a short title list.
    if not title:
        titles = []
    elif isinstance(title, str):
        titles = [title] * len(images)
    else:
        titles = list(title)

    # plt.subplot rejects indices beyond rows * cols, so grow the grid
    # (ceil division) instead of failing on the default 1x1 layout.
    if rows * cols < len(images):
        rows = -(-len(images) // cols)

    plt.figure(figsize=figsize)
    plt.subplots_adjust(wspace=0.1, hspace=0.1)

    for i, img in enumerate(images):
        plt.subplot(rows, cols, i + 1)
        plt.imshow(img)
        if i < len(titles):
            plt.title(titles[i])
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# **---------- ANNOTATION PARSING ----------**

In [None]:
def parse_csv_annotation(csv_path, image_dir=None):
    """Read image annotations from a CSV file.

    The CSV must contain a ``filename`` column and either a ``class_name``
    or a ``label`` column (``class_name`` wins when both exist). If all of
    ``x_min``, ``y_min``, ``x_max`` and ``y_max`` are present, each
    annotation also carries a bounding box.

    Args:
        csv_path: Path of the CSV file to read.
        image_dir: Optional directory joined in front of every filename.
            When falsy, the filename is used as-is.

    Returns:
        A list with one dict per CSV row, holding ``image_path``, ``label``
        and, when the bounding-box columns exist, ``bbox`` as
        ``[x_min, y_min, x_max, y_max]``.

    Raises:
        KeyError: If a required column is missing from a non-empty CSV.
    """
    df = pd.read_csv(csv_path)

    # Column presence is the same for every row, so decide once up front.
    label_column = 'class_name' if 'class_name' in df.columns else 'label'
    bbox_columns = ['x_min', 'y_min', 'x_max', 'y_max']
    has_bbox = all(column in df.columns for column in bbox_columns)

    annotations = []
    for _, row in df.iterrows():
        filename = row['filename']
        annotation = {
            'image_path': os.path.join(image_dir, filename) if image_dir else filename,
            'label': row[label_column],
        }

        if has_bbox:
            annotation['bbox'] = [row[column] for column in bbox_columns]

        annotations.append(annotation)

    return annotations

In [None]:
def parse_json_annotations(json_path, image_dir=None):
    """Read a JSON annotation file and group its annotations per image.

    The file is expected to have top-level ``images``, ``categories`` and
    ``annotations`` lists (a COCO-style layout).

    Args:
        json_path: Path of the JSON annotation file.
        image_dir: Optional directory joined in front of every file name.
            When falsy, the file name is used as-is.

    Returns:
        A list with one dict per image that has at least one annotation,
        holding ``image_id``, ``image_path`` and ``annotations``. Images are
        listed in the order their first annotation appears in the file.
    """
    with open(json_path, 'r') as f:
        payload = json.load(f)

    # Lookup tables: image id -> file name, category id -> category name.
    file_names = {entry['id']: entry['file_name'] for entry in payload['images']}
    category_names = {entry['id']: entry['name'] for entry in payload['categories']}

    # Collect the annotations of each image under its image id.
    grouped = {}
    for record in payload['annotations']:
        grouped.setdefault(record['image_id'], []).append({
            'category_id': record['category_id'],
            'category_name': category_names[record['category_id']],
            'bbox': record['bbox'],  # [x, y, width, height] format
            'segmentation': record.get('segmentation', None),
            'area': record.get('area', None),
            'iscrowd': record.get('iscrowd', 0)
        })

    # Flatten the grouping into one entry per image, resolving the path.
    return [
        {
            'image_id': image_id,
            'image_path': (
                os.path.join(image_dir, file_names[image_id])
                if image_dir else file_names[image_id]
            ),
            'annotations': image_annotations
        }
        for image_id, image_annotations in grouped.items()
    ]

In [None]:
def parse_voc_xml(xml_path, image_dir=None):
    """Parse one XML annotation file into a plain dict.

    The file is expected to provide ``filename``, a ``size`` element with
    ``width``/``height``/``depth``, and any number of ``object`` elements
    each holding ``name``, an optional ``difficult`` flag and a ``bndbox``
    (the Pascal VOC layout).

    Args:
        xml_path: Path of the XML file to parse.
        image_dir: Optional directory joined in front of the file name.
            When falsy, the file name is used as-is.

    Returns:
        A dict with ``filename``, ``image_path``, ``width``, ``height``,
        ``depth`` (ints) and ``objects``, a list of dicts with ``name``,
        ``difficult`` (0 when the tag is absent) and ``bbox`` as
        ``[xmin, ymin, xmax, ymax]`` floats.
    """
    root = ET.parse(xml_path).getroot()

    filename = root.find('filename').text
    size_node = root.find('size')
    width, height, depth = (
        int(size_node.find(tag).text) for tag in ('width', 'height', 'depth')
    )

    objects = []
    for node in root.findall('object'):
        name = node.find('name').text

        # The difficult flag is optional; treat a missing tag as 0.
        difficult_node = node.find('difficult')
        difficult = 0 if difficult_node is None else int(difficult_node.text)

        box_node = node.find('bndbox')
        corners = [
            float(box_node.find(tag).text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        ]

        objects.append({
            'name': name,
            'difficult': difficult,
            'bbox': corners
        })

    image_path = os.path.join(image_dir, filename) if image_dir else filename
    return {
        'filename': filename,
        'image_path': image_path,
        'width': width,
        'height': height,
        'depth': depth,
        'objects': objects
    }

# **---------- AUGMENTATION ----------**

In [None]:
def basic_augmentation(height=224, width=224, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Build the minimal preprocessing pipeline: resize, then normalize.

    Args:
        height: Output image height passed to ``A.Resize``.
        width: Output image width passed to ``A.Resize``.
        mean: Per-channel mean passed to ``A.Normalize``.
        std: Per-channel standard deviation passed to ``A.Normalize``.

    Returns:
        An ``A.Compose`` pipeline applying ``A.Resize`` followed by
        ``A.Normalize``.
    """
    # The pipeline class is `A.Compose`; albumentations has no lowercase
    # `compose`, so the previous spelling failed with AttributeError.
    return A.Compose([
        A.Resize(height=height, width=width),
        A.Normalize(mean=mean, std=std),
    ])

In [None]:
def train_transform(height=224, width=224, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), hflip_chance=0.5, random_brightness_chance=0.2,
                    shift_limit=0.05, scale_limit=0.05, rotate_limit=15, shift_scale_rotate_chance=0.3,
                    r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, rgb_shift_chance=0.3):
    """Build the training pipeline: resize, augment, normalize, to tensor.

    Args:
        height: Output image height passed to ``A.Resize``.
        width: Output image width passed to ``A.Resize``.
        mean: Per-channel mean passed to ``A.Normalize``.
        std: Per-channel standard deviation passed to ``A.Normalize``.
        hflip_chance: Probability of ``A.HorizontalFlip``.
        random_brightness_chance: Probability of
            ``A.RandomBrightnessContrast``.
        shift_limit: ``shift_limit`` of ``A.ShiftScaleRotate``.
        scale_limit: ``scale_limit`` of ``A.ShiftScaleRotate``.
        rotate_limit: ``rotate_limit`` of ``A.ShiftScaleRotate``.
        shift_scale_rotate_chance: Probability of ``A.ShiftScaleRotate``.
        r_shift_limit: Red-channel limit of ``A.RGBShift``.
        g_shift_limit: Green-channel limit of ``A.RGBShift``.
        b_shift_limit: Blue-channel limit of ``A.RGBShift``.
        rgb_shift_chance: Probability of ``A.RGBShift``.

    Returns:
        An ``A.Compose`` pipeline ending in ``ToTensorV2``.
    """
    # The pipeline class is `A.Compose`; albumentations has no lowercase
    # `compose`, so the previous spelling failed with AttributeError.
    return A.Compose([
        A.Resize(height=height, width=width),
        A.HorizontalFlip(p=hflip_chance),
        A.RandomBrightnessContrast(p=random_brightness_chance),
        A.ShiftScaleRotate(shift_limit=shift_limit, scale_limit=scale_limit,
                           rotate_limit=rotate_limit, p=shift_scale_rotate_chance),
        A.RGBShift(r_shift_limit=r_shift_limit, g_shift_limit=g_shift_limit, b_shift_limit=b_shift_limit,
                   p=rgb_shift_chance),
        # Normalize last: the colour augmentations above are meant to run on
        # the raw pixel range, not on mean/std-standardised floats.
        A.Normalize(mean=mean, std=std),
        ToTensorV2(),
    ])