In [None]:
import os
import shutil
from pathlib import Path
import yaml
from google.colab import files

In [None]:
# Mount Google Drive at /content/drive; the dataset paths configured in this
# notebook all point below that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# ========= CONFIGURATION =========
# All source datasets and the merged output share one Drive folder; change the
# root (or the individual folder names) to match where the datasets are stored.
DATASET_ROOT = "/content/drive/MyDrive/Skripton_(Skripsi_Marathon)/Dataset"

dataset_paths = {
    key: f"{DATASET_ROOT}/{folder}"
    for key, folder in (
        ("ds1", "Fall-Detection-CaucaFall-5"),
        ("ds2", "First-Pedestrian-Test-1"),
        ("ds3", "Human-Action-Recognition-2000-9"),
        ("ds4", "Human-Fall-detection-8"),
        ("ds5", "exceptional-situations-5"),
    )
}

output_dir = Path(DATASET_ROOT) / "All-In-One-Dataset"

# Per-dataset mapping: original class name -> merged label ("fall"/"nofall").
# Class names that are not listed for a dataset (e.g. Violence/fire in ds5)
# are intentionally not included.
label_map = {
    "ds1": {
        "fall": "fall",
        "nofall": "nofall",
    },
    "ds2": {
        "Pedestrians": "nofall",
    },
    "ds3": {
        "berdiri": "nofall",
        "berjalan": "nofall",
        "berlari": "nofall",
        "jatuh": "fall",
    },
    "ds4": {
        "falling": "fall",
        "sitting": "nofall",
        "standing": "nofall",
        "walking": "nofall",
    },
    "ds5": {
        "fall": "fall",
        "sit": "nofall",
        "sleep": "nofall",
        "standing": "nofall",
    },
}

In [None]:
from google.colab import drive
import os

# 2. Set the path of the folder whose size should be computed.
# Here the first configured source dataset (key 'ds1') is measured.
folder_path = dataset_paths['ds1']

# 3. Function that computes the size of a folder
def get_folder_size(folder):
    """Return the combined size in bytes of every file below ``folder``.

    The tree is traversed recursively with ``os.walk`` and each file's size is
    read with ``os.path.getsize``; an empty tree yields 0.
    """
    return sum(
        os.path.getsize(os.path.join(root, name))
        for root, _subdirs, names in os.walk(folder)
        for name in names
    )

# 4. Helper that converts a byte count into a human-readable string
def format_size(bytes_size):
    """Format a size in bytes using binary units (B, KB, MB, GB, TB).

    The value is divided by 1024 until it drops below 1024 or the largest
    unit is reached, then rendered with two decimals.

    Args:
        bytes_size: Size in bytes (int or float).

    Returns:
        A string such as ``"813.67 MB"``. Sizes of 1024 TB and above are
        reported in TB (e.g. ``"1024.00 TB"``).
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB']
    # Only divide while a larger unit is still available; dividing once more
    # after the last unit would label a petabyte-scale value as "TB".
    for unit in units[:-1]:
        if bytes_size < 1024:
            return f"{bytes_size:.2f} {unit}"
        bytes_size /= 1024
    return f"{bytes_size:.2f} {units[-1]}"

# 5. Compute the folder size and print it in human-readable form
folder_size_bytes = get_folder_size(folder_path)
readable_size = format_size(folder_size_bytes)
print(f"Ukuran folder '{folder_path}': {readable_size}")


Ukuran folder '/content/drive/MyDrive/Skripton_(Skripsi_Marathon)/Dataset/Fall-Detection-CaucaFall-5': 813.67 MB


In [None]:
# ========= BANTUAN =========
def get_names(ds_path):
    """Return the ``names`` entry of the ``data.yaml`` located in ``ds_path``.

    The file is parsed with ``yaml.safe_load``; a missing file or a missing
    ``names`` key raises the underlying exception.
    """
    config_file = Path(ds_path) / "data.yaml"
    with config_file.open("r") as handle:
        return yaml.safe_load(handle)["names"]

def get_class_index(label, ds_key):
    """Translate a dataset-specific class name into the merged class index.

    The name is looked up in the module-level ``label_map`` for ``ds_key``.
    Returns 0 when it maps to ``"fall"``, 1 when it maps to ``"nofall"`` and
    ``None`` when the dataset or the class name has no mapping.
    """
    merged_label = label_map.get(ds_key, {}).get(label)
    return {"fall": 0, "nofall": 1}.get(merged_label)

def _remap_label_lines(lines, names, ds_key):
    """Return the annotation lines of one label file rewritten to merged class ids.

    Blank lines, lines whose first token is not an integer, lines whose class
    id is outside ``names`` and lines whose class has no mapping for
    ``ds_key`` are dropped.
    """
    remapped = []
    for line in lines:
        parts = line.strip().split()
        if not parts:
            continue
        try:
            class_idx = int(parts[0])
        except ValueError:
            # Malformed annotation: skip the line instead of aborting the merge.
            continue
        # Reject negative ids too; they would silently index from the end.
        if not 0 <= class_idx < len(names):
            continue
        new_idx = get_class_index(names[class_idx], ds_key)
        if new_idx is not None:
            parts[0] = str(new_idx)
            remapped.append(" ".join(parts) + "\n")
    return remapped


def process_dataset(ds_key, ds_path, x):
    """Merge one YOLO-style dataset into ``output_dir`` with remapped class ids.

    For each of the ``train``/``valid``/``test`` subsets, every ``*.txt`` file
    in ``labels`` is read and its class ids are translated via ``get_names``
    and ``get_class_index``. When at least one annotation remains and an image
    with the same stem (``.jpg``, ``.png`` or ``.jpeg``) exists in ``images``,
    the image is copied and the rewritten label file is written below
    ``output_dir/<subset>/``, both prefixed with ``ds_key``.

    Args:
        ds_key: Dataset key; selects the mapping in ``label_map`` and is used
            as the file-name prefix in the output.
        ds_path: Root folder of the dataset; must contain ``data.yaml``.
        x: Current value of the caller's running counter, used only to number
            the progress messages.

    Returns:
        The number of label files counted during this call (0 for ``ds1``).
        Returning the increment rather than the absolute counter keeps the
        caller's ``c += process_dataset(...)`` total correct and matches the
        ``ds1`` branch, which already returned 0.
    """
    names = get_names(ds_path)
    print(f'Memproses {ds_path}....')
    if ds_key == 'ds1':
        # NOTE(review): ds1 is hard-coded to be skipped - confirm this is still wanted.
        return 0

    start = x
    for subset in ["train", "valid", "test"]:
        print(f'Memproses {ds_path}/{subset}....')
        image_dir = Path(ds_path) / subset / "images"
        label_dir = Path(ds_path) / subset / "labels"
        if not image_dir.exists() or not label_dir.exists():
            print(f"Folder {subset} tidak ditemukan di {ds_path}")
            continue

        for label_file in label_dir.glob("*.txt"):
            print(f'Memproses {label_file}')
            with open(label_file, "r") as f:
                lines = f.readlines()

            original_name = label_file.stem
            new_lines = _remap_label_lines(lines, names, ds_key)

            if new_lines:
                # Find the image that belongs to this label file.
                for ext in [".jpg", ".png", ".jpeg"]:
                    src_img = image_dir / f"{original_name}{ext}"
                    if src_img.exists():
                        break
                else:
                    continue  # no image found: skip this label file (not counted)

                dst_img = output_dir / subset / "images" / f"{ds_key}_{original_name}{src_img.suffix}"
                dst_lbl = output_dir / subset / "labels" / f"{ds_key}_{original_name}.txt"

                # Make sure the destination directories exist before writing.
                dst_img.parent.mkdir(parents=True, exist_ok=True)
                dst_lbl.parent.mkdir(parents=True, exist_ok=True)

                shutil.copy(src_img, dst_img)
                with open(dst_lbl, "w") as f:
                    f.writelines(new_lines)
            # Label files without any kept annotation are still counted here.
            print(f"Berhasil memproses dataset ke-{x}")
            x += 1
    return x - start

In [None]:
# ========= PROCESS ALL =========
# Running count passed to process_dataset to number its progress messages.
c = 0

for ds_key in ['ds4', 'ds5']:
    ds_path = dataset_paths[ds_key]
    c += process_dataset(ds_key, ds_path, c)

# ========= WRITE data.yaml =========
# The output folder is only created while copying files; create it here as
# well so writing data.yaml cannot fail when nothing was copied.
output_dir.mkdir(parents=True, exist_ok=True)
with open(output_dir / "data.yaml", "w") as f:
    f.write(
        "train: train/images\n"
        "val: valid/images\n"
        "test: test/images\n"
        "nc: 2\n"
        "names: ['fall', 'nofall']\n"
    )

# The check-mark emoji was stored as mis-decoded bytes; restored here.
print("✅ Dataset berhasil digabungkan ke:", output_dir)

[1;30;43mOutput streaming akan dipotong hingga 5000 baris terakhir.[0m
Berhasil memproses dataset ke-10489
Memproses /content/drive/MyDrive/Skripton_(Skripsi_Marathon)/Dataset/exceptional-situations-5/train/labels/Screenshot-2022-08-09-105054_jpg.rf.afe10c3a0b5d9b15e677716c86b0d3ff.txt
Berhasil memproses dataset ke-10490
Memproses /content/drive/MyDrive/Skripton_(Skripsi_Marathon)/Dataset/exceptional-situations-5/train/labels/13_jpg.rf.bf2158e851692051de3fcdad97d8ca62.txt
Berhasil memproses dataset ke-10491
Memproses /content/drive/MyDrive/Skripton_(Skripsi_Marathon)/Dataset/exceptional-situations-5/train/labels/A-450-_png_jpg.rf.c51fca68b148e47a0f3e8088cc3bfbbd.txt
Berhasil memproses dataset ke-10492
Memproses /content/drive/MyDrive/Skripton_(Skripsi_Marathon)/Dataset/exceptional-situations-5/train/labels/sit17_jpg.rf.f4ab6468b15b5ed790354c367cb47fa9.txt
Berhasil memproses dataset ke-10493
Memproses /content/drive/MyDrive/Skripton_(Skripsi_Marathon)/Dataset/exceptional-situations-5/

In [None]:
import os
def count_files(path):
    """Count the image and label files directly inside ``path``.

    A file is counted when its name ends with ``.jpg``, ``.jpeg``, ``.png``
    or ``.txt`` (case-sensitive). ``.jpeg`` is included because the merge
    step also copies images with that extension.
    """
    counted_suffixes = ('.jpg', '.jpeg', '.png', '.txt')
    return sum(1 for name in os.listdir(path) if name.endswith(counted_suffixes))

print('============List Dataset============')
splits = ['train', 'valid', 'test']
split_count = {}
# NOTE(review): this reads the local copy under /content, which this notebook
# only creates in the later download/extract cell - confirm the intended path.
temp = '/content/All-In-One-Dataset'
total = 0
for split in splits:
    img_path = f"{temp}/{split}/images"
    split_count[split] = count_files(img_path)
    total += split_count[split]

for split in splits:
    # split_count is keyed by split name; indexing it by position raised KeyError.
    share = split_count[split] / total if total else 0.0
    print(f"{split.upper()} - Images: {split_count[split]}, Percentage : {share:.2f}")
print('Total =', total, '\n')


TRAIN - Images: 29272, Labels: 29272
VALID - Images: 4723, Labels: 4723
TEST - Images: 982, Labels: 982
Total = 34977 



In [None]:
import shutil

# Path of the dataset folder to archive (adjust as needed)
folder_path = '/content/drive/MyDrive/Skripton_(Skripsi_Marathon)/Dataset/All-In-One-Dataset'

# Compress the folder into a ZIP stored in the Colab workspace; make_archive
# appends ".zip" to the base name and returns the resulting archive path.
output_zip = shutil.make_archive('/content/AIO_Dataset', 'zip', folder_path)

# Download the ZIP file
files.download(output_zip)


# Rombak Lanjutan

In [None]:
#Mendownload Dataset AIO
# Install gdown jika belum ada
!pip install -q gdown

import gdown
import zipfile
import os

def downextract(url = (), extract_to = '/content', nama = 'default'):
    """Download ZIP archives with gdown and extract each into ``extract_to``.

    Args:
        url: Iterable of download URLs (a single URL string is also accepted).
        extract_to: Destination folder; created if it does not exist.
        nama: Label used in the local archive names
            (``dataset_<nama>_<n>.zip``, written to the current directory).

    Raises:
        RuntimeError: If gdown reports a failed download.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(url, str):
        url = [url]

    # Create the destination folder once, before any download starts.
    os.makedirs(extract_to, exist_ok=True)

    for i, link in enumerate(url):
        archive_name = f'dataset_{nama}_{i+1}.zip'

        # Download the file from Google Drive; gdown returns a falsy value on failure.
        if not gdown.download(link, archive_name, quiet=False):
            raise RuntimeError(f"Gagal mengunduh: {link}")

        # Extract the archive right after it has been downloaded.
        with zipfile.ZipFile(archive_name, 'r') as zip_ref:
            zip_ref.extractall(extract_to)

    # The check-mark emoji was stored as mis-decoded bytes; restored here.
    print(f"✅ File berhasil diunduh dan diekstrak ke: {extract_to}")


In [None]:
# Google Drive links of the archives to download and extract
aio_urls = [
    'https://drive.google.com/uc?id=1PPWhdbidrjbpa-jImb77eDnYkvBsQSnv',
    'https://drive.google.com/uc?id=1O_7ZzkS4FbRk2lY_7pvHAMnc_TOwQ6V5',
]
downextract(aio_urls, nama='AIO')

Downloading...
From (original): https://drive.google.com/uc?id=1PPWhdbidrjbpa-jImb77eDnYkvBsQSnv
From (redirected): https://drive.google.com/uc?id=1PPWhdbidrjbpa-jImb77eDnYkvBsQSnv&confirm=t&uuid=2897e322-ce11-4def-9105-cc542fb747e4
To: /content/dataset_AIO_1.zip
100%|██████████| 1.67G/1.67G [00:40<00:00, 40.9MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1O_7ZzkS4FbRk2lY_7pvHAMnc_TOwQ6V5
From (redirected): https://drive.google.com/uc?id=1O_7ZzkS4FbRk2lY_7pvHAMnc_TOwQ6V5&confirm=t&uuid=bd68d68d-4318-45fd-8ac7-ee1f7f449cce
To: /content/dataset_AIO_2.zip
100%|██████████| 56.3M/56.3M [00:01<00:00, 29.9MB/s]


✅ File berhasil diunduh dan diekstrak ke: /content


In [None]:
import os
import shutil
import random

# Path of the YOLO dataset
dataset_path = "/content/All-In-One-Dataset"  # adjust to the dataset location

# Number of validation images to move into the test split, and the seed that
# makes the random selection repeatable.
NUM_TO_MOVE = 900
SPLIT_SEED = 42

valid_img_dir = os.path.join(dataset_path, "valid", "images")
valid_lbl_dir = os.path.join(dataset_path, "valid", "labels")

test_img_dir = os.path.join(dataset_path, "test", "images")
test_lbl_dir = os.path.join(dataset_path, "test", "labels")

# Make sure the test folders exist
os.makedirs(test_img_dir, exist_ok=True)
os.makedirs(test_lbl_dir, exist_ok=True)

# List the validation images; sorted because os.listdir order is arbitrary and
# would otherwise make the seeded sample differ between runs.
img_files = sorted(
    f for f in os.listdir(valid_img_dir) if f.endswith((".jpg", ".png", ".jpeg"))
)

# Make sure there are enough images
if len(img_files) < NUM_TO_MOVE:
    raise ValueError(f"Hanya ada {len(img_files)} gambar di valid, tidak cukup untuk pindahkan {NUM_TO_MOVE}.")

# Pick the images at random with a dedicated, seeded generator.
# NOTE: re-running this cell moves another NUM_TO_MOVE images.
img_files_to_move = random.Random(SPLIT_SEED).sample(img_files, NUM_TO_MOVE)

count_moved = 0
for img_file in img_files_to_move:
    lbl_file = os.path.splitext(img_file)[0] + ".txt"

    src_img = os.path.join(valid_img_dir, img_file)
    src_lbl = os.path.join(valid_lbl_dir, lbl_file)

    dst_img = os.path.join(test_img_dir, img_file)
    dst_lbl = os.path.join(test_lbl_dir, lbl_file)

    # Skip (and do not count) an image that is no longer there.
    if not os.path.exists(src_img):
        continue
    shutil.move(src_img, dst_img)

    # Move the label file as well, if there is one
    if os.path.exists(src_lbl):
        shutil.move(src_lbl, dst_lbl)

    count_moved += 1

print(f"Selesai! Total file gambar yang dipindahkan: {count_moved}")


Selesai! Total file gambar yang dipindahkan: 900


In [None]:
import os
def count_files(path):
    """Count the image and label files directly inside ``path``.

    A file is counted when its name ends with ``.jpg``, ``.jpeg``, ``.png``
    or ``.txt`` (case-sensitive). ``.jpeg`` is included because the split
    step above also moves images with that extension.
    """
    counted_suffixes = ('.jpg', '.jpeg', '.png', '.txt')
    return sum(1 for name in os.listdir(path) if name.endswith(counted_suffixes))

print('============List Dataset============')
splits = ['train', 'valid', 'test']
temp = '/content/All-In-One-Dataset'

# Number of counted files in each split's images folder, keyed by split name.
split_count = {name: count_files(f"{temp}/{name}/images") for name in splits}
total = sum(split_count.values())

for split in splits:
    n_files = split_count[split]
    print(f"{split.upper()} - Images: {n_files}, Percentage : {n_files / total:.2f}")
print('Total =', total, '\n')


TRAIN - Images: 29272, Percentage : 0.84
VALID - Images: 3823, Percentage : 0.11
TEST - Images: 1882, Percentage : 0.05
Total = 34977 



In [None]:
!zip -r -q NEW_AIO_3.zip /content/All-In-One-Dataset
!cp /content/NEW_AIO_3.zip /content/drive/MyDrive/SKRIPSI_2106020_Renaldy_Azhari_Imaduddin/Dataset
