In [1]:
# Mount Google Drive into the Colab runtime at /content/drive so the dataset
# folder referenced by later cells is reachable through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [3]:
# Root folder of the dataset on the mounted Drive. The analysis and cleanup
# functions below treat every sub-directory of this folder as one class label.
dataset_path = '/content/drive/MyDrive/bisindo_dataset'

In [4]:
def is_file_accessible(file_path):
    """Return True when OpenCV can decode ``file_path`` as an image.

    Args:
        file_path: Path of the image file to probe.

    Returns:
        bool: True if ``cv2.imread`` yields an image; False if it yields
        ``None`` or raises an ordinary exception.
    """
    try:
        return cv2.imread(file_path) is not None
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt / SystemExit
        # must propagate. Otherwise interrupting a long scan would report the
        # file being read as inaccessible, and the cleanup step would delete it.
        return False

def analyze_dataset(dataset_path, shape_sample_size=5):
    """Summarise a folder-per-class image dataset.

    Every sub-directory of ``dataset_path`` is treated as one class label.
    For each class the function counts the image files (``.jpg``, ``.jpeg``,
    ``.png``, case-insensitive), counts how many of them fail
    ``is_file_accessible``, records up to three sample filenames and collects
    the distinct array shapes of the first ``shape_sample_size`` images.

    A short textual summary (totals and warnings) is printed as a side effect.

    Args:
        dataset_path: Root directory containing one sub-directory per label.
        shape_sample_size: How many images per class are decoded to detect
            shapes. Defaults to 5.

    Returns:
        tuple: ``(df, has_inaccessible)`` where ``df`` is a DataFrame with one
        row per class, sorted by label, and ``has_inaccessible`` is True when
        at least one image could not be read. For a dataset root without any
        class folders ``df`` is empty but still has the expected columns.
    """
    columns = [
        "Label",
        "Number of Images",
        "Inaccessible Images",
        "Sample Filenames",
        "Detected Shapes",
    ]
    class_data = []
    image_shapes = set()
    total_images = 0
    inaccessible_images_total = 0
    inconsistent_shapes = False

    for label in sorted(os.listdir(dataset_path)):
        label_path = os.path.join(dataset_path, label)
        if not os.path.isdir(label_path):
            continue

        image_files = [
            f for f in os.listdir(label_path)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        ]
        count = len(image_files)
        total_images += count

        # Count images that cannot be decoded.
        inaccessible_count = sum(
            1
            for img_file in image_files
            if not is_file_accessible(os.path.join(label_path, img_file))
        )
        inaccessible_images_total += inaccessible_count

        # Up to three filenames as a quick visual sample; "-" for empty classes.
        sample_files = ", ".join(image_files[:3]) if image_files else "-"

        # Decode only the first few images to detect shapes; unreadable ones
        # are simply skipped here (they are already counted above).
        local_shapes = set()
        for img_file in image_files[:shape_sample_size]:
            img = cv2.imread(os.path.join(label_path, img_file))
            if img is not None:
                local_shapes.add(img.shape)
                image_shapes.add(img.shape)

        if len(local_shapes) > 1:
            inconsistent_shapes = True

        class_data.append({
            "Label": label,
            "Number of Images": count,
            "Inaccessible Images": inaccessible_count,
            "Sample Filenames": sample_files,
            "Detected Shapes": list(local_shapes)
        })

    # Passing the column list keeps the schema stable for an empty dataset;
    # without it sort_values("Label") raises KeyError when no class exists.
    df = pd.DataFrame(class_data, columns=columns)
    df = df.sort_values("Label").reset_index(drop=True)

    # Print the textual summary.
    print(f"Total Classes: {len(df)}")
    print(f"Total Images: {total_images}")
    print(f"Total Inaccessible Images: {inaccessible_images_total}")
    if inconsistent_shapes or len(image_shapes) > 1:
        print("Warning: Multiple image resolutions detected.")

    has_inaccessible = inaccessible_images_total > 0
    if has_inaccessible:
        print("Warning: Inaccessible images found.")

    return df, has_inaccessible

# Run the analysis. `contains_inaccesible` is reused by the cleanup cell, and
# the trailing bare expression makes the notebook render the summary table.
dataset_df, contains_inaccesible = analyze_dataset(dataset_path)
dataset_df


Total Classes: 26
Total Images: 4763
Total Inaccessible Images: 297


Unnamed: 0,Label,Number of Images,Inaccessible Images,Sample Filenames,Detected Shapes
0,A,181,16,"A_Bebas_Oppo_A Series.jpg, A_polos gelap_iphon...","[(2162, 2162, 3), (2544, 2544, 3), (1561, 1561..."
1,B,185,16,"B_polos gelap_Samsung_A33.jpg, B_STRIPE_SAMSUN...","[(1404, 1404, 3), (1496, 1496, 3), (2544, 2544..."
2,C,183,14,"C_background baju_Samsung_A33.jpg, C_polos gel...","[(1617, 1500, 3), (2544, 2544, 3), (1532, 1500..."
3,D,182,12,"D_Baju_Xiaomi_MI T10.jpg, D_polos terang_Samsu...","[(1508, 1508, 3), (1391, 1391, 3), (2322, 2322..."
4,E,180,11,"E_Baju_Xiaomi_MI T10.jpg, E_Terang_Xiaomi_MI T...","[(1433, 1433, 3), (2544, 2544, 3), (1348, 1348..."
5,F,184,11,"F_Baju_Xiaomi_MI T10.jpg, F_Terang_Xiaomi_MI T...","[(1388, 1388, 3), (4074, 4074, 3), (1266, 1266..."
6,G,184,11,"G_Bebas_Xiaomi_MI T10.jpg, G_polos terang_Sams...","[(2544, 2544, 3), (1379, 1379, 3), (1892, 1892..."
7,H,184,11,"H_Baju_Xiaomi_MI T10.jpg, H_polos gelap_Samsun...","[(2544, 2544, 3), (1430, 1430, 3), (1426, 1426..."
8,I,184,11,"I_Baju_Xiaomi_MI T10.jpg, I_Terang_Xiaomi_MI T...","[(2544, 2544, 3), (1438, 1438, 3), (1510, 1510..."
9,J,181,12,"J_Baju_Xiaomi_MI T10.jpg, J_Baju_Realme_7i.jpg...","[(1442, 1442, 3), (3072, 3072, 3), (4074, 4074..."


In [5]:
# Delete inaccessible images
def delete_inaccessible_images(dataset_path):
    """Permanently remove every image that ``is_file_accessible`` rejects.

    Walks each sub-directory of ``dataset_path`` (one per class label), probes
    every ``.jpg`` / ``.jpeg`` / ``.png`` file (case-insensitive) and deletes
    those that cannot be read. One line is printed per processed file.

    NOTE(review): deletion uses ``os.remove`` and cannot be undone from here.
    Any file the probe cannot decode is removed, regardless of the reason.

    Args:
        dataset_path: Root directory containing one sub-directory per label.

    Returns:
        list[str]: Paths of the files that were actually deleted, in the order
        they were removed.
    """
    deleted_paths = []

    for label in sorted(os.listdir(dataset_path)):
        label_path = os.path.join(dataset_path, label)
        if not os.path.isdir(label_path):
            continue

        image_files = [
            f for f in os.listdir(label_path)
            if f.lower().endswith(('.jpg', '.jpeg', '.png'))
        ]

        for img_file in image_files:
            img_path = os.path.join(label_path, img_file)
            if is_file_accessible(img_path):
                continue

            try:
                os.remove(img_path)
            except OSError as exc:
                # A single failure (file vanished, permission denied, ...)
                # should not abort the cleanup of the remaining files.
                print(f"Could not delete inaccessible image: {img_path} ({exc})")
                continue

            deleted_paths.append(img_path)
            print(f"Deleted inaccessible image: {img_path}")

    return deleted_paths

# Run the cleanup only when the analysis cell reported at least one
# inaccessible image (`contains_inaccesible` is set by that cell, so it must
# have been executed first).
if contains_inaccesible:
    delete_inaccessible_images(dataset_path)
    print("All Inaccessible images deleted.")
else:
    print("No inaccessible images found.")

Deleted inaccessible image: /content/drive/MyDrive/bisindo_dataset/A/A_polos gelap_iphone_xr.jpg
Deleted inaccessible image: /content/drive/MyDrive/bisindo_dataset/A/A_baju_iphone_xr.jpg
Deleted inaccessible image: /content/drive/MyDrive/bisindo_dataset/A/A_bebas_iphone_xr.jpg
Deleted inaccessible image: /content/drive/MyDrive/bisindo_dataset/A/A_polos terang_iphone_xr.jpg
Deleted inaccessible image: /content/drive/MyDrive/bisindo_dataset/A/A_Bebas_Iphone_13promax.jpg
Deleted inaccessible image: /content/drive/MyDrive/bisindo_dataset/A/A_polos baju_Iphone_13promax.jpg
Deleted inaccessible image: /content/drive/MyDrive/bisindo_dataset/A/A_polos terang_Iphone_13promax.jpg
Deleted inaccessible image: /content/drive/MyDrive/bisindo_dataset/A/A_Baju_Apple_Iphone 15.jpg
Deleted inaccessible image: /content/drive/MyDrive/bisindo_dataset/A/._20240509_182802.jpg
Deleted inaccessible image: /content/drive/MyDrive/bisindo_dataset/A/A_Polos Terang_Apple_Iphone 15.jpg
Deleted inaccessible image: /c