In [10]:
import plotly.express as px
import os
import pandas as pd
import numpy as np
import random
import shutil
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import warnings
import splitfolders

In [2]:
rootPath = "dataset/garbage_images"
class_names = os.listdir(rootPath)
class_names

['Microwave',
 'paper',
 'cardboard',
 'Television',
 'Player',
 'plastic',
 'trash',
 'glass',
 'Mouse',
 'Keyboard',
 'Washing Machine',
 'Mobile',
 'organic',
 'PCB',
 'metal',
 'Battery',
 'Printer']

In [3]:
sizes = [len(os.listdir(os.path.join(rootPath, class_name))) for class_name in class_names]

fig = px.pie(
    names=class_names,
    values=sizes,
    title="Garbage Image Dataset Distribution",
    color_discrete_sequence=px.colors.qualitative.Pastel
)
fig.show()


In [4]:
print("\n📁 DATASET YAPISI:")
print(f"Toplam kategori sayısı: {len(class_names)}")
print(f"Kategoriler: {class_names}")

category_data = []
total_images = 0

for class_name in class_names:
    class_path = os.path.join(rootPath, class_name)
    if os.path.isdir(class_path):
   
        image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff']
        image_files = [f for f in os.listdir(class_path) 
                      if any(f.lower().endswith(ext) for ext in image_extensions)]
        count = len(image_files)
        total_images += count
        category_data.append({
            'Category': class_name,
            'Count': count,
            'Percentage': 0  
        })

for item in category_data:
    item['Percentage'] = (item['Count'] / total_images) * 100

df = pd.DataFrame(category_data)
df = df.sort_values('Count', ascending=False).reset_index(drop=True)

print(f"\nToplam görüntü sayısı: {total_images}")
print("\n📊 KATEGORİ DAĞILIMI:")
print(df.to_string(index=False))


📁 DATASET YAPISI:
Toplam kategori sayısı: 17
Kategoriler: ['Microwave', 'paper', 'cardboard', 'Television', 'Player', 'plastic', 'trash', 'glass', 'Mouse', 'Keyboard', 'Washing Machine', 'Mobile', 'organic', 'PCB', 'metal', 'Battery', 'Printer']

Toplam görüntü sayısı: 5427

📊 KATEGORİ DAĞILIMI:
       Category  Count  Percentage
          paper    594   10.945274
          glass    501    9.231620
        organic    500    9.213193
        plastic    482    8.881518
          metal    410    7.554819
      cardboard    403    7.425834
      Microwave    240    4.422333
         Player    240    4.422333
     Television    240    4.422333
         Mobile    240    4.422333
          Mouse    240    4.422333
Washing Machine    240    4.422333
       Keyboard    240    4.422333
        Battery    240    4.422333
            PCB    240    4.422333
        Printer    240    4.422333
          trash    137    2.524415


In [12]:
def undersample_class (class_dir,target_count = 400 , output_dir = None):
    images = os.listdir(class_dir)
    if len(images)>target_count:
        selected = random.sample(images, target_count)
    else:
        selected = images
    
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        for img in selected:
            shutil.copy(os.path.join(class_dir, img), os.path.join(output_dir, img))
    return selected





In [23]:

datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'  
)

def augment_images(class_dir, target_count=400, output_dir=None):
    images = os.listdir(class_dir)
    current_count = len(images)

    if current_count >= target_count:
        print(f"Already have {current_count} images in {class_dir}, no augmentation needed.")
        return

    if output_dir is None:
        output_dir = class_dir

    # Eğer output_dir ile class_dir aynı değilse dosyaları kopyala
    if class_dir != output_dir:
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        for img in images:
            src = os.path.join(class_dir, img)
            dst = os.path.join(output_dir, img)
            if not os.path.exists(dst):
                shutil.copy(src, dst)

    images = os.listdir(output_dir)  # output_dir içindeki dosyalar
    current_count = len(images)
    i = 0

    while current_count + i < target_count:
        img_name = np.random.choice(images)
        path = os.path.join(output_dir, img_name)  # output_dir'den açıyoruz
        img = Image.open(path).convert('RGB')
        x = np.array(img)
        x = x.reshape((1,) + x.shape)

        # datagen ile augment edilmiş tek bir görüntü oluştur
        for batch in datagen.flow(x,
                                  batch_size=1,
                                  save_to_dir=output_dir,
                                  save_prefix='aug',
                                  save_format='jpeg'):
            i += 1
            break


In [25]:
balanced_dir = "dataset/balanced_waste_images"

for cls in class_names:
    class_path = os.path.join(rootPath, cls)
    output_class_path = os.path.join(balanced_dir, cls)
    os.makedirs(output_class_path, exist_ok=True)

    images = os.listdir(class_path)
    count = len(images)

    if count > 400:
        undersample_class(class_path, 400, output_dir=output_class_path)
    else:
        # Az olanları direkt kopyala
        for img in images:
            print(f"Copying {img} to {output_class_path}")
            shutil.copy(os.path.join(class_path, img), os.path.join(output_class_path, img))

        # Ve augment yap
        augment_images(output_class_path, 400, output_class_path)



Copying Microwave_52.jpg to dataset/balanced_waste_images/Microwave
Copying Microwave_295.jpg to dataset/balanced_waste_images/Microwave
Copying Microwave_20.jpg to dataset/balanced_waste_images/Microwave
Copying Microwave_196.jpg to dataset/balanced_waste_images/Microwave
Copying Microwave_29.jpg to dataset/balanced_waste_images/Microwave
Copying Microwave_93.jpg to dataset/balanced_waste_images/Microwave
Copying Microwave_114.jpg to dataset/balanced_waste_images/Microwave
Copying Microwave_48.jpg to dataset/balanced_waste_images/Microwave
Copying Microwave_263.jpg to dataset/balanced_waste_images/Microwave
Copying Microwave_205.jpg to dataset/balanced_waste_images/Microwave
Copying Microwave_208.jpg to dataset/balanced_waste_images/Microwave
Copying Microwave_119.jpg to dataset/balanced_waste_images/Microwave
Copying Microwave_55.jpg to dataset/balanced_waste_images/Microwave
Copying Microwave_217.jpg to dataset/balanced_waste_images/Microwave
Copying Microwave_44.jpg to dataset/bala

In [26]:
sizes = [len(os.listdir(os.path.join("dataset/balanced_waste_images", class_name))) for class_name in class_names]

fig = px.pie(
    names=os.listdir("dataset/balanced_waste_images"),
    values=sizes,
    title="Garbage Image Dataset Distribution",
    color_discrete_sequence=px.colors.qualitative.Pastel
)
fig.show()


In [None]:
splitfolders.ratio(input=rootPath,output='imgs',seed=15,ratio=(.8,.1,.1),group_prefix=None,move=False)