# 1|Importing Modules

In [8]:
import plotly.express as px
import os
import pandas as pd
import numpy as np
import random
import shutil
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from PIL import Image
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix
import warnings
import splitfolders

# 2|EDA

In [None]:
# Root folder of the raw dataset; each sub-folder is treated as one class.
rootPath = "dataset/garbage_images"

# Keep only directories: stray files in the root (e.g. .DS_Store, Thumbs.db)
# are not classes, and later cells call os.listdir() on every entry.
# Sorting gives a stable order, since os.listdir() returns entries in
# arbitrary order.
class_names = sorted(
    entry for entry in os.listdir(rootPath)
    if os.path.isdir(os.path.join(rootPath, entry))
)
class_names

In [None]:
# Number of directory entries in each class folder (one pie slice per class).
class_dirs = [os.path.join(rootPath, name) for name in class_names]
sizes = [len(os.listdir(class_dir)) for class_dir in class_dirs]

# Interactive pie chart of the per-class counts.
fig = px.pie(
    values=sizes,
    names=class_names,
    color_discrete_sequence=px.colors.qualitative.Pastel,
    title="Garbage Image Dataset Distribution",
)
fig.show()


In [None]:
print("\n📁 DATASET STRUCTURE:")
print(f"Total number of categories: {len(class_names)}")
print(f"Categories: {class_names}")

# Lower-case file extensions that count as images. Kept as a tuple so it can
# be passed directly to str.endswith, and defined once instead of being
# rebuilt on every loop iteration.
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff')

category_data = []
total_images = 0

for class_name in class_names:
    class_path = os.path.join(rootPath, class_name)
    if not os.path.isdir(class_path):
        # Stray files in the dataset root are not classes.
        continue

    count = sum(
        1 for f in os.listdir(class_path)
        if f.lower().endswith(image_extensions)
    )
    total_images += count
    category_data.append({
        'Category': class_name,
        'Count': count,
        'Percentage': 0.0,  # filled in below once the grand total is known
    })

# Percentages need the grand total, so they are computed in a second pass.
# Guard against an empty dataset, which would otherwise divide by zero.
for item in category_data:
    item['Percentage'] = (item['Count'] / total_images) * 100 if total_images else 0.0

# Passing the columns explicitly keeps the frame well-formed (and sortable)
# even when no class folder was found.
df = pd.DataFrame(category_data, columns=['Category', 'Count', 'Percentage'])
df = df.sort_values('Count', ascending=False).reset_index(drop=True)

print(f"\nTotal number of images: {total_images}")
print("\n📊 CATEGORY DISTRIBUTION:")
print(df.to_string(index=False))


# 3|Data Splitting

In [12]:
def undersample_class(class_dir, target_count=400, output_dir=None):
    """Pick at most ``target_count`` files from ``class_dir`` and optionally copy them.

    Args:
        class_dir: Folder holding the files of one class.
        target_count: Maximum number of files to keep. If the folder holds
            more, a random subset of exactly this size is drawn with
            ``random.sample``; otherwise every file is kept.
        output_dir: When given (and non-empty), the folder is created if
            needed and the selected files are copied into it.

    Returns:
        List of the selected file names (relative to ``class_dir``).
    """
    # Only regular files are candidates: shutil.copy cannot copy a
    # sub-directory. Sorting makes the candidate order stable (os.listdir
    # returns entries in arbitrary order), so seeding ``random`` beforehand
    # gives a reproducible selection.
    images = sorted(
        name for name in os.listdir(class_dir)
        if os.path.isfile(os.path.join(class_dir, name))
    )

    if len(images) > target_count:
        selected = random.sample(images, target_count)
    else:
        selected = images

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        for img in selected:
            shutil.copy(os.path.join(class_dir, img), os.path.join(output_dir, img))
    return selected





In [15]:

# Random-augmentation pipeline used by augment_images() to synthesise extra
# samples for under-represented classes.
datagen = ImageDataGenerator(
    # geometric jitter
    horizontal_flip=True,
    rotation_range=20,
    shear_range=0.2,
    zoom_range=0.2,
    # translations, as a fraction of image width / height
    width_shift_range=0.2,
    height_shift_range=0.2,
    # pixels exposed by a transform are filled from the nearest valid pixel
    fill_mode='nearest',
)

def augment_images(class_dir, target_count=400, output_dir=None):
    """Top up a class folder with augmented images until it holds ``target_count`` files.

    If ``class_dir`` already contains at least ``target_count`` entries, a
    message is printed and nothing else happens. Otherwise the originals are
    copied into ``output_dir`` (when it differs from ``class_dir``), and
    randomly chosen images from ``output_dir`` are passed through the
    module-level ``datagen``, which writes each augmented result into
    ``output_dir`` with the ``aug`` prefix in JPEG format.

    Args:
        class_dir: Folder holding the original images of one class.
        target_count: Desired number of files in ``output_dir``.
        output_dir: Destination folder; defaults to ``class_dir``.

    Returns:
        None.

    Raises:
        ValueError: If ``output_dir`` contains no file to augment from.
    """
    images = os.listdir(class_dir)
    current_count = len(images)

    if current_count >= target_count:
        print(f"Already have {current_count} images in {class_dir}, no augmentation needed.")
        return

    if output_dir is None:
        output_dir = class_dir

    if class_dir != output_dir:
        os.makedirs(output_dir, exist_ok=True)
        for img in images:
            src = os.path.join(class_dir, img)
            dst = os.path.join(output_dir, img)
            if not os.path.exists(dst):
                shutil.copy(src, dst)

    # Source pool for augmentation: regular files only (a sub-directory
    # cannot be opened as an image). The pool is fixed up front, so images
    # generated during this call are never themselves re-augmented.
    source_images = [
        name for name in os.listdir(output_dir)
        if os.path.isfile(os.path.join(output_dir, name))
    ]
    if not source_images:
        raise ValueError(f"No images found in {output_dir}; nothing to augment from.")

    # Track the number of files actually on disk rather than the number of
    # save attempts: the names generated for save_to_dir carry only a small
    # random suffix, so two saves can land on the same name and overwrite
    # each other, which would leave the folder short of target_count.
    current_count = len(os.listdir(output_dir))
    stalled_attempts = 0
    max_stalled_attempts = 100  # safety valve so the loop always terminates

    while current_count < target_count:
        img_name = np.random.choice(source_images)
        path = os.path.join(output_dir, img_name)

        # Context manager closes the underlying file handle promptly.
        with Image.open(path) as img:
            x = np.array(img.convert('RGB'))
        x = x.reshape((1,) + x.shape)  # add the leading batch axis flow() expects

        # Drawing one batch triggers exactly one augmented image to be saved.
        for _ in datagen.flow(x,
                              batch_size=1,
                              save_to_dir=output_dir,
                              save_prefix='aug',
                              save_format='jpeg'):
            break

        new_count = len(os.listdir(output_dir))
        if new_count > current_count:
            stalled_attempts = 0
        else:
            stalled_attempts += 1
            if stalled_attempts >= max_stalled_attempts:
                print(f"Stopped augmenting {output_dir} at {new_count} files: "
                      f"no new file was created in {max_stalled_attempts} attempts.")
                break
        current_count = new_count


In [None]:
balanced_dir = "dataset/balanced_waste_images"

# Every class is brought to this many images: larger classes are randomly
# undersampled, smaller ones are topped up with augmented copies.
TARGET_COUNT = 400

# Seed the RNGs the helpers draw from (random.sample in undersample_class,
# np.random.choice in augment_images) so the balanced dataset can be
# regenerated on a fresh kernel.
random.seed(15)
np.random.seed(15)

for cls in class_names:
    class_path = os.path.join(rootPath, cls)
    if not os.path.isdir(class_path):
        # Stray files in the dataset root are not classes.
        continue

    output_class_path = os.path.join(balanced_dir, cls)
    os.makedirs(output_class_path, exist_ok=True)

    images = os.listdir(class_path)
    count = len(images)

    if count > TARGET_COUNT:
        undersample_class(class_path, TARGET_COUNT, output_dir=output_class_path)
    else:
        for img in images:
            shutil.copy(os.path.join(class_path, img), os.path.join(output_class_path, img))
        # One summary line per class instead of one line per copied file.
        print(f"Copied {count} images to {output_class_path}")

        # Source and destination are the same folder here: the originals
        # were just copied, so only the augmented extras are added.
        augment_images(output_class_path, TARGET_COUNT, output_class_path)



In [None]:
# From here on rootPath / class_names refer to the balanced dataset.
# They are rebound BEFORE counting so the pie labels and values come from
# the same listing and stay aligned (os.listdir order is arbitrary and may
# differ between the raw and the balanced folder).
rootPath = "dataset/balanced_waste_images"
class_names = sorted(
    entry for entry in os.listdir(rootPath)
    if os.path.isdir(os.path.join(rootPath, entry))
)

sizes = [len(os.listdir(os.path.join(rootPath, class_name))) for class_name in class_names]

fig = px.pie(
    names=class_names,
    values=sizes,
    title="Garbage Image Dataset Distribution",
    color_discrete_sequence=px.colors.qualitative.Pastel
)
fig.show()


In [None]:
# Split the dataset under rootPath into the 'imgs' output folder using an
# 80/10/10 ratio; files are copied rather than moved, and the fixed seed
# keeps the split repeatable.
splitfolders.ratio(
    input=rootPath,
    output='imgs',
    ratio=(.8, .1, .1),
    seed=15,
    move=False,
    group_prefix=None,
)

In [36]:
# Sub-folders of the 'imgs' output directory passed to splitfolders.ratio.
train_dir = 'imgs/train'
val_dir = 'imgs/val'
test_dir = 'imgs/test'

# Side length (pixels) of the square images fed to the network; used both as
# the generators' target_size and as the MobileNetV2 input_shape.
IMG_SIZE = 224
# Number of images per batch yielded by the directory generators.
BATCH_SIZE = 32
# Width of the final softmax layer.
# NOTE(review): presumably the number of class folders in the dataset;
# verify that it is really 17, otherwise training fails on a shape mismatch.
CLASSES_NUM = 17

In [None]:
# Pixel values are rescaled from [0, 255] to [0, 1] for every split.
# NOTE(review): the ImageNet MobileNetV2 weights were trained with
# mobilenet_v2.preprocess_input scaling (inputs in [-1, 1]); confirm whether
# [0, 1] is intended here. Any inference code must apply the same scaling.
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Training batches: one-hot labels inferred from the sub-folder names.
train_gen = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# Validation batches. (The comma after class_mode was missing, which made
# this whole cell a SyntaxError.)
val_gen = val_datagen.flow_from_directory(
    val_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True
)

# Test batches: shuffle is off so predictions stay in the same order as the
# generator's file listing, which per-class reports rely on.
test_gen = test_datagen.flow_from_directory(
    test_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)


# 4|CNN MODEL


In [31]:
# MobileNetV2 feature extractor with ImageNet weights and without its
# original classification head.
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)
# Freeze the backbone so only the layers added on top of it are trained.
base_model.trainable = False

In [33]:
# Classification head stacked on the backbone's output:
# global average pooling -> 128-unit ReLU layer -> softmax over the classes.
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(128, activation='relu')(x)
predictions = Dense(CLASSES_NUM, activation='softmax')(x)

# Full network from the backbone's input to the class probabilities.
model = Model(outputs=predictions, inputs=base_model.input)


In [34]:
# Categorical cross-entropy matches the one-hot labels produced by the
# generators (class_mode='categorical'); accuracy is tracked during training.
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=Adam(learning_rate=0.0001),
)

In [None]:
# Train for 10 epochs, evaluating on the validation generator after each one;
# the returned History object is kept in `history`.
history = model.fit(
    train_gen,
    epochs=10,
    validation_data=val_gen,
)

# Save the trained model to an HDF5 file.
model.save("WasteClassification_model.h5")