In [37]:
import os, shutil
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

RAW_DATA_DIR = '../data/raw'
OUTPUT_DIR = '../data/output_dataset'
MODEL_SAVE_PATH = '../w_flask/vgg16.h5'
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 10


In [38]:
import shutil
import os

for split in ['train', 'val', 'test']:
    checkpoint_path = f"data/output_dataset/{split}/.ipynb_checkpoints"
    if os.path.exists(checkpoint_path):
        shutil.rmtree(checkpoint_path)
        print(f"✅ Removed: {checkpoint_path}")


In [39]:
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
train_dir = '../data/output_dataset/train'

train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

print(train_generator.class_indices)


Found 259 images belonging to 3 classes.
{'Biodegradable Images': 0, 'Recyclable Images': 1, 'Trash Images': 2}


In [40]:
# Preview image counts
for cls in os.listdir(RAW_DATA_DIR):
    files = os.listdir(os.path.join(RAW_DATA_DIR, cls))
    print(f"{cls}: {len(files)} images")


Biodegradable Images: 130 images
Recyclable Images: 131 images
Trash Images: 130 images


In [41]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Define valid image extensions
valid_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

# Get list of class directories (assuming they are subdirectories in RAW_DATA_DIR)
classes = [d for d in os.listdir(RAW_DATA_DIR) if os.path.isdir(os.path.join(RAW_DATA_DIR, d))]

for cls in classes:
    img_dir = os.path.join(RAW_DATA_DIR, cls)
    all_files = os.listdir(img_dir)
    # Only keep image files
    images = [img for img in all_files if img.lower().endswith(valid_exts)]
    
    if len(images) == 0:
        print(f"Skipping {cls}, no valid images.")
        continue
    
    # Split data: 60% train, 20% validation, 20% test
    train_val, test = train_test_split(images, test_size=0.2, random_state=42)
    train, val = train_test_split(train_val, test_size=0.25, random_state=42)  # 0.25 * 0.8 = 0.2
    
    # Copy files to respective directories
    for img in train:
        shutil.copy(os.path.join(img_dir, img), os.path.join(OUTPUT_DIR, 'train', cls, img))
    
    for img in val:
        shutil.copy(os.path.join(img_dir, img), os.path.join(OUTPUT_DIR, 'val', cls, img))
    
    for img in test:
        shutil.copy(os.path.join(img_dir, img), os.path.join(OUTPUT_DIR, 'test', cls, img))

print("✅ Dataset split into train, val, and test")

✅ Dataset split into train, val, and test


In [42]:
train_dir = os.path.join(OUTPUT_DIR, 'train')
val_dir = os.path.join(OUTPUT_DIR, 'val')

train_gen = ImageDataGenerator(preprocessing_function=preprocess_input,
                               rotation_range=20, zoom_range=0.2, horizontal_flip=True).flow_from_directory(
    train_dir, target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode='categorical'
)

val_gen = ImageDataGenerator(preprocessing_function=preprocess_input).flow_from_directory(
    val_dir, target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode='categorical'
)

base = VGG16(weights='imagenet', include_top=False, input_shape=(*IMG_SIZE, 3))
x = Flatten()(base.output)
x = Dense(128, activation='relu')(x)
out = Dense(len(train_gen.class_indices), activation='softmax')(x)
model = Model(base.input, out)

for layer in base.layers:
    layer.trainable = False

model.compile(optimizer=Adam(1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(train_gen, epochs=EPOCHS, validation_data=val_gen)
model.save(MODEL_SAVE_PATH)
print("✅ Model trained and saved!")


Found 259 images belonging to 3 classes.
Found 78 images belonging to 3 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
✅ Model trained and saved!
