In [1]:
import kagglehub
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import os, shutil, pathlib
import numpy as np
from sklearn.utils import class_weight

# 1. CLEAN & COLLECT DATA
print("--- Step 1: Force Collecting All Data ---")
src_path = kagglehub.dataset_download("phenomsg/waste-classification")
src_dir = pathlib.Path(src_path)
dest_dir = pathlib.Path('/tmp/waste_pro_v5')

if dest_dir.exists(): shutil.rmtree(dest_dir)
dest_dir.mkdir(parents=True, exist_ok=True)

target_classes = ['Recyclable', 'Organic', 'Hazardous']
for cls in target_classes:
    target_cls_dir = dest_dir / cls
    target_cls_dir.mkdir(exist_ok=True)
    count = 0
    for f in src_dir.rglob('*'):
        if f.is_file() and f.suffix.lower() in ('.jpg', '.jpeg', '.png') and cls.lower() in str(f).lower():
            try:
                img_raw = tf.io.read_file(str(f))
                tf.io.decode_image(img_raw)
                shutil.copy(str(f), str(target_cls_dir / f"{count}_{f.name}"))
                count += 1
            except: continue
    print(f"✅ Found {count} images for {cls}")

# 2. PIPELINE WITH INTENSE AUGMENTATION
print("\n--- Step 2: Building Organic-Focus Pipeline ---")
train_ds = tf.keras.utils.image_dataset_from_directory(
    dest_dir, validation_split=0.2, subset="training", seed=123, image_size=(224, 224), batch_size=32
)
val_ds = tf.keras.utils.image_dataset_from_directory(
    dest_dir, validation_split=0.2, subset="validation", seed=123, image_size=(224, 224), batch_size=32
)

# HEAVY Augmentation to stop the model from being lazy
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal_and_vertical"),
    layers.RandomRotation(0.2),
    layers.RandomContrast(0.2), # Helps with Organic textures
    layers.RandomBrightness(0.1)
])

# CALCULATE CLASS WEIGHTS (The Secret Fix)
y_train = np.concatenate([y for x, y in train_ds], axis=0)
weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weights = dict(enumerate(weights))
print(f"Applying Class Weights: {class_weights}")

AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

# 3. ARCHITECTURE: RESNET50V2 (Smarter than MobileNet)
print("\n--- Step 3: Building ResNet Architecture ---")
base_model = tf.keras.applications.ResNet50V2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
base_model.trainable = False

model = models.Sequential([
    layers.Input(shape=(224, 224, 3)),
    data_augmentation,
    layers.Rescaling(1./255),
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(3, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# 4. TRAINING WITH BALANCING
callbacks = [
    EarlyStopping(monitor='val_accuracy', patience=6, restore_best_weights=True),
    ModelCheckpoint('waste_pro_final.keras', monitor='val_accuracy', save_best_only=True)
]

print("\n--- Training Phase 1 ---")
model.fit(train_ds, validation_data=val_ds, epochs=15, class_weight=class_weights, callbacks=callbacks)

print("\n--- Training Phase 2: Unfreezing for Organic Texture ---")
base_model.trainable = True # Fully unfreeze
model.compile(optimizer=tf.keras.optimizers.Adam(1e-5), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(train_ds, validation_data=val_ds, epochs=10, class_weight=class_weights, callbacks=callbacks)

print("\n--- DONE! Download 'waste_pro_final.keras' ---")

--- Step 1: Force Collecting All Data ---
Using Colab cache for faster access to the 'waste-classification' dataset.
✅ Found 1268 images for Recyclable
✅ Found 651 images for Organic
✅ Found 918 images for Hazardous

--- Step 2: Building Organic-Focus Pipeline ---
Found 2837 files belonging to 3 classes.
Using 2270 files for training.
Found 2837 files belonging to 3 classes.
Using 567 files for validation.
Applying Class Weights: {0: np.float64(1.0365296803652968), 1: np.float64(1.4607464607464606), 2: np.float64(0.7403783431180692)}

--- Step 3: Building ResNet Architecture ---
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94668760/94668760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step

--- Training Phase 1 ---
Epoch 1/15
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 492ms/step - accuracy: 0.5545 - loss: 1.1039 - val_accuracy: 0.7566 - val_loss: 0.