In [11]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D

In [12]:
# ImageNet-pretrained ResNet50 without its classification head; the input
# size is fixed to 224x224 RGB images.
base_model = ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=(224, 224, 3),
)

# Global average pooling collapses the spatial axes of the final feature map,
# so the model emits one flat feature vector per image.
x = GlobalAveragePooling2D()(base_model.output)
model = Model(inputs=base_model.input, outputs=x)

In [13]:
def preprocess_image(img):
    """Turn a BGR image array into a one-image batch for the ResNet50 extractor.

    The image is converted from BGR to RGB channel order, resized to
    224x224, given a leading batch axis, and finally passed through the
    ResNet50 ``preprocess_input`` function, whose result is returned.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (224, 224))
    # Add the batch dimension in front: (H, W, C) -> (1, H, W, C).
    batch = resized[np.newaxis, ...]
    return preprocess_input(batch)


In [14]:
def extract_features(dataset_path, batch_size=32):
    """Extract CNN feature vectors for every readable image under ``dataset_path``.

    Each sub-directory of ``dataset_path`` is treated as one class and its
    name is used as the label. Images are read with OpenCV, run through
    ``preprocess_image`` and sent through ``model`` in batches instead of
    issuing one ``predict`` call per image.

    Parameters
    ----------
    dataset_path : str
        Directory containing one sub-directory per class.
    batch_size : int, optional
        Number of images passed to ``model.predict`` at once (default 32).

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        Feature rows (one per readable image) and the matching class labels,
        in the same order. Both arrays are empty when no image could be read.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    X, y = [], []
    pending_imgs, pending_labels = [], []

    def _flush():
        # Run all queued images through the network in a single predict call.
        if not pending_imgs:
            return
        batch = np.concatenate(pending_imgs, axis=0)
        feats = model.predict(batch, batch_size=batch_size, verbose=0)
        X.extend(feats)  # one feature row per queued image
        y.extend(pending_labels)
        pending_imgs.clear()
        pending_labels.clear()

    for cls in sorted(os.listdir(dataset_path)):
        cls_path = os.path.join(dataset_path, cls)
        if not os.path.isdir(cls_path):
            continue

        skipped = 0
        # os.listdir order is arbitrary; sorting keeps the row order reproducible.
        for img_name in tqdm(sorted(os.listdir(cls_path)), desc=cls):
            img = cv2.imread(os.path.join(cls_path, img_name))
            if img is None:
                # Not a readable image: skip it, but keep count so it is reported.
                skipped += 1
                continue

            pending_imgs.append(preprocess_image(img))
            pending_labels.append(cls)
            if len(pending_imgs) >= batch_size:
                _flush()

        if skipped:
            print(f"{cls}: skipped {skipped} unreadable file(s)")

    # Process whatever is left over from the last, possibly partial, batch.
    _flush()

    return np.array(X), np.array(y)

In [15]:
# Extract features and labels from the train_aug and test directories.
X_train, y_train = extract_features("../data_split/train_aug")
X_test, y_test = extract_features("../data_split/test")

# Write features and labels as .npy files under ../features
# (the directory is created if it does not exist yet).
os.makedirs("../features", exist_ok=True)
for out_path, arr in (
    ("../features/X_train_cnn.npy", X_train),
    ("../features/X_test_cnn.npy", X_test),
    ("../features/y_train.npy", y_train),
    ("../features/y_test.npy", y_test),
):
    np.save(out_path, arr)

print("✅ CNN feature extraction completed")
print("Feature shape:", X_train.shape)

cardboard: 100%|██████████| 371/371 [01:00<00:00,  6.10it/s]
glass: 100%|██████████| 367/367 [00:57<00:00,  6.38it/s]
metal: 100%|██████████| 371/371 [00:58<00:00,  6.39it/s]
paper: 100%|██████████| 359/359 [01:03<00:00,  5.70it/s]
plastic: 100%|██████████| 364/364 [01:14<00:00,  4.92it/s]
trash: 100%|██████████| 376/376 [01:13<00:00,  5.11it/s]
cardboard: 100%|██████████| 52/52 [00:09<00:00,  5.31it/s]
glass: 100%|██████████| 81/81 [00:14<00:00,  5.50it/s]
metal: 100%|██████████| 66/66 [00:10<00:00,  6.48it/s]
paper: 100%|██████████| 96/96 [00:17<00:00,  5.39it/s]
plastic: 100%|██████████| 78/78 [00:15<00:00,  5.09it/s]
trash: 100%|██████████| 22/22 [00:04<00:00,  4.89it/s]

✅ CNN feature extraction completed
Feature shape: (2208, 2048)



