In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D

In [2]:
# ImageNet-pretrained ResNet50 used purely as a backbone: include_top=False
# drops the classification head so only the convolutional features remain.
base_model = ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=(224, 224, 3),
)

# Global average pooling collapses the spatial dimensions of the backbone's
# output, leaving one flat feature vector per input image.
x = GlobalAveragePooling2D()(base_model.output)
model = Model(inputs=base_model.input, outputs=x)
print(f"Feature extractor output shape: {model.output_shape}")

✅ Feature extractor output shape: (None, 2048)


In [3]:
def preprocess_image(img):
    """Turn an OpenCV image into a one-image batch for the feature extractor.

    Args:
        img: Image array in BGR channel order (the caller passes the result
            of ``cv2.imread``).

    Returns:
        The output of ``preprocess_input`` applied to the image after it has
        been converted to RGB, resized to 224x224 and given a leading batch
        axis.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (224, 224))
    # Add the batch axis up front: (H, W, C) -> (1, H, W, C).
    batch = resized[np.newaxis, ...]
    return preprocess_input(batch)


In [4]:
def extract_features(dataset_path, batch_size=32):
    """Extract CNN feature vectors for every readable image under a dataset folder.

    Each sub-directory of ``dataset_path`` is treated as one class, and its
    name becomes the label of the images inside it. Files that ``cv2.imread``
    cannot decode (it returns ``None``) are skipped.

    Args:
        dataset_path: Directory containing one sub-directory per class.
        batch_size: Number of preprocessed images passed to ``model.predict``
            per call. Must be at least 1.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` holds one feature row per
        image and ``y`` the matching class-name labels, in the same order.
        When no image is found, ``X`` has shape ``(0, feature_dim)``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    X, y = [], []

    def _flush(pending, label):
        # One predict call per batch instead of one per image: calling
        # model.predict inside a tight loop pays its per-call setup cost
        # every time and dominates the runtime.
        feats = model.predict(
            np.concatenate(pending, axis=0),
            batch_size=batch_size,
            verbose=0,
        )
        X.extend(feats)
        y.extend([label] * len(feats))

    for cls in sorted(os.listdir(dataset_path)):
        cls_path = os.path.join(dataset_path, cls)
        if not os.path.isdir(cls_path):
            continue

        pending = []
        # os.listdir returns names in arbitrary order; sorting keeps the
        # row order of the saved features reproducible across runs/machines.
        for img_name in tqdm(sorted(os.listdir(cls_path)), desc=cls):
            img = cv2.imread(os.path.join(cls_path, img_name))
            if img is None:
                # Not a decodable image (or unreadable file): skip it.
                continue

            pending.append(preprocess_image(img))
            if len(pending) >= batch_size:
                _flush(pending, cls)
                pending = []

        # Remaining images of this class that did not fill a whole batch.
        if pending:
            _flush(pending, cls)

    if not X:
        # Keep X two-dimensional even when nothing was extracted.
        return np.empty((0, model.output_shape[-1])), np.array(y)

    return np.array(X), np.array(y)

In [5]:
# Run the extractor once per dataset split.
print("\nExtracting training features...")
X_train, y_train = extract_features("../data_split/train_aug")

print("\nExtracting validation features...")
X_val, y_val = extract_features("../data_split/val")

print("\nExtracting test features...")
X_test, y_test = extract_features("../data_split/test")

# Persist every feature matrix and label vector as a .npy file.
os.makedirs("../features", exist_ok=True)
for out_path, out_array in (
    ("../features/X_train_cnn.npy", X_train),
    ("../features/X_val_cnn.npy", X_val),
    ("../features/X_test_cnn.npy", X_test),
    ("../features/y_train.npy", y_train),
    ("../features/y_val.npy", y_val),
    ("../features/y_test.npy", y_test),
):
    np.save(out_path, out_array)

# Summary of the extracted feature matrices; labels are left-padded to a
# common width so the shapes line up in one column.
print("\n CNN feature extraction completed")
for shape_label, split_features in (
    ("Train shape:", X_train),
    ("Val shape:", X_val),
    ("Test shape:", X_test),
):
    print(f"   {shape_label:<12} {split_features.shape}")


🔄 Extracting training features...


cardboard: 100%|██████████| 332/332 [00:51<00:00,  6.50it/s]
glass: 100%|██████████| 332/332 [00:48<00:00,  6.85it/s]
metal: 100%|██████████| 333/333 [00:51<00:00,  6.49it/s]
paper: 100%|██████████| 321/321 [00:49<00:00,  6.45it/s]
plastic: 100%|██████████| 323/323 [00:46<00:00,  6.88it/s]
trash: 100%|██████████| 339/339 [00:54<00:00,  6.19it/s]



🔄 Extracting validation features...


cardboard: 100%|██████████| 39/39 [00:05<00:00,  7.46it/s]
glass: 100%|██████████| 60/60 [00:07<00:00,  7.93it/s]
metal: 100%|██████████| 49/49 [00:08<00:00,  6.09it/s]
paper: 100%|██████████| 71/71 [00:10<00:00,  6.66it/s]
plastic: 100%|██████████| 58/58 [00:08<00:00,  6.87it/s]
trash: 100%|██████████| 16/16 [00:02<00:00,  6.64it/s]



🔄 Extracting test features...


cardboard: 100%|██████████| 39/39 [00:05<00:00,  6.92it/s]
glass: 100%|██████████| 61/61 [00:09<00:00,  6.44it/s]
metal: 100%|██████████| 50/50 [00:06<00:00,  7.33it/s]
paper: 100%|██████████| 72/72 [00:09<00:00,  7.88it/s]
plastic: 100%|██████████| 58/58 [00:07<00:00,  7.26it/s]
trash: 100%|██████████| 17/17 [00:02<00:00,  6.69it/s]


 CNN feature extraction completed
   Train shape: (1980, 2048)
   Val shape:   (282, 2048)
   Test shape:  (285, 2048)



