In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D

In [2]:
# Headless ResNet50 backbone: ImageNet weights with the classification top
# dropped, so the network emits convolutional feature maps, not class scores.
base_model = ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=(224, 224, 3),
)

# Global average pooling collapses the spatial dimensions of the final
# feature maps, leaving a single flat feature vector per input image.
x = GlobalAveragePooling2D()(base_model.output)
model = Model(inputs=base_model.input, outputs=x)
print(f"Feature extractor output shape: {model.output_shape}")

Feature extractor output shape: (None, 2048)


In [3]:
def preprocess_image(img):
    """Turn an OpenCV BGR image into a single-image batch for the extractor.

    Args:
        img: Image array in OpenCV's BGR channel order (e.g. the result of
            ``cv2.imread``).

    Returns:
        The image converted to RGB, resized to 224x224, given a leading
        batch axis, and passed through the ResNet50 ``preprocess_input``.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (224, 224))
    # Add the leading batch axis: (H, W, C) -> (1, H, W, C).
    single_batch = resized[np.newaxis, ...]
    return preprocess_input(single_batch)

In [4]:
def extract_features(dataset_path, batch_size=32):
    """Extract pooled CNN features for every readable image under ``dataset_path``.

    ``dataset_path`` is expected to contain one sub-directory per class; the
    sub-directory name is used as the label. Entries that are not directories
    are ignored. Files that ``cv2.imread`` cannot decode are skipped and
    reported once at the end instead of being dropped silently.

    Both the classes and the files inside each class are visited in sorted
    order, so the row order of the returned arrays is reproducible
    (``os.listdir`` on its own returns entries in arbitrary order).

    Images are sent through the module-level ``model`` in batches rather than
    with one ``predict`` call per image; ``Model.predict`` is designed for
    batch processing and carries noticeable per-call overhead.

    Args:
        dataset_path: Directory containing one sub-directory per class.
        batch_size: Number of images per forward pass. Must be >= 1.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X`` holds one feature vector per
        image and ``y`` the matching class-directory names, in the same order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    X, y = [], []
    pending = []   # preprocessed (1, H, W, C) arrays awaiting a forward pass
    skipped = []   # paths that cv2.imread could not decode

    def _flush():
        # One forward pass over everything queued; rows come back in queue
        # order, so X stays aligned with the labels already appended to y.
        feats = model.predict(
            np.concatenate(pending, axis=0),
            batch_size=batch_size,
            verbose=0,
        )
        X.extend(feats)
        pending.clear()

    for cls in sorted(os.listdir(dataset_path)):
        cls_path = os.path.join(dataset_path, cls)
        if not os.path.isdir(cls_path):
            continue

        for img_name in tqdm(sorted(os.listdir(cls_path)), desc=cls):
            img_path = os.path.join(cls_path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # Not a decodable image (or unreadable file): remember it.
                skipped.append(img_path)
                continue

            pending.append(preprocess_image(img))
            y.append(cls)
            if len(pending) >= batch_size:
                _flush()

    # Process the final, possibly partial, batch.
    if pending:
        _flush()

    if skipped:
        print(
            f"Skipped {len(skipped)} unreadable file(s) under {dataset_path} "
            f"(first: {skipped[0]})"
        )

    return np.array(X), np.array(y)

In [5]:
# Run the extractor over each data split (train, validation, test).
print("\nExtracting training features...")
X_train, y_train = extract_features("../data_split/train_aug")

print("\nExtracting validation features...")
X_val, y_val = extract_features("../data_split/val")

print("\nExtracting test features...")
X_test, y_test = extract_features("../data_split/test")

# Persist features and labels as .npy files, creating the folder if needed.
os.makedirs("../features", exist_ok=True)
for out_path, array in (
    ("../features/X_train_cnn.npy", X_train),
    ("../features/X_val_cnn.npy", X_val),
    ("../features/X_test_cnn.npy", X_test),
    ("../features/y_train.npy", y_train),
    ("../features/y_val.npy", y_val),
    ("../features/y_test.npy", y_test),
):
    np.save(out_path, array)

# Summary of what was produced.
print("\n CNN feature extraction completed")
for prefix, feats in (
    ("   Train shape: ", X_train),
    ("   Val shape:   ", X_val),
    ("   Test shape:  ", X_test),
):
    print(f"{prefix}{feats.shape}")


Extracting training features...


cardboard: 100%|██████████| 639/639 [00:56<00:00, 11.26it/s]
glass: 100%|██████████| 536/536 [00:43<00:00, 12.19it/s]
metal: 100%|██████████| 589/589 [00:48<00:00, 12.27it/s]
paper: 100%|██████████| 472/472 [00:38<00:00, 12.39it/s]
plastic: 100%|██████████| 539/539 [00:43<00:00, 12.25it/s]
trash: 100%|██████████| 747/747 [01:07<00:00, 11.00it/s]



Extracting validation features...


cardboard: 100%|██████████| 78/78 [00:06<00:00, 12.04it/s]
glass: 100%|██████████| 120/120 [00:09<00:00, 12.34it/s]
metal: 100%|██████████| 98/98 [00:08<00:00, 11.91it/s]
paper: 100%|██████████| 142/142 [00:11<00:00, 12.08it/s]
plastic: 100%|██████████| 116/116 [00:08<00:00, 13.16it/s]
trash: 100%|██████████| 32/32 [00:02<00:00, 11.47it/s]



Extracting test features...


cardboard: 100%|██████████| 78/78 [00:06<00:00, 11.67it/s]
glass: 100%|██████████| 122/122 [00:10<00:00, 11.79it/s]
metal: 100%|██████████| 100/100 [00:08<00:00, 12.43it/s]
paper: 100%|██████████| 144/144 [00:11<00:00, 12.62it/s]
plastic: 100%|██████████| 116/116 [00:09<00:00, 12.38it/s]
trash: 100%|██████████| 34/34 [00:02<00:00, 12.22it/s]


 CNN feature extraction completed
   Train shape: (3522, 2048)
   Val shape:   (561, 2048)
   Test shape:  (564, 2048)



