# 0. Import Packages

In [12]:
import pandas as pd
import os
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tqdm import tqdm
from datetime import date

# 1. Feature Extraction via Pre-Trained CNN for Training Data

## 1.1 Configurations 

In [16]:

# === PLEASE CONFIG ===
IMAGE_SIZE = (224, 224)
DATE= date.today().strftime("%Y_%m_%d")
DATASET_PATH = "/Users/felix/Documents/Data Science/06_Offical_project_DS/may25_bds_plants/05_data/original_data/2.1.1 New Plant Diseases/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train"
FEATURES_OUTPUT = f"/Users/felix/Documents/Data Science/06_Offical_project_DS/may25_bds_plants/05_data/computed_data/{DATE}_features_train.npy"
LABELS_OUTPUT = f"/Users/felix/Documents/Data Science/06_Offical_project_DS/may25_bds_plants/05_data/computed_data/{DATE}_labels_train.npy"


## 1.2 Extracting Process via CNN (MobileNetV2)

In [14]:
# === LOAD CNN (pre-trained, feature extractor only) ===
model = MobileNetV2(weights="imagenet", include_top=False, pooling="avg")

# === HELPER: Preprocess image ===
def preprocess_image(img_path):
    try:
        img = image.load_img(img_path, target_size=IMAGE_SIZE)
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        return preprocess_input(img_array)
    except Exception as e:
        print(f"Error processing {img_path}: {e}")
        return None

# === FEATURE EXTRACTION ===
features = []
labels = []

for label in tqdm(sorted(os.listdir(DATASET_PATH)), desc="Processing classes"):
    class_dir = os.path.join(DATASET_PATH, label)
    if not os.path.isdir(class_dir):
        continue

    for fname in os.listdir(class_dir):
        fpath = os.path.join(class_dir, fname)
        if not fname.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        pre_img = preprocess_image(fpath)
        if pre_img is None:
            continue

        feature = model.predict(pre_img, verbose=0).flatten()
        features.append(feature)
        labels.append(label)

features = np.array(features)
labels = np.array(labels)

print(f"\n Done. Extracted {features.shape[0]} feature vectors of dimension {features.shape[1]}")


  model = MobileNetV2(weights="imagenet", include_top=False, pooling="avg")
Processing classes: 100%|██████████| 39/39 [37:03<00:00, 57.02s/it]


 Done. Extracted 70295 feature vectors of dimension 1280





## 1.3 Save to Local Mashine

In [17]:
np.save(FEATURES_OUTPUT, features)
np.save(LABELS_OUTPUT, labels)
print(f"Features saved to {FEATURES_OUTPUT}")
print(f"Labels saved to {LABELS_OUTPUT}")


Features saved to /Users/felix/Documents/Data Science/06_Offical_project_DS/may25_bds_plants/05_data/computed_data/2025_06_25_features_train.npy
Labels saved to /Users/felix/Documents/Data Science/06_Offical_project_DS/may25_bds_plants/05_data/computed_data/2025_06_25_labels_train.npy


# 2. Feature Extraction via Pre-Trained CNN for Test Data

## 2.1 Configurations 

In [None]:

# === PLEASE RECONFIG ===
DATASET_PATH = "/Users/felix/Documents/Data Science/06_Offical_project_DS/may25_bds_plants/05_data/original_data/2.1.1 New Plant Diseases/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/valid"
FEATURES_OUTPUT = f"/Users/felix/Documents/Data Science/06_Offical_project_DS/may25_bds_plants/05_data/computed_data/{DATE}_features_valid.npy"
LABELS_OUTPUT = f"/Users/felix/Documents/Data Science/06_Offical_project_DS/may25_bds_plants/05_data/computed_data/{DATE}_labels_valid.npy"

## 2.2 Extracting Process via CCN (MobileNetV2)

In [None]:
# === LOAD CNN (pre-trained, feature extractor only) ===
model = MobileNetV2(weights="imagenet", include_top=False, pooling="avg")

# === HELPER: Preprocess image ===
def preprocess_image(img_path):
    try:
        img = image.load_img(img_path, target_size=IMAGE_SIZE)
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        return preprocess_input(img_array)
    except Exception as e:
        print(f"Error processing {img_path}: {e}")
        return None

# === FEATURE EXTRACTION ===
features = []
labels = []

for label in tqdm(sorted(os.listdir(DATASET_PATH)), desc="Processing classes"):
    class_dir = os.path.join(DATASET_PATH, label)
    if not os.path.isdir(class_dir):
        continue

    for fname in os.listdir(class_dir):
        fpath = os.path.join(class_dir, fname)
        if not fname.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        pre_img = preprocess_image(fpath)
        if pre_img is None:
            continue

        feature = model.predict(pre_img, verbose=0).flatten()
        features.append(feature)
        labels.append(label)

features = np.array(features)
labels = np.array(labels)

print(f"\n Done. Extracted {features.shape[0]} feature vectors of dimension {features.shape[1]}")


  model = MobileNetV2(weights="imagenet", include_top=False, pooling="avg")
Processing classes: 100%|██████████| 39/39 [09:26<00:00, 14.53s/it]


 Done. Extracted 17572 feature vectors of dimension 1280





## 2.3 Save to Local Mashine

In [None]:
np.save(FEATURES_OUTPUT, features)
np.save(LABELS_OUTPUT, labels)

print(f"Features saved to {FEATURES_OUTPUT}")
print(f"Labels saved to {LABELS_OUTPUT}")
