In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.models import Model
from tensorflow.keras.applications.efficientnet import preprocess_input

# Constants
IMAGE_SIZE = (300, 300)  # (width, height) handed to PIL's resize()
BATCH_SIZE = 32
DATA_DIR = 'train_images/'  # Only SOIL images available

# Load image filenames.
# os.listdir() returns entries in arbitrary order, so sort to keep the row
# order of the extracted feature matrix reproducible between runs.
image_paths = sorted(
    os.path.join(DATA_DIR, fname)
    for fname in os.listdir(DATA_DIR)
    if fname.lower().endswith(('.jpg', '.png', '.jpeg'))
)

# Load EfficientNetB3 base model for feature extraction.
# include_top=False with pooling='avg' makes the network emit one pooled
# feature vector per image instead of class scores. The input shape is derived
# from IMAGE_SIZE so the resize target and the network input cannot drift apart
# (Keras wants (height, width, channels); PIL uses (width, height)).
base_model = EfficientNetB3(
    include_top=False,
    input_shape=(IMAGE_SIZE[1], IMAGE_SIZE[0], 3),
    weights='imagenet',
    pooling='avg',
)
model = Model(inputs=base_model.input, outputs=base_model.output)

# Extract features from soil images
def extract_features(image_paths):
    """Return the pooled EfficientNetB3 feature vector of every image.

    Each image is opened, converted to RGB, resized to ``IMAGE_SIZE`` and run
    through the module-level ``model`` in batches of ``BATCH_SIZE``.

    Pixels are passed in the raw [0, 255] range: Keras' EfficientNet models
    rescale their input internally and their ``preprocess_input`` is a
    pass-through, so dividing by 255 beforehand would scale the image twice.
    NOTE(review): any inference code that consumes the saved statistics must
    preprocess images the same way (no ``/ 255``) for distances to be valid.

    Args:
        image_paths: Iterable of paths to image files readable by PIL.

    Returns:
        A 2-D array with one row per input image, in input order. An empty
        1-D array is returned when ``image_paths`` is empty.
    """
    image_paths = list(image_paths)
    feature_batches = []
    with tqdm(total=len(image_paths)) as progress:
        for start in range(0, len(image_paths), BATCH_SIZE):
            batch_paths = image_paths[start:start + BATCH_SIZE]
            batch = []
            for path in batch_paths:
                # The context manager closes the file handle promptly;
                # convert() forces the pixel data to load before that happens.
                with Image.open(path) as img:
                    resized = img.convert("RGB").resize(IMAGE_SIZE)
                batch.append(np.asarray(resized, dtype=np.float32))
            batch_array = preprocess_input(np.stack(batch))
            # One predict() call per batch instead of one per image.
            feature_batches.append(model.predict(batch_array, verbose=0))
            progress.update(len(batch_paths))
    if not feature_batches:
        return np.array([])
    return np.concatenate(feature_batches, axis=0)

soil_features = extract_features(image_paths)

# A covariance estimate needs at least two samples. With fewer, np.cov either
# yields NaNs or (for a single row with rowvar=False) treats the one feature
# vector as observations of a single variable, so fail early and clearly.
if soil_features.ndim != 2 or soil_features.shape[0] < 2:
    raise ValueError(
        f"Need at least 2 images in {DATA_DIR!r} to estimate a covariance "
        f"matrix, found {len(image_paths)}."
    )

# Compute training distribution stats
mean_vec = np.mean(soil_features, axis=0)
# rowvar=False: rows are samples (images), columns are feature dimensions.
cov_matrix = np.cov(soil_features, rowvar=False)
# The pseudo-inverse is used instead of inv() so a singular covariance matrix
# (e.g. fewer images than feature dimensions) still yields a result.
inv_cov_matrix = np.linalg.pinv(cov_matrix)

# Save stats for inference
np.save("mean_vec.npy", mean_vec)
np.save("inv_cov_matrix.npy", inv_cov_matrix)
np.save("soil_features.npy", soil_features)
