In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tqdm import tqdm

# Load feature extractor: ImageNet-pretrained EfficientNet-B0 without its
# classification head; pooling="avg" collapses the final feature map so the
# model emits one feature vector per image.
# NOTE(review): Keras EfficientNet models include their own input rescaling and
# expect raw pixel values in [0, 255] -- confirm train_ds is not already normalised.
IMG_SIZE = (224, 224)  # spatial size shared by the model input and the resize below

feat_model = keras.applications.EfficientNetB0(
    include_top=False,
    pooling="avg",
    input_shape=(*IMG_SIZE, 3),
    weights="imagenet"
)

# Subsample to avoid RAM blowup: every resized image is held in memory at once.
MAX_SAMPLES = 2000

images = []
labels = []

# After .unbatch() each element is a single (image, label) pair, not a batch.
# total= is only a hint for the progress bar; the dataset may yield fewer samples.
for image, label in tqdm(
    train_ds.unbatch().take(MAX_SAMPLES),
    total=MAX_SAMPLES,
    desc="Loading samples",
):
    img = tf.image.resize(image, IMG_SIZE)
    images.append(img.numpy())
    labels.append(label.numpy())

# Fail here with a clear message instead of deep inside model.predict later on.
if not images:
    raise ValueError("train_ds yielded no samples; nothing to embed")

images = np.array(images)
labels = np.array(labels)

print("Loaded:", images.shape, labels.shape)

In [None]:
# Run the sampled images through the feature extractor in mini-batches of 32
# and report the shape of the resulting embedding matrix.
embeddings = feat_model.predict(
    x=images,
    batch_size=32,
)
print("Embeddings:", embeddings.shape)

In [None]:
from sklearn.decomposition import PCA

# Project the embeddings onto their first two principal components for plotting.
# With the default svd_solver="auto", scikit-learn may pick the randomized SVD
# solver for an input of this size, so pin random_state to keep the projection
# identical across Restart & Run All.
pca = PCA(n_components=2, random_state=42)
emb_2d = pca.fit_transform(embeddings)
print("Variance explained:", pca.explained_variance_ratio_)

In [None]:
import matplotlib.pyplot as plt

# Scatter the 2-D PCA projection, colouring every point by its class.
# `c=` needs exactly one number per point: a 2-D label array would either be
# misread as RGB(A) rows or rejected. Collapse one-hot labels (N, num_classes)
# to class indices and flatten (N, 1) labels; 1-D labels pass through unchanged.
class_ids = np.asarray(labels)
if class_ids.ndim == 2 and class_ids.shape[1] > 1:
    class_ids = class_ids.argmax(axis=1)
else:
    class_ids = class_ids.reshape(-1)

fig, ax = plt.subplots(figsize=(10, 8))
scatter = ax.scatter(emb_2d[:, 0], emb_2d[:, 1], c=class_ids, cmap='tab20', s=10)
fig.colorbar(scatter, ax=ax, label="Class label")
ax.set_title("PCA of EfficientNet-B0 Features")
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
plt.show()