In [6]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from dataset_loader import load_images
import numpy as np

# Load and preprocess data
X, y = load_images('bottle')
print("Loaded:", len(X), "images")

X = X / 255.0

# Print class distribution
unique, counts = np.unique(y, return_counts=True)
print("Class distribution:", dict(zip(unique, counts)))

# Safe split (without stratify if class imbalance is too high)
if len(unique) > 1 and min(counts) >= 5:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
else:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Build model
base_model = tf.keras.applications.EfficientNetB0(
    include_top=False, input_shape=(224, 224, 3), pooling='avg', weights='imagenet'
)
model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train
model.fit(X_train, y_train, epochs=5, validation_split=0.1)

# Save model
model.save('bottle_defect_model.h5')

# Evaluate
y_pred = model.predict(X_test).ravel() > 0.5
print(classification_report(y_test, y_pred))


Loaded: 0 images
Class distribution: {}


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.