In [13]:
#!pip install scikit-learn

In [14]:
#!pip install opencv-python

In [15]:
#!pip install tensorflow

In [16]:
import os, math, random
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.applications import Xception
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import Sequence
from sklearn.model_selection import train_test_split

In [17]:
# --- config (ใช้ค่าของคุณ) ---
DATA_DIR = "./DataTrain/image"
VOLUME_FILES_DIR = "./DataTrain/label"
MODEL_SAVE_PATH = "./mangosteen_volume_model_all.h5"
IMG_SIZE = 224
BATCH_SIZE = 32
INITIAL_EPOCHS = 30
FINE_TUNE_EPOCHS = 20
LEARNING_RATE_INITIAL = 1e-3
LEARNING_RATE_FINE_TUNE = 1e-4

In [18]:
# --- helper augmentation (numpy-based, per-image) ---
def random_brightness_contrast(image):
    """
    image: float32, range 0..255
    contrast alpha ~ U(0.8,1.2)
    brightness beta ~ U(-0.2*255, 0.2*255)
    """
    alpha = np.random.uniform(0.8, 1.2)
    beta = np.random.uniform(-0.2, 0.2) * 255.0
    out = image * alpha + beta
    out = np.clip(out, 0.0, 255.0)
    return out

# --- Sequence implementation (stable for Keras) ---
class MangosteenSequence(Sequence):
    def __init__(self, image_paths, volumes, batch_size, img_size, is_training=True, shuffle=True):
        self.image_paths = list(image_paths)
        self.volumes = list(volumes)
        self.batch_size = batch_size
        self.img_size = img_size
        self.is_training = is_training
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        # return number of batches per epoch (ceil ensures we include last partial batch)
        return math.ceil(len(self.image_paths) / self.batch_size)

    def __getitem__(self, idx):
        start = idx * self.batch_size
        end = min(start + self.batch_size, len(self.image_paths))
        batch_paths = self.image_paths[start:end]
        batch_vols = self.volumes[start:end]

        images = []
        for p in batch_paths:
            img = cv2.imread(p)
            if img is None:
                # If an image fails to load, replace with zeros and warn
                print(f"⚠️ Warning: cv2.imread failed for {p}")
                img = np.zeros((self.img_size, self.img_size, 3), dtype=np.uint8)
            else:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = cv2.resize(img, (self.img_size, self.img_size))
            img = img.astype('float32')
            if self.is_training:
                # per-image augmentation
                img = random_brightness_contrast(img)
            images.append(img)

        images = np.stack(images, axis=0) / 255.0  # normalize to 0..1
        vols = np.array(batch_vols, dtype='float32')
        return images, vols

    def on_epoch_end(self):
        if self.shuffle and self.is_training:
            combined = list(zip(self.image_paths, self.volumes))
            random.shuffle(combined)
            self.image_paths, self.volumes = zip(*combined)
            self.image_paths = list(self.image_paths)
            self.volumes = list(self.volumes)


# --- load data (reuse your function or keep current) ---
def load_data_from_folders(data_dir, volume_dir):
    image_paths = []
    volumes = []
    if not os.path.exists(data_dir):
        print(f"❌ Image data directory not found: {data_dir}")
        return [], []
    if not os.path.exists(volume_dir):
        print(f"❌ Volume data directory not found: {volume_dir}")
        return [], []
    for filename in os.listdir(data_dir):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            base = filename.split('.', 1)[0]
            vol_file = os.path.join(volume_dir, base + ".txt")
            if os.path.exists(vol_file):
                try:
                    with open(vol_file,'r') as f:
                        v = float(f.read().strip())
                    image_paths.append(os.path.join(data_dir, filename))
                    volumes.append(v)
                except Exception as e:
                    print(f"⚠️ Skipping {filename}: {e}")
            else:
                print(f"⚠️ No matching volume file for {filename}")
    return image_paths, volumes

In [19]:
print("Loading data...")
image_paths, volumes = load_data_from_folders(DATA_DIR, VOLUME_FILES_DIR)
if not image_paths:
    raise SystemExit("❌ No matching image and volume data found. Please check your paths and filenames.")

train_paths, val_paths, train_volumes, val_volumes = train_test_split(
    image_paths, volumes, test_size=0.2, random_state=42
)

print(f"Total samples: {len(image_paths)}, train: {len(train_paths)}, val: {len(val_paths)}")

Loading data...
Total samples: 235, train: 188, val: 47


In [20]:
# --- create sequences ---
train_seq = MangosteenSequence(train_paths, train_volumes, BATCH_SIZE, IMG_SIZE, is_training=True)
val_seq = MangosteenSequence(val_paths, val_volumes, BATCH_SIZE, IMG_SIZE, is_training=False, shuffle=False)

# quick debug print
print("Batches per epoch (train):", len(train_seq), " (val):", len(val_seq))
x0, y0 = train_seq[0]
print("Sample batch shape:", x0.shape, y0.shape)

Batches per epoch (train): 6  (val): 2
Sample batch shape: (32, 224, 224, 3) (32,)


In [21]:
# --- model build (reuse your architecture) ---
print("\nBuilding model...")
base_model = Xception(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu', kernel_regularizer=l2(0.01))(x)
x = Dropout(0.3)(x)
x = Dense(512, activation='relu', kernel_regularizer=l2(0.01))(x)
predictions = Dense(1, activation='linear')(x)
model = Model(inputs=base_model.input, outputs=predictions)

for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer=Adam(learning_rate=LEARNING_RATE_INITIAL), loss='mean_squared_error', metrics=['mean_absolute_error'])



Building model...


In [22]:
# --- callbacks ---
callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)
]

In [23]:
# --- fit (use Sequence, no manual steps_per_epoch) ---
print("Training the regression head...")
model.fit(
    train_seq,
    validation_data=val_seq,
    epochs=INITIAL_EPOCHS,
    callbacks=callbacks,
    verbose=1
)


Training the regression head...
Epoch 1/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 4s/step - loss: 1849.5120 - mean_absolute_error: 35.9131 - val_loss: 1268.9559 - val_mean_absolute_error: 31.1261 - learning_rate: 0.0010
Epoch 2/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2s/step - loss: 838.1856 - mean_absolute_error: 22.9715 - val_loss: 499.7124 - val_mean_absolute_error: 17.3887 - learning_rate: 0.0010
Epoch 3/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3s/step - loss: 574.8273 - mean_absolute_error: 16.7079 - val_loss: 211.1373 - val_mean_absolute_error: 10.9952 - learning_rate: 0.0010
Epoch 4/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2s/step - loss: 478.3894 - mean_absolute_error: 17.3402 - val_loss: 223.6968 - val_mean_absolute_error: 11.3985 - learning_rate: 0.0010
Epoch 5/30
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2s/step - loss: 398.8846 - mean_absolute_error: 14.1

<keras.src.callbacks.history.History at 0x1c0a8bdcd90>

In [24]:
# --- fine-tune ---
print("\nFine-tuning the model (unfreeze last blocks)...")
# (หลังจาก unfreeze และ compile แล้ว)
model.fit(
    train_seq,
    validation_data=val_seq,
    epochs=FINE_TUNE_EPOCHS,
    callbacks=callbacks,
    verbose=1
)

print(f"\nSaving model to {MODEL_SAVE_PATH}")
model.save(MODEL_SAVE_PATH)
print("✅ Model saved.")


Fine-tuning the model (unfreeze last blocks)...
Epoch 1/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3s/step - loss: 374.4413 - mean_absolute_error: 14.4978 - val_loss: 210.9432 - val_mean_absolute_error: 10.9841 - learning_rate: 4.0000e-05
Epoch 2/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2s/step - loss: 367.8642 - mean_absolute_error: 14.2360 - val_loss: 210.7552 - val_mean_absolute_error: 10.9263 - learning_rate: 4.0000e-05
Epoch 3/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2s/step - loss: 383.7061 - mean_absolute_error: 14.4072 - val_loss: 210.8461 - val_mean_absolute_error: 10.8539 - learning_rate: 4.0000e-05
Epoch 4/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2s/step - loss: 373.4936 - mean_absolute_error: 14.0215 - val_loss: 211.4458 - val_mean_absolute_error: 10.7969 - learning_rate: 4.0000e-05
Epoch 5/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2s/step - loss: 381.2




Saving model to ./mangosteen_volume_model_all.h5
✅ Model saved.
