In [7]:
import os
import shutil
import random
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint

In [16]:
# Root of the full dataset; reduce_dataset() reads the class sub-folders
# "0" and "1" from here.
DATA_DIR = "../dataset"
# Temporary reduced dataset: deleted and recreated by reduce_dataset(), then
# used by train() as the directory the image generators read from.
REDUCED_DATA_DIR = "../reduced_dataset"
# Passed as target_size to the image generators; also the first two
# dimensions of the model input shape (a third dimension of 3 is appended).
IMAGE_SIZE = (50, 50)
# Batch size used by both the training and the validation generator.
BATCH_SIZE = 32
# Number of epochs passed to model.fit().
EPOCHS = 10
# File path handed to ModelCheckpoint in train().
MODEL_PATH = "../backend/model/model.h5"

In [17]:
def reduce_dataset(samples_per_class=5000, seed=None, src_root=None, dst_root=None):
    """Copy a random, class-balanced subset of the dataset into a fresh directory.

    For each class label ("0" and "1") ``samples_per_class`` files are drawn
    without replacement from ``<src_root>/<label>`` and copied (with metadata)
    to ``<dst_root>/<label>``. Any existing ``dst_root`` is removed first, but
    only after every class has been validated, so a bad input never destroys a
    previously built reduced dataset or leaves a half-populated one.

    Args:
        samples_per_class: Number of files to copy per class (default 5000).
        seed: Optional seed. When given, the selection is reproducible: the
            directory listing is sorted and a dedicated ``random.Random(seed)``
            is used. When ``None`` the module-level ``random`` state is used,
            as in the original implementation.
        src_root: Source dataset root; defaults to ``DATA_DIR``.
        dst_root: Destination root; defaults to ``REDUCED_DATA_DIR``.

    Raises:
        ValueError: If ``samples_per_class`` is negative or a class folder
            holds fewer regular files than requested.
        FileNotFoundError: If a class folder does not exist under ``src_root``.
    """
    labels = ("0", "1")
    src_root = DATA_DIR if src_root is None else src_root
    dst_root = REDUCED_DATA_DIR if dst_root is None else dst_root

    if samples_per_class < 0:
        raise ValueError(f"samples_per_class must be >= 0, got {samples_per_class}")

    total = samples_per_class * len(labels)
    print(f"📦 Reducing dataset to {total:,} samples ({samples_per_class:,} per class)...")

    rng = random if seed is None else random.Random(seed)

    # Pass 1: pick the files for every class BEFORE touching the destination,
    # so any failure here leaves an existing reduced dataset intact.
    selection = {}
    for label in labels:
        src_dir = os.path.join(src_root, label)
        # Only regular files are candidates (a sub-directory would make
        # copy2 fail); sorting makes a seeded draw independent of the
        # arbitrary order returned by os.listdir.
        candidates = sorted(
            name for name in os.listdir(src_dir)
            if os.path.isfile(os.path.join(src_dir, name))
        )
        if len(candidates) < samples_per_class:
            raise ValueError(
                f"Class '{label}' has only {len(candidates)} files in {src_dir}; "
                f"{samples_per_class} requested."
            )
        selection[label] = rng.sample(candidates, samples_per_class)

    # Pass 2: remove and recreate the reduced directory, then copy.
    if os.path.exists(dst_root):
        shutil.rmtree(dst_root)
    os.makedirs(dst_root, exist_ok=True)

    for label, img_names in selection.items():
        src_dir = os.path.join(src_root, label)
        dst_dir = os.path.join(dst_root, label)
        os.makedirs(dst_dir, exist_ok=True)

        for img_name in img_names:
            shutil.copy2(os.path.join(src_dir, img_name), os.path.join(dst_dir, img_name))


In [18]:
def build_model(input_shape):
    """Build and compile the small CNN used for binary image classification.

    Args:
        input_shape: Shape of one input image, passed to the first
            convolution layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, binary cross-entropy
        loss, accuracy metric) ending in a single sigmoid unit.
    """
    # Feature extractor: two convolution + max-pooling stages.
    feature_layers = [
        Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=2, strides=2),
        Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=2, strides=2),
    ]

    # Classifier head: flatten, one hidden dense layer with dropout, then a
    # single sigmoid output unit.
    head_layers = [
        Flatten(),
        Dense(units=128, activation='relu'),
        Dropout(rate=0.3),
        Dense(units=1, activation='sigmoid'),
    ]

    model = Sequential(feature_layers + head_layers)
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [19]:
def train():
    """Run the whole pipeline: reduce the dataset, train the CNN, checkpoint it.

    Images are read from ``REDUCED_DATA_DIR``, rescaled by 1/255 and split
    80/20 into training and validation subsets. The model is fitted for
    ``EPOCHS`` epochs; a checkpoint callback writes to ``MODEL_PATH`` whenever
    ``val_accuracy`` improves (``save_best_only=True``).
    """
    # Step 1: create reduced dataset
    reduce_dataset()

    # Step 2: one generator factory provides both subsets of the same split.
    image_source = ImageDataGenerator(rescale=1./255, validation_split=0.2)
    flow_options = dict(
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='binary',
    )
    train_gen = image_source.flow_from_directory(
        REDUCED_DATA_DIR, subset='training', **flow_options
    )
    val_gen = image_source.flow_from_directory(
        REDUCED_DATA_DIR, subset='validation', **flow_options
    )

    # Step 3: build the network for 3-channel images of the configured size.
    rows, cols = IMAGE_SIZE[0], IMAGE_SIZE[1]
    model = build_model((rows, cols, 3))

    # Step 4: train, keeping only the checkpoint with the best val_accuracy.
    callbacks = [
        ModelCheckpoint(MODEL_PATH, monitor='val_accuracy', save_best_only=True),
    ]
    model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=EPOCHS,
        callbacks=callbacks,
    )
    print(f"✅ Model saved to {MODEL_PATH}")


In [20]:
# Entry point: run the full pipeline (dataset reduction followed by training)
# only when this code is executed as the main module, not when it is imported.
if __name__ == "__main__":
    train()

📦 Reducing dataset to 10,000 samples (5,000 per class)...
Found 8000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.
Epoch 1/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step - accuracy: 0.6509 - loss: 0.6037



[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 164ms/step - accuracy: 0.6513 - loss: 0.6034 - val_accuracy: 0.7130 - val_loss: 0.5718
Epoch 2/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 174ms/step - accuracy: 0.8076 - loss: 0.4461



[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 201ms/step - accuracy: 0.8076 - loss: 0.4461 - val_accuracy: 0.7165 - val_loss: 0.5472
Epoch 3/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step - accuracy: 0.7959 - loss: 0.4593



[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 192ms/step - accuracy: 0.7959 - loss: 0.4592 - val_accuracy: 0.7655 - val_loss: 0.4908
Epoch 4/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step - accuracy: 0.8100 - loss: 0.4365



[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 173ms/step - accuracy: 0.8100 - loss: 0.4365 - val_accuracy: 0.7720 - val_loss: 0.4948
Epoch 5/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 149ms/step - accuracy: 0.8206 - loss: 0.4162



[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 166ms/step - accuracy: 0.8206 - loss: 0.4162 - val_accuracy: 0.7850 - val_loss: 0.4682
Epoch 6/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 167ms/step - accuracy: 0.8265 - loss: 0.4051 - val_accuracy: 0.7745 - val_loss: 0.4781
Epoch 7/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 191ms/step - accuracy: 0.8259 - loss: 0.4021 - val_accuracy: 0.7710 - val_loss: 0.4725
Epoch 8/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 184ms/step - accuracy: 0.8226 - loss: 0.4015 - val_accuracy: 0.7775 - val_loss: 0.4785
Epoch 9/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 179ms/step - accuracy: 0.8410 - loss: 0.3736 - val_accuracy: 0.7705 - val_loss: 0.4954
Epoch 10/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step - accuracy: 0.8318 - loss: 0.3876



[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 193ms/step - accuracy: 0.8318 - loss: 0.3875 - val_accuracy: 0.7885 - val_loss: 0.4735
✅ Model saved to ../backend/model/model.h5
