In [7]:
import os
import shutil
import random
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint

In [8]:
# ---- Paths (relative to the notebook's working directory) ----
# Full dataset; reduce_dataset() reads the class sub-folders "0" and "1" from here.
DATA_DIR = "../dataset"
# Temporary reduced dataset; deleted and rebuilt by reduce_dataset().
REDUCED_DATA_DIR = "../reduced_dataset"
# File that the ModelCheckpoint callback in train() writes to.
MODEL_PATH = "../backend/model/model.h5"

# ---- Training hyper-parameters ----
# target_size handed to flow_from_directory; also the model's input height/width.
IMAGE_SIZE = (50, 50)
BATCH_SIZE = 32
EPOCHS = 10

In [15]:
def reduce_dataset(samples_per_class=5000, seed=None, data_dir=None, reduced_dir=None):
    """Copy a random subset of each class folder into a fresh reduced dataset.

    For each class label ("0" and "1") up to ``samples_per_class`` regular
    files are picked at random from ``<data_dir>/<label>`` and copied (with
    metadata, via ``shutil.copy2``) to ``<reduced_dir>/<label>``. Any existing
    ``reduced_dir`` is removed first.

    Args:
        samples_per_class: Maximum number of files to copy per class. If a
            class holds fewer files, all of them are used and a warning is
            printed instead of raising.
        seed: Optional seed for a private random generator, making the
            selection reproducible. ``None`` uses the global ``random`` state.
        data_dir: Source root; defaults to the module-level ``DATA_DIR``.
        reduced_dir: Destination root; defaults to ``REDUCED_DATA_DIR``.

    Raises:
        ValueError: If ``samples_per_class`` is negative.
        OSError: If a class folder under ``data_dir`` cannot be listed
            (raised before the old reduced dataset is touched).
    """
    class_labels = ("0", "1")
    source_root = DATA_DIR if data_dir is None else data_dir
    target_root = REDUCED_DATA_DIR if reduced_dir is None else reduced_dir

    if samples_per_class < 0:
        raise ValueError("samples_per_class must be non-negative")

    total_requested = samples_per_class * len(class_labels)
    print(f"📦 Reducing dataset to {total_requested:,} samples ({samples_per_class:,} per class)...")

    # A private generator keeps a seeded run from disturbing global random state.
    rng = random if seed is None else random.Random(seed)

    # Choose the files BEFORE deleting anything, so a missing or unreadable
    # source folder does not destroy a previously built reduced dataset.
    selection = {}
    for label in class_labels:
        src_dir = os.path.join(source_root, label)
        # os.listdir order is arbitrary and may include sub-directories:
        # sort for reproducibility and keep regular files only.
        candidates = sorted(
            name for name in os.listdir(src_dir)
            if os.path.isfile(os.path.join(src_dir, name))
        )
        if len(candidates) < samples_per_class:
            print(f"⚠️ Class '{label}' has only {len(candidates)} files; using all of them.")
        sample_size = min(samples_per_class, len(candidates))
        selection[label] = rng.sample(candidates, sample_size)

    # Remove and recreate reduced directory
    if os.path.exists(target_root):
        shutil.rmtree(target_root)
    os.makedirs(target_root, exist_ok=True)

    for label, chosen in selection.items():
        src_dir = os.path.join(source_root, label)
        dst_dir = os.path.join(target_root, label)
        os.makedirs(dst_dir, exist_ok=True)
        for img_name in chosen:
            shutil.copy2(os.path.join(src_dir, img_name), os.path.join(dst_dir, img_name))


In [9]:
def build_model(input_shape):
    """Create and compile the binary-classification CNN.

    The network stacks two Conv2D + MaxPooling2D stages (32 then 64 filters,
    3x3 kernels, ReLU), flattens the result, and finishes with a 128-unit
    ReLU Dense layer, Dropout(0.3) and a single sigmoid output unit.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first Conv2D layer.

    Returns:
        The Sequential model, compiled with the Adam optimizer,
        binary cross-entropy loss and the accuracy metric.
    """
    # Convolutional stages.
    conv_stages = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(2, 2),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
    ]
    # Dense head ending in one sigmoid unit.
    dense_head = [
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid'),
    ]

    network = Sequential(conv_stages + dense_head)
    network.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [10]:
def train():
    """Run the whole pipeline: rebuild the reduced dataset, then fit the CNN.

    Images under REDUCED_DATA_DIR are rescaled by 1/255 and split with
    ``validation_split=0.2`` into a training and a validation subset. The
    model from build_model() is trained for EPOCHS epochs; a ModelCheckpoint
    callback monitoring ``val_accuracy`` (``save_best_only=True``) writes to
    MODEL_PATH. Returns nothing.
    """
    # Step 1: create reduced dataset
    reduce_dataset()

    # Step 2: one generator provides both subsets of the split.
    image_source = ImageDataGenerator(rescale=1./255, validation_split=0.2)
    shared_flow_options = {
        'target_size': IMAGE_SIZE,
        'batch_size': BATCH_SIZE,
        'class_mode': 'binary',
    }
    training_batches = image_source.flow_from_directory(
        REDUCED_DATA_DIR, subset='training', **shared_flow_options
    )
    validation_batches = image_source.flow_from_directory(
        REDUCED_DATA_DIR, subset='validation', **shared_flow_options
    )

    # Step 3: build the network for 3-channel images of the configured size.
    classifier = build_model((IMAGE_SIZE[0], IMAGE_SIZE[1], 3))

    # Step 4: train with a checkpoint callback.
    best_model_saver = ModelCheckpoint(
        MODEL_PATH,
        monitor='val_accuracy',
        save_best_only=True,
    )
    classifier.fit(
        training_batches,
        validation_data=validation_batches,
        epochs=EPOCHS,
        callbacks=[best_model_saver],
    )
    print(f"✅ Model saved to {MODEL_PATH}")


In [13]:
# Entry point: runs the full pipeline via train().
# Every definition cell above must already have been executed in the current
# kernel -- in particular the one defining reduce_dataset(), which train()
# calls first. If it has not, this call fails with
# "NameError: name 'reduce_dataset' is not defined".
# Use "Restart Kernel -> Run All" to execute the cells in order.
train()

NameError: name 'reduce_dataset' is not defined