# Important notice
- Run this notebook in Docker

In [None]:
import os
import pandas as pd
from dotenv import load_dotenv
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.io import read_file, decode_png

In [None]:
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Root directory of the dataset, supplied through the DATA_DIR variable.
DATA_DIR = os.getenv("DATA_DIR")
if not DATA_DIR:
    # Fail fast: otherwise the f-string below would silently produce
    # "None/processed" and the error would only surface on the first file read.
    raise RuntimeError(
        "DATA_DIR is not set; define it in the environment or in a .env file."
    )

# Sub-directory that the rest of the notebook reads its inputs from.
PROCESSED_DATA_DIR = f"{DATA_DIR}/processed"

In [None]:
# Read a single sample PNG from the training set and decode it into a
# 3-channel tensor in one step.
sample_image_path = f"{PROCESSED_DATA_DIR}/images/train/not_rotten/0.png"
img = decode_png(read_file(sample_image_path), channels=3)

### Define image loading constants

In [None]:
# Image dimensions are taken from the sample image decoded above instead of
# being hard-coded.
# NOTE(review): tf.io.decode_png is documented to return a
# (height, width, channels) tensor, which would make shape[0] the height and
# shape[1] the width, i.e. the opposite of these names. The pair is later
# passed as target_size=(IMAGE_WIDTH, IMAGE_HEIGHT), and Keras documents
# target_size as (height, width), so the values appear to arrive in the right
# order -- confirm before renaming or swapping anything.
IMAGE_WIDTH = img.shape[0]
IMAGE_HEIGHT = img.shape[1]
BATCH_SIZE = 32  # batch_size passed to flow_from_directory
SEED = 42  # seed passed to flow_from_directory
VALIDATION_SPLIT = 0.2  # validation_split passed to ImageDataGenerator

### Load data

In [None]:
# Locate the per-split CSV files inside the processed data directory, then
# load each one into its own DataFrame.
train_csv_path = f"{PROCESSED_DATA_DIR}/train.csv"
test_csv_path = f"{PROCESSED_DATA_DIR}/test.csv"

train_df = pd.read_csv(train_csv_path)
test_df = pd.read_csv(test_csv_path)

In [None]:
# Generator for the training subset: rescales pixel values by 1/255 and
# reserves VALIDATION_SPLIT of the images for the "validation" subset.
# The augmentation options are currently commented out, so no augmentation
# is applied.
train_generator = ImageDataGenerator(
    rescale=1./255,
    # rotation_range=15,
    # width_shift_range=0.1,
    # height_shift_range=0.1,
    # horizontal_flip=True,
    # vertical_flip=True,
    validation_split=VALIDATION_SPLIT,
)

# Iterate over the "training" subset of the train image directory with binary
# labels. The path is built from PROCESSED_DATA_DIR so it stays consistent
# with the other data paths in this notebook.
# NOTE(review): Keras documents target_size as (height, width); the constants
# hold img.shape[0] and img.shape[1] in that order -- confirm this is intended.
train_ds = train_generator.flow_from_directory(
    directory=f"{PROCESSED_DATA_DIR}/images/train",
    target_size=(IMAGE_WIDTH, IMAGE_HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode="binary",
    seed=SEED,
    subset="training",
)

In [None]:
# Generator for the validation subset: only rescales pixel values by 1/255.
# It uses the same VALIDATION_SPLIT value as the training generator.
validation_generator = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT,
)

# Iterate over the "validation" subset of the same train image directory with
# binary labels. The path is built from PROCESSED_DATA_DIR so it stays
# consistent with the other data paths in this notebook.
# NOTE(review): Keras documents target_size as (height, width); the constants
# hold img.shape[0] and img.shape[1] in that order -- confirm this is intended.
validation_ds = validation_generator.flow_from_directory(
    directory=f"{PROCESSED_DATA_DIR}/images/train",
    target_size=(IMAGE_WIDTH, IMAGE_HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode="binary",
    seed=SEED,
    subset="validation",
)

### Define modeling constants

In [None]:
# Label mode for the binary task; same value as the class_mode="binary"
# literals passed to flow_from_directory in the data-loading cells.
CLASS_MODE = "binary"
# Loss identifier string -- presumably handed to model.compile in a later
# cell; verify where it is consumed.
LOSS_FUNCTION = "binary_crossentropy"