# Overview of the Notebook

This notebook is focused on splitting the data for building a deep learning model for classifying parking lot images as either "empty" or "not empty," using reinforcement learning.

**Data Loading and Preprocessing**:
- The dataset is loaded from the specified directory.
- Images are resized, normalized, and split into training, validation, and testing sets.
- Data is cached and prefetched for efficient pipeline performance.

In [None]:
# Standard libraries
import os
import math

# Data manipulation and machine learning
import tensorflow as tf
from tensorflow.keras import mixed_precision

# Enable mixed precision
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)


In [None]:
# Project path
project_path = '/content/drive/MyDrive/parkinglot_project'

# Dataset paths in google drive
dataset_path = os.path.join(project_path, 'dataset/clf-data')
empty_path = os.path.join(dataset_path, 'empty')
not_empty_path = os.path.join(dataset_path, 'not_empty')

# Models directory
MODELS_DIRECTORY = os.path.join(project_path, 'saved_models')

# Create the directory if it doesn't exist
if not os.path.exists(MODELS_DIRECTORY):
    os.makedirs(MODELS_DIRECTORY)


In [3]:
# --- Set Parameters ---
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 10
DATASET_PATH = dataset_path

VAL_SIZE = 0.15
TEST_SIZE = 0.15

In [None]:
# --- Load and Preprocess Data ---

def load_and_preprocess_image():

    # Load full dataset and resize
    full_dataset = tf.keras.utils.image_dataset_from_directory(
        DATASET_PATH,
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        label_mode='int',
        shuffle=True,
        seed=42
    )

    class_names = full_dataset.class_names
    print("Class names:", class_names)

    for images, labels in full_dataset.take(1):
        print("Image shape:", images[0].shape)
        print("Image dtype:", images[0].dtype)
        print("Pixel min/max:", tf.reduce_min(images[0]).numpy(), tf.reduce_max(images[0]).numpy())

    # Rescaling the data
    def preprocess_image(image):
        return tf.keras.applications.mobilenet_v2.preprocess_input(image)

    full_dataset = full_dataset.map(lambda x, y: (preprocess_image(x), y))

    print("After rescaling:")
    for images, labels in full_dataset.take(1):
        print("Image shape:", images[0].shape)
        print("Image dtype:", images[0].dtype)
        print("Pixel min/max:", tf.reduce_min(images[0]).numpy(), tf.reduce_max(images[0]).numpy())

    # Get total batches
    total_batches = tf.data.experimental.cardinality(full_dataset).numpy()
    print ('Total batches:',total_batches )

    # Split dataset into training, validation, and testing datasets
    test_batches = math.floor(TEST_SIZE * total_batches)
    val_batches = math.floor(VAL_SIZE * total_batches)

    print (f"Test batches: {test_batches} of {BATCH_SIZE} images per batch, for a total of {test_batches*BATCH_SIZE} images")
    print (f"Val batches: {val_batches} of {BATCH_SIZE} images per batch, for a total of {val_batches*BATCH_SIZE} images")
    print (f"Train batches: {total_batches - test_batches - val_batches} of {BATCH_SIZE} images per batch, for a total of {(total_batches - test_batches - val_batches)*BATCH_SIZE} images")

    assert test_batches + val_batches <= total_batches, \
        "Sum of test and validation batches exceeds total number of batches"

    # Split the dataset
    test_dataset = full_dataset.take(test_batches)
    val_dataset = full_dataset.skip(test_batches).take(val_batches)
    train_dataset = full_dataset.skip(test_batches + val_batches)

    # Prefetch and cache data for better pipeline performance
    AUTOTUNE = tf.data.AUTOTUNE
    train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE).cache()
    val_dataset = val_dataset.prefetch(buffer_size=AUTOTUNE).cache()
    test_dataset = test_dataset.prefetch(buffer_size=AUTOTUNE).cache()

    return train_dataset, val_dataset, test_dataset, class_names

In [7]:
load_and_preprocess_image()

Found 6090 files belonging to 2 classes.
Class names: ['empty', 'not_empty']
Image shape: (224, 224, 3)
Image dtype: <dtype: 'float32'>
Pixel min/max: 4.927328 219.40182
After rescaling:
Image shape: (224, 224, 3)
Image dtype: <dtype: 'float32'>
Pixel min/max: -0.9955686 0.7708603
Total batches: 191
Test batches: 28 of 32 images per batch, for a total of 896 images
Val batches: 28 of 32 images per batch, for a total of 896 images
Train batches: 135 of 32 images per batch, for a total of 4320 images


(<CacheDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>,
 <CacheDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>,
 <CacheDataset element_spec=(TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>,
 ['empty', 'not_empty'])