In [1]:
# increase the diversity of your training data, which can help prevent overfitting
# https://www.tensorflow.org/tutorials/images/data_augmentation

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

from tensorflow import keras
from keras import layers, models, optimizers 

In [15]:
IMG_SIZE = 180
BATCH_SIZE = 32

In [16]:
# Set the path to your dataset directory
train_directory = r'C:\Users\oswal\Desktop\ImageClassification\elpv-dataset\train_with_bad_images'
test_directory = r'C:\Users\oswal\Desktop\ImageClassification\elpv-dataset\test_images'

# Load the dataset | reqires every class to have its own folder
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    train_directory,
    image_size=(IMG_SIZE, IMG_SIZE),  # Resize images to match your model's input size
    batch_size=BATCH_SIZE,            # Batch size (same as the one in your prepare function)
    label_mode='int',                 # Label mode: 'int' will encode labels as integers
    class_names=['good', 'bad']       # Optional: Specify class names
)

test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    test_directory,
    image_size=(IMG_SIZE, IMG_SIZE),  # Resize images to match your model's input size
    batch_size=BATCH_SIZE,            # Batch size (same as the one in your prepare function)
    label_mode='int',                 # Label mode: 'int' will encode labels as integers
    class_names=['good', 'bad']       # Optional: Specify class names
)

Found 1896 files belonging to 2 classes.
Found 336 files belonging to 2 classes.


In [19]:
# allows the library to automatically tune the number of parallel calls made when mapping functions over the dataset
AUTOTUNE = tf.data.AUTOTUNE

# Layer for preprocessing
resize_and_rescale = tf.keras.Sequential([
  layers.Resizing(IMG_SIZE, IMG_SIZE),
  layers.Rescaling(1./255) # Normalize pixel values between 0 and 1
])
# Data augmentation 
data_augmentation = tf.keras.Sequential([
  layers.RandomFlip("horizontal_and_vertical"),
  layers.RandomRotation(0.2),
  layers.RandomZoom(0.2),
])

# Function to prepare the dataset for training
def prepare(ds, shuffle=False, augment=False):
  # Resize and rescale all datasets.
  ds = ds.map(lambda x, y: (resize_and_rescale(x), y), 
              num_parallel_calls=AUTOTUNE)

  if shuffle:
    ds = ds.shuffle(1000)

    # Use data augmentation only on the training set.
  if augment:
    ds = ds.map(lambda x, y: (data_augmentation(x, training=True), y), 
                num_parallel_calls=AUTOTUNE)
    
  # Batch all datasets.
  ds = ds.batch(BATCH_SIZE)

  # Use buffered prefetching on all datasets.
  return ds.prefetch(buffer_size=AUTOTUNE)

In [20]:
# only traainig data should be augmented
train_images = prepare(train_ds, shuffle=True, augment=True)
#val_ds = prepare(val_ds)
test_images = prepare(test_ds)




In [21]:
EPOCHS=10
OPTIMIZER="adam" #change manually !!!
LEARNING_RATE=1e-3

In [26]:
# building the Convolutional Base
model = models.Sequential()
#  32, 32, 3 means we will process 32 filters of size 3x3 | imput shape 64, 64, 1 means 64x64 pixels with one chanell for RGB change to 3
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))
# pooling layses to downsample our feature maps and reduce their dimensions
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))

# Add dense layers to classify into two classes true false 
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
# Last layer with two neurons to classify between true and false 
model.add(layers.Dense(2))

#model.summary()

In [34]:
model.compile(optimizer=optimizers.Adam(learning_rate=LEARNING_RATE),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
# Train model
history = model.fit(train_images, epochs=EPOCHS, batch_size=BATCH_SIZE,
                    validation_data=test_images)

ValueError: `validation_split` is only supported for Tensors or NumPy arrays, found following types in the input: [<class 'tensorflow.python.data.ops.dataset_ops.PrefetchDataset'>]