In [None]:
import tensorflow as tf
from tensorflow.keras import models, layers
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
from helpers import split_data_set, view_dataset_batches, convert_to_gray_scale, get_class_distribution, plot_bar, plot_pie

In [None]:
from vars import IMAGE_SIZE, DATASET_DIRECTORY, BATCH_SIZE, TRAINING_SIZE, SHUFFLE_SIZE, SEED, COLORS, EPOCHS, FILTER_SIZE, INPUT_SHAPE, POOLING_SIZE, FILTERS_NUMBER, HUGE_FILTER_SIZE

<h1>Fetch Data From Dataset</h1>

In [None]:
# Load the labelled images from DATASET_DIRECTORY as a batched tf.data dataset.
# Images are resized to IMAGE_SIZE x IMAGE_SIZE and grouped BATCH_SIZE at a time.
# The shuffle is seeded with the project-wide SEED so a fresh kernel starts from
# the same file order instead of a new random one on every run.
dataset = tf.keras.preprocessing.image_dataset_from_directory(
    DATASET_DIRECTORY,
    shuffle=True,
    seed=SEED,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE
)

In [None]:
# Class labels exposed by the loaded dataset object; shown as the cell output.
class_names = dataset.class_names
class_names

In [None]:
# Number of distinct classes; later used as the width of the model's output layer.
n_classes = len(class_names)
n_classes

In [None]:
# Length of the dataset; it was created with batch_size=BATCH_SIZE, so this counts batches.
len(dataset)

In [None]:
# Peek at a single batch: print the image tensor's shape and the label values.
for image_batch, label_batch in dataset.take(1):
    print("Image shape:", image_batch.shape)
    print("Image class:", label_batch.numpy())

In [None]:
# Preview the dataset with the project helper (its behaviour is defined in helpers.py).
view_dataset_batches(dataset, class_names)

<p>Convert to grayscale</p>

In [None]:
# Apply helpers.convert_to_gray_scale to every dataset element.
# NOTE: this rebinds `dataset`, so re-running the cell maps the helper over an
# already-converted dataset.
dataset = dataset.map(convert_to_gray_scale)

<h1>Split data</h1>
<h4 style="margin-bottom: 8px;">We will split the dataset into:</h4>
<p style="margin: 0;">80% ==> training</p>
<p style="margin: 0;">20% ==> 10% validation, 10% test</p>

In [None]:
# Split into training / validation / testing datasets with the project helper.
# NOTE(review): the positional 0.1 is presumably the validation fraction and True a
# shuffle flag (the text above mentions a 10% validation share) - confirm against
# helpers.split_data_set.
training_ds, validation_ds, testing_ds = split_data_set(dataset, TRAINING_SIZE, 0.1, True, SHUFFLE_SIZE, SEED)

In [None]:
# Length of the training split (presumably counted in batches - confirm in split_data_set).
len(training_ds)

In [None]:
# Length of the validation split (presumably counted in batches - confirm in split_data_set).
len(validation_ds)

In [None]:
# Length of the testing split (presumably counted in batches - confirm in split_data_set).
len(testing_ds)

In [None]:
# Cache each split after its first pass and prefetch so input preparation overlaps
# with model execution.
# Only the training split is shuffled: evaluation metrics do not depend on element
# order, so shuffling validation/test data just costs buffer memory and makes the
# sample previews drawn from the test split differ from run to run.
training_ds = training_ds.cache().shuffle(1000).prefetch(buffer_size=tf.data.AUTOTUNE)
validation_ds = validation_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)
testing_ds = testing_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

In [None]:
# Peek at a single batch of `dataset` as it stands at this point in the notebook:
# print the image tensor's shape and the label values.
for image_batch, label_batch in dataset.take(1):
    print("Image shape:", image_batch.shape)
    print("Image class:", label_batch.numpy())

In [None]:
# Preview the dataset again, telling the helper the images are gray via is_gray=True.
view_dataset_batches(dataset, class_names, is_gray=True)

<h1>Understand and see the dataset</h1>

In [None]:
# Class distribution of the training split, computed by the project helper.
# The result is later merged on its 'Class' column.
train_counts = get_class_distribution(training_ds, class_names)
train_counts

In [None]:
# Class distribution of the validation split, computed by the project helper.
validation_counts = get_class_distribution(validation_ds, class_names)
validation_counts

In [None]:
# Class distribution of the testing split, computed by the project helper.
test_counts = get_class_distribution(testing_ds, class_names)
test_counts

<p style="color: #e01; font-size: 23px;">Training dataset</p>

In [None]:
# Draw the training-split distribution twice via the project helpers: bar, then pie.
plot_bar(train_counts, "Class Distribution in Training Data")
plot_pie(train_counts, "Percentage Distribution in Training Data")

<p style="color: #0e1; font-size: 23px;">Validation dataset</p>

In [None]:
# Draw the validation-split distribution twice via the project helpers: bar, then pie.
plot_bar(validation_counts, "Class Distribution in Validation Data")
plot_pie(validation_counts, "Percentage Distribution in Validation Data")

<p style="color: #1ee; font-size: 23px;">Testing dataset</p>

In [None]:
# Draw the testing-split distribution twice via the project helpers: bar, then pie.
plot_bar(test_counts, "Class Distribution in Test Data")
plot_pie(test_counts, "Percentage Distribution in Test Data")

<p style="color: #e5ef23; font-size: 23px;">Full dataset (all splits)</p>

In [None]:
# Combine the three per-split count tables into one table keyed by 'Class'.
# Outer joins keep a class even if it is missing from one of the splits.
outer_on_class = dict(on='Class', how='outer')
train_and_validation = pd.merge(
    train_counts,
    validation_counts,
    suffixes=('_Train', '_Validation'),
    **outer_on_class,
)
merged_data = pd.merge(train_and_validation, test_counts, **outer_on_class)

In [None]:
# Rename the columns positionally. This relies on the merge above having produced
# exactly four columns in the order: key, train count, validation count, test count.
merged_data.columns = ['Class', 'Train', 'Validation', 'Test']
merged_data

In [None]:
# Order the rows by the 'Class' column and rebind the name to the sorted frame.
merged_data = merged_data.sort_values(by='Class')
merged_data

In [None]:
# Reshape to long form: one row per (Class, Dataset) pair with its Count.
# This is the layout the grouped bar plot below consumes (x='Class', hue='Dataset').
reshaped_data = merged_data.melt(id_vars='Class', value_vars=['Train', 'Validation', 'Test'],
                                var_name='Dataset', value_name='Count')
reshaped_data

In [None]:
# Grouped bar chart: per-class counts, one bar per split.
fig, ax = plt.subplots(figsize=(14, 7))
sns.barplot(
    data=reshaped_data,
    x='Class',
    y='Count',
    hue='Dataset',
    palette=COLORS,
    ax=ax,
)
ax.set_title('Class Distribution for Training, Validation, and Testing')

# Tilt the class labels so they stay readable.
plt.xticks(rotation=45, ha='right', fontsize=10)

# Light horizontal guide lines only.
ax.grid(axis='y', linestyle='--', alpha=0.6)

fig.tight_layout()

plt.show()

<h1>Pre-processing</h1>

In [None]:
# Preprocessing stage placed at the front of the model: resize every image to
# IMAGE_SIZE x IMAGE_SIZE, then scale pixel values by 1/255.
preprocessing_layers = [
    layers.Resizing(height=IMAGE_SIZE, width=IMAGE_SIZE),
    layers.Rescaling(scale=1.0 / 255),
]
resize_and_rescale = tf.keras.Sequential(preprocessing_layers)

<h1>Data Augmentation</h1>

In [None]:
# Random augmentation stage. In the model it sits AFTER resize_and_rescale, so it
# receives pixel values already scaled to [0, 1].
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal_and_vertical"),
    layers.RandomRotation(0.2),
    layers.RandomZoom(0.1),
    layers.RandomTranslation(0.1, 0.1),
    layers.RandomContrast(0.1),
    # RandomBrightness assumes a 0-255 input range by default; with [0, 1] inputs the
    # default would shift pixels by up to ~51 units and wipe out the image, so the
    # actual range is stated explicitly.
    layers.RandomBrightness(0.2, value_range=(0.0, 1.0)),
    layers.GaussianNoise(0.05),
])

In [None]:
# CNN classifier: preprocessing -> augmentation -> six Conv2D/MaxPooling stages ->
# Flatten -> Dense(64) -> softmax over n_classes.
# The input shape is supplied once through model.build() below; an `input_shape=`
# argument on a layer that is not first in a Sequential model is ignored (and
# triggers a warning on recent Keras), so it is not passed to the first Conv2D.
model = models.Sequential([
    resize_and_rescale,
    data_augmentation,
    layers.Conv2D(FILTERS_NUMBER, HUGE_FILTER_SIZE, activation="relu"),
    layers.MaxPooling2D(POOLING_SIZE),
    layers.Conv2D(FILTERS_NUMBER * 2, HUGE_FILTER_SIZE, activation="relu"),
    layers.MaxPooling2D(POOLING_SIZE),
    layers.Conv2D(FILTERS_NUMBER * 2, FILTER_SIZE, activation="relu"),
    layers.MaxPooling2D(POOLING_SIZE),
    layers.Conv2D(FILTERS_NUMBER * 2, FILTER_SIZE, activation="relu"),
    layers.MaxPooling2D(POOLING_SIZE),
    layers.Conv2D(FILTERS_NUMBER * 2, FILTER_SIZE, activation="relu"),
    layers.MaxPooling2D(POOLING_SIZE),
    layers.Conv2D(FILTERS_NUMBER * 2, FILTER_SIZE, activation="relu", padding="same"),
    layers.MaxPooling2D(POOLING_SIZE),
    # NOTE(review): the values reaching this layer come from a ReLU convolution
    # followed by max pooling, so they are already non-negative and LeakyReLU leaves
    # them unchanged. Kept to preserve the layer layout; consider removing it or
    # moving it in place of the preceding "relu" activation.
    layers.LeakyReLU(alpha=0.2),
    layers.Flatten(),
    layers.Dense(64, activation="relu"),
    layers.Dense(n_classes, activation="softmax")
])

# NOTE(review): build() expects the full input shape including the batch axis -
# confirm INPUT_SHAPE in vars.py is defined that way.
model.build(input_shape=INPUT_SHAPE)

In [None]:
# Print the layer-by-layer summary of the built model.
model.summary()

In [None]:
class SparseRecall(tf.keras.metrics.Recall):
    """Recall that accepts integer class labels and per-class probability outputs.

    The stock ``Recall`` metric compares ``y_true`` and ``y_pred`` element-wise, so it
    cannot be fed integer labels of shape ``(batch,)`` together with softmax outputs
    of shape ``(batch, n_classes)``. This subclass one-hot encodes both the labels and
    the arg-max prediction before delegating to the parent implementation.

    The result is micro-averaged over all classes; for single-label classification
    that value coincides with accuracy. Use a per-class metric if class-level recall
    is needed.
    """

    def update_state(self, y_true, y_pred, sample_weight=None):
        # Static class count taken from the model output's last dimension.
        num_classes = y_pred.shape[-1]
        true_one_hot = tf.one_hot(tf.cast(tf.reshape(y_true, [-1]), tf.int32), num_classes)
        pred_one_hot = tf.one_hot(tf.argmax(y_pred, axis=-1), num_classes)
        return super().update_state(true_one_hot, pred_one_hot, sample_weight)


# Integer labels + softmax output -> sparse categorical cross-entropy on probabilities.
# The metric named "recall" is registered because later cells read
# history.history["recall"] and history.history["val_recall"]; without it those
# lookups raise KeyError.
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=["accuracy", SparseRecall(name="recall")]
)

<h1>Model training</h1>

In [None]:
# Train for EPOCHS epochs on the training split, passing the validation split as
# validation data; per-epoch results are kept in `history`.
fit_options = dict(epochs=EPOCHS, verbose=1, validation_data=validation_ds)
history = model.fit(training_ds, **fit_options)

<h1>Model results</h1>

In [None]:
# Training accuracy values recorded in the fit history.
acc = history.history["accuracy"]
acc

In [None]:
# Validation accuracy values recorded in the fit history.
val_acc = history.history["val_accuracy"]
val_acc

In [None]:
# Training recall values from the fit history.
# The "recall" key exists only if model.compile() registered a metric with that
# name; otherwise this lookup raises KeyError.
recall = history.history["recall"]
recall

In [None]:
# Validation recall values from the fit history.
# The "val_recall" key exists only if model.compile() registered a metric named
# "recall"; otherwise this lookup raises KeyError.
val_recall = history.history["val_recall"]
val_recall

In [None]:
# Training loss values recorded in the fit history.
loss = history.history["loss"]
loss

In [None]:
# Validation loss values recorded in the fit history.
val_loss = history.history["val_loss"]
val_loss

In [None]:
# Two side-by-side panels: accuracy curves on the left, loss curves on the right.
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(8, 8))
epoch_axis = range(EPOCHS)

accuracy_ax.plot(epoch_axis, acc, label="Training Accuracy")
accuracy_ax.plot(epoch_axis, val_acc, label="Validation Accuracy")
accuracy_ax.legend(loc="lower right")
accuracy_ax.set_title("Training and Validation Accuracy")

loss_ax.plot(epoch_axis, loss, label="Training Loss")
loss_ax.plot(epoch_axis, val_loss, label="Validation Loss")
loss_ax.legend(loc="upper right")
loss_ax.set_title("Training and Validation Loss")

In [None]:
# Accuracy, recall and loss curves side by side in ONE figure with a 1x3 grid.
# The earlier version opened a second figure halfway through and asked for subplot
# index 3 in a 1x2 grid, which matplotlib rejects with a ValueError.
# The figure is widened from 8x8 to 15x5 so the three panels are not cramped.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# (title suffix, training series, validation series, legend position) per panel.
panels = [
    ("Accuracy", acc, val_acc, "lower right"),
    ("Recall", recall, val_recall, "lower right"),
    ("Loss", loss, val_loss, "upper right"),
]

for ax, (metric_name, train_values, val_values, legend_loc) in zip(axes, panels):
    # Use the recorded length so the x-axis always matches the series being plotted.
    ax.plot(range(len(train_values)), train_values, label=f"Training {metric_name}")
    ax.plot(range(len(val_values)), val_values, label=f"Validation {metric_name}")
    ax.legend(loc=legend_loc)
    ax.set_title(f"Training and Validation {metric_name}")

fig.tight_layout()
plt.show()

In [None]:
def predict(model, img, class_names):
    """Classify a single image with ``model``.

    Parameters:
        model: trained Keras model whose output is one probability per class.
        img: one image array without a batch axis.
        class_names: class labels indexed by the model's output position.

    Returns:
        ``(predicted_class, confidence)`` where ``confidence`` is the highest class
        probability expressed as a percentage rounded to two decimals.
    """
    batch = tf.expand_dims(img, 0)  # the model expects a leading batch axis
    probabilities = model.predict(batch, verbose=0)[0]
    best = int(np.argmax(probabilities))
    return class_names[best], round(100 * float(probabilities[best]), 2)


# Show up to nine test images with their actual label, predicted label and confidence.
plt.figure(figsize=(15, 15))
for images, labels in testing_ds.take(1):
    # A batch may hold fewer than nine images; never index past its end.
    for i in range(min(9, images.shape[0])):
        plt.subplot(3, 3, i + 1)
        image = images[i].numpy()

        # Drop a trailing single-channel axis so imshow gets a 2-D array; the gray
        # colormap is ignored by matplotlib when the image has colour channels.
        pixels = image.astype("uint8")
        if pixels.ndim == 3 and pixels.shape[-1] == 1:
            pixels = pixels[..., 0]
        plt.imshow(pixels, cmap="gray")

        predicted_class, confidence = predict(model, image, class_names)
        actual_class = class_names[int(labels[i])]

        plt.title(f"Actual: {actual_class},\n Prediction: {predicted_class},\n Confidence: {confidence}")
        plt.axis("off")

In [None]:
# Write the trained model to the export directory (path is relative to the notebook's
# working directory).
export_dir = "../../model"
model.export(export_dir)