<a href="https://colab.research.google.com/github/IbrahemAmar/Data-mining-and-Machine-Learning-/blob/main/lab11_ex1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import MaxPooling2D
from keras.callbacks import EarlyStopping
import random

# Side length, in pixels, of the square digit images (matches the 28x28 reshapes used later).
# NOTE(review): the visible cells hard-code 28 instead of referencing this constant.
IMG_SIZE = 28

In [None]:
def plotSamples(data,rows,cols,names,tLabels,pLabels=None):
    """Show a rows x cols grid of randomly chosen images with their labels.

    Args:
        data: Array of images indexed on the first axis (``data.shape[0]`` images).
        rows: Number of grid rows.
        cols: Number of grid columns.
        names: Sequence mapping a class index to its display name.
        tLabels: True class index of every image, aligned with ``data``.
        pLabels: Optional predicted class index of every image, aligned with
            ``data``. ``None`` or an empty sequence means "no predictions", in
            which case only the true label is shown.
    """
    # Decide on presence, not on truthiness of the values: np.any() would treat
    # a prediction vector made only of class 0 as "no predictions".
    hasPredictions = pLabels is not None and len(pLabels) > 0

    # squeeze=False keeps axs two-dimensional, so axs[i, j] is valid even for
    # grids with a single row or a single column.
    fig, axs = plt.subplots(rows, cols, squeeze=False)
    nDigits = data.shape[0]
    for i in range(rows):
        for j in range(cols):
            index = random.randint(0, nDigits - 1)
            ax = axs[i, j]
            ax.imshow(data[index], cmap='gray')
            # hide ticks and frame
            ax.axis('off')
            if hasPredictions:
                caption = 'P: %s, T: %s' % (names[pLabels[index]], names[tLabels[index]])
            else:
                caption = 'T: %s' % names[tLabels[index]]
            ax.set_title(caption, fontdict={'fontsize': 10})
    # show the figure
    plt.show()

In [None]:
def baseline_model(num_classes=None, input_shape=(28, 28, 1)):
    """Build and compile the notebook's convolutional digit classifier.

    Architecture: Conv2D(32, 5x5) -> MaxPooling2D -> Conv2D(16, 3x3) ->
    MaxPooling2D -> Dropout(0.2) -> Flatten -> Dense(64) ->
    Dense(num_classes, softmax).

    Args:
        num_classes: Width of the softmax output layer. When omitted, the
            module-level ``nClasses`` is used, so existing ``baseline_model()``
            calls keep working (``nClasses`` must then be defined before the
            call).
        input_shape: Shape of one input image as (height, width, channels).

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        the Adam optimizer and the accuracy metric.
    """
    if num_classes is None:
        # Backward-compatible fallback to the notebook-level global.
        num_classes = nClasses

    # create model
    model = Sequential()
    model.add(Conv2D(32, (5, 5), input_shape=input_shape, activation='relu'))
    model.add(MaxPooling2D())

    # second convolution stage (the "NEW layer" of this exercise)
    model.add(Conv2D(16, (3, 3), activation='relu'))
    model.add(MaxPooling2D())

    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))

    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model

In [None]:
def plotTrainingCurves(history,numEpochs):
  """Plot training/validation accuracy and loss against the epoch index.

  Args:
    history: Object whose ``history`` attribute is a dict holding the lists
      'accuracy', 'loss', 'val_accuracy' and 'val_loss' (as returned by
      ``model.fit``).
    numEpochs: Right-hand limit of the x-axis.
  """
  curves = history.history
  # (history key, legend label) pairs; labels are attached to each line so the
  # legend cannot get out of step with the plotting order.
  series = [
      ('accuracy', 'Accuracy'),
      ('loss', 'loss'),
      ('val_accuracy', 'val_accuracy'),
      ('val_loss', 'val_loss'),
  ]
  # Accuracy lives in [0, 1]; loss may exceed 1, so grow the upper limit
  # instead of clipping the loss curves.
  yMax = 1.0
  for key, label in series:
    values = curves[key]
    plt.plot(values, label=label)
    yMax = max([yMax, *values])
  plt.axis([0, numEpochs, 0, yMax])
  plt.title('Model accuracy and loss')
  plt.ylabel('Accuracy / loss')
  plt.xlabel('Epoch')
  plt.legend()
  plt.show()

In [None]:
# Side-by-side comparison of an image and its transformed version.
def visualize(original, augmented, title=""):
  """Draw ``original`` and ``augmented`` next to each other in grayscale.

  Args:
    original: Image shown in the left panel under 'Original image'.
    augmented: Image shown in the right panel.
    title: Prefix of the right panel's heading (``title + " image"``).
  """
  panels = (
      ('Original image', original),
      (title + " image", augmented),
  )
  for slot, (heading, picture) in enumerate(panels, start=1):
    plt.subplot(1, 2, slot)
    plt.title(heading)
    plt.imshow(picture, cmap='gray')
  plt.show()

In [None]:
# Image augmentation pipeline: a Sequential stack of Keras random preprocessing layers.
data_augmentation = tf.keras.Sequential(
    layers=[
        tf.keras.layers.RandomFlip(mode="horizontal"),
        # factor is a fraction of a full turn, so 0.1 allows up to +/-36 degrees
        tf.keras.layers.RandomRotation(factor=0.1),
        # zoom in or out by up to 20%
        tf.keras.layers.RandomZoom(height_factor=0.2),
        # shift by up to 20% of the height and of the width
        tf.keras.layers.RandomTranslation(height_factor=0.2, width_factor=0.2),
        # Exercise variant (disabled): additionally flip top-to-bottom.
        # tf.keras.layers.RandomFlip("vertical"),
    ]
)

In [None]:
# Element-wise function intended for Dataset.map().
# NOTE(review): per the Keras preprocessing-layers guide, layers applied through
# Dataset.map run inside the tf.data input pipeline (on CPU), not on the GPU.
def augment(image,label):
    """Return ``(augmented image, unchanged label)``.

    The image is passed through ``data_augmentation`` with ``training=True`` so
    the random layers are active; the label is forwarded untouched.
    """
    return data_augmentation(image, training=True), label

In [None]:
# Display name of each class: the digits "0" through "9", indexed by class id.
names = [str(digit) for digit in range(10)]

In [None]:
# Load the MNIST train/test split through keras.datasets.mnist.
mnistTrain, mnistTest = mnist.load_data()
trainData, trainLabels = mnistTrain
testData, testLabels = mnistTest

# Report the array shapes of both splits.
print('Train: X=%s, y=%s' % (trainData.shape, trainLabels.shape))
print('Test: X=%s, y=%s' % (testData.shape, testLabels.shape))

In [None]:
# Grid dimensions reused by the sample plots in this notebook.
rows, cols = 3, 3

# Random training digits with their true labels.
plotSamples(trainData, rows, cols, names, tLabels=trainLabels)

In [None]:
# Preview every augmentation on a few randomly chosen training digits.

# (panel title, layer) pairs; the layers are built once instead of once per image.
demoAugmentations = [
    ("Random Flip", tf.keras.layers.RandomFlip("horizontal")),
    # factor 0.1 is a fraction of a full turn: up to +/-36 degrees
    ("Random Rotation", tf.keras.layers.RandomRotation(0.1)),
    # zoom in or out by up to 20%
    ("Random Zoom", tf.keras.layers.RandomZoom(0.2)),
    # shift by up to 20% of the height and of the width
    ("Random Translation", tf.keras.layers.RandomTranslation(0.2, 0.2)),
    # the complete augmentation pipeline
    ("augmented", data_augmentation),
]

for i in range(3):
    # randrange excludes the upper bound; randint(0, n) may return n itself,
    # which would index one past the last image.
    index = random.randrange(trainData.shape[0])
    image = trainData[index].reshape(28,28,1)

    for panelTitle, layer in demoAugmentations:
        # training=True makes the random transform explicit, matching how
        # augment() calls the pipeline.
        visualize(image, layer(image, training=True), panelTitle)

In [None]:
# Add a trailing channel axis, cast to float32 and divide the pixel values by 255.
# NOTE: trainData/testData are overwritten, so running this cell twice rescales twice.
trainData = trainData.reshape(-1, 28, 28, 1).astype('float32') / 255.0
testData = testData.reshape(-1, 28, 28, 1).astype('float32') / 255.0

# One-hot encode the integer labels into 10 columns.
catTrainLabels, catTestLabels = (
    to_categorical(labels, num_classes=10) for labels in (trainLabels, testLabels)
)

In [None]:
batch_size = 64
AUTOTUNE = tf.data.AUTOTUNE
# Augmented training pipeline.
# Order matters: cache() stores whatever is produced upstream of it, so it has
# to come BEFORE shuffle() and the random augment map. Placed after them, the
# first epoch's sample order and augmented images would be replayed unchanged
# in every later epoch, defeating both the shuffling and the augmentation.
train_ds = (
    tf.data.Dataset.from_tensor_slices((trainData, catTrainLabels))
    .cache()
    .shuffle(10000)
    .map(augment, num_parallel_calls=AUTOTUNE)
    .batch(batch_size)
    .prefetch(AUTOTUNE)
)

In [None]:
# Augmented evaluation pipeline, built step by step.
# cache() deliberately follows the augment map: once the dataset has been read
# through completely, later passes replay the same augmented images, giving a
# fixed augmented test set (see tf.data.Dataset.cache).
test_ds = tf.data.Dataset.from_tensor_slices((testData, catTestLabels))
test_ds = test_ds.map(augment, num_parallel_calls=AUTOTUNE)
test_ds = test_ds.cache().batch(batch_size).prefetch(AUTOTUNE)

In [None]:
# Create NON-AUGMENTED training pipeline.
# cache() comes before shuffle(): caching after the shuffle would record the
# first epoch's order and replay it, so the data would never be reshuffled.
train_ds_no_aug = (
    tf.data.Dataset.from_tensor_slices((trainData, catTrainLabels))
    .cache()
    .shuffle(10000)
    .batch(batch_size)
    .prefetch(AUTOTUNE)
)

# Non-augmented evaluation pipeline: the original test images, in order, batched.
test_ds_no_aug = tf.data.Dataset.from_tensor_slices((testData, catTestLabels))
test_ds_no_aug = test_ds_no_aug.cache().batch(batch_size).prefetch(AUTOTUNE)


In [None]:
# Number of classes = width of the one-hot label matrix (read by baseline_model).
nClasses = catTestLabels.shape[1]

# Upper bound on the number of training epochs.
nEpochs = 15

# Two separately built instances of the same architecture: one to be trained
# on the augmented pipeline, one on the non-augmented pipeline.
model, model_no_aug = baseline_model(), baseline_model()

In [None]:
# Early stopping on validation loss: give up after 2 epochs without a lower
# val_loss, print a message when stopping, and roll back to the best weights.
# NOTE(review): this one callback instance is handed to both fit() calls below.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=2,
    restore_best_weights=True,
    verbose=1,
)

In [None]:
# Train the first model on the augmented pipeline, validating on the augmented test pipeline.
history = model.fit(
    x=train_ds,
    validation_data=test_ds,
    epochs=nEpochs,
    callbacks=[early_stopping],
)

# Train the second model on the non-augmented pipeline, validating on the plain test pipeline.
history_no_aug = model_no_aug.fit(
    x=train_ds_no_aug,
    validation_data=test_ds_no_aug,
    epochs=nEpochs,
    callbacks=[early_stopping],
)

In [None]:
# Curves for the model trained on train_ds (the augmented pipeline) only;
# history_no_aug is not plotted by this cell.
plotTrainingCurves(history,nEpochs)

In [None]:
# Predict on the augmented test pipeline rather than on the clean testData:
# predLabels is displayed next to the augmented images taken from test_ds, so
# it has to be index-aligned with them. test_ds is unshuffled and cached after
# the augment map, so this pass sees the same images in the same order.
predLabels = model.predict(test_ds).argmax(1)

In [None]:
# Gather the image batches yielded by test_ds into a single NumPy array
# (the label half of each batch is ignored).
augmentedTestData = np.concatenate(
    [images.numpy() for images, _ in test_ds], axis=0
)

In [None]:
# Random augmented test images titled with predicted (P) and true (T) labels.
# NOTE(review): the P titles are only meaningful if predLabels holds predictions
# for these same augmented images, index-aligned with augmentedTestData.
plotSamples(augmentedTestData,rows,cols,names,testLabels,predLabels)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Define models to evaluate (both are scored on the same non-augmented testData)
models_to_test = [("Augmented", model), ("Non-Augmented", model_no_aug)]

for name, m in models_to_test:
    # 1. Predict and build the matrix (rows = true class, columns = predicted class)
    preds = m.predict(testData, verbose=0).argmax(axis=1)
    cm = confusion_matrix(testLabels, preds)

    # 2. Plot, with labelled axes so true vs. predicted is unambiguous
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d',
                cmap='Blues' if name == "Augmented" else 'Oranges', ax=ax)
    ax.set_title(f'Confusion Matrix: {name}')
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
    plt.show()

    # 3. Per-digit error rate = 1 - (correct predictions / samples of that digit)
    error_rates = 1 - (cm.diagonal() / cm.sum(axis=1))

    print(f"\n>>> {name} Error Rates:")
    for digit, rate in enumerate(error_rates):
        print(f"{digit}: {rate:.2%}")
    print("-" * 30)

In [None]:
import random
import tensorflow as tf
import matplotlib.pyplot as plt

# 1. Pick a random image from the training set
# randrange excludes the upper bound; randint(0, n) may return n itself, which
# would index one past the last image.
index = random.randrange(trainData.shape[0])
original_image = trainData[index]

# 2. Flip deterministically for the illustration.
# A RandomFlip("vertical") layer leaves some inputs unflipped, so the panel
# titled "Vertically Flipped" could end up showing an unchanged image.
flipped_image = tf.image.flip_up_down(original_image)
flipped_image = tf.squeeze(flipped_image) # Drop the channel axis for plotting

# 3. Use the existing visualize function to plot them side-by-side
# .squeeze() on the original makes both panels 2-D arrays
visualize(original_image.squeeze(), flipped_image, "Vertically Flipped")

In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# 1. Configuration: the augmentations to demonstrate, keyed by panel title
aug_layers = {
    "Flipped": tf.keras.layers.RandomFlip("horizontal"),
    "Zoomed": tf.keras.layers.RandomZoom(height_factor=0.2),
    "Translated": tf.keras.layers.RandomTranslation(height_factor=0.2, width_factor=0.2)
}

# 2. Get images: at most the first 4 (sorted by name) so the plot stays readable.
# Truncating here keeps the figure size and subplot grid in step with what is drawn.
my_images = sorted(f for f in os.listdir() if f.endswith(('.png', '.jpg')))[:4]

# 3. Load and prepare every image up front, skipping files OpenCV cannot decode
prepared = []
for filename in my_images:
    raw_img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    if raw_img is None:
        # cv2.imread signals failure by returning None instead of raising
        print(f"Skipping unreadable image: {filename}")
        continue
    raw_img = cv2.resize(raw_img, (28, 28))
    # Invert and normalize (dark-on-light drawing -> light-on-dark, values in [0, 1])
    prepared.append((filename, (255 - raw_img).astype('float32') / 255.0))

# 4. Plot one row per image: the original followed by one column per augmentation
if not prepared:
    # A zero-row figure is invalid, so report and skip plotting
    print("No readable .png/.jpg images found in the working directory.")
else:
    n_rows = len(prepared)
    n_cols = 1 + len(aug_layers)
    plt.figure(figsize=(12, 3 * n_rows))

    for i, (filename, clean_img) in enumerate(prepared):
        input_tensor = clean_img.reshape(1, 28, 28, 1)

        # --- Original ---
        plt.subplot(n_rows, n_cols, (i * n_cols) + 1)
        plt.imshow(clean_img, cmap='gray')
        plt.title(f"{filename}\n(Original)")
        plt.axis('off')

        # --- Augmentations ---
        for col_index, (aug_name, layer) in enumerate(aug_layers.items(), start=2):
            # training=True so the random layer actually applies its transform
            transformed_tensor = layer(input_tensor, training=True)

            # Drop the batch axis and convert back to numpy for plotting
            result_img = transformed_tensor[0].numpy()

            plt.subplot(n_rows, n_cols, (i * n_cols) + col_index)
            plt.imshow(result_img, cmap='gray')
            plt.title(aug_name)
            plt.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

# 1. Get list of images (first 12 by name, matching the 3x4 grid below)
image_files = sorted(f for f in os.listdir() if f.endswith(('.png', '.jpg')))[:12]

# 2. Preprocess, skipping files OpenCV cannot decode
shown_files = []
inverted_images = []
for filename in image_files:
    img_gray = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    if img_gray is None:
        # cv2.imread signals failure by returning None instead of raising
        print(f"Skipping unreadable image: {filename}")
        continue
    # Resize to the network's input size and invert (255 - pixel)
    inverted_images.append(255 - cv2.resize(img_gray, (28, 28)))
    shown_files.append(filename)

if not inverted_images:
    print("No readable .png/.jpg images found in the working directory.")
else:
    # 3. Predict once per model on the whole batch instead of once per image
    input_batch = np.stack(inverted_images).astype('float32').reshape(-1, 28, 28, 1) / 255.0
    labels_no_aug = model_no_aug.predict(input_batch, verbose=0).argmax(axis=1)
    labels_aug = model.predict(input_batch, verbose=0).argmax(axis=1)

    # 4. Plot every image with both predictions in its title
    plt.figure(figsize=(15, 8))
    for i, (filename, img_inverted, label_no_aug, label_aug) in enumerate(
            zip(shown_files, inverted_images, labels_no_aug, labels_aug)):
        plt.subplot(3, 4, i + 1)
        plt.imshow(img_inverted, cmap='gray')

        # Green title when both models agree, red when they disagree
        if label_no_aug == label_aug:
            title_text = f"Both agree: {label_no_aug}"
            title_color = 'green'
        else:
            title_text = f"Non-Aug: {label_no_aug} | Aug: {label_aug}"
            title_color = 'red'

        plt.title(f"{filename}\n{title_text}", color=title_color, fontsize=10)
        plt.axis('off')

    plt.tight_layout()
    plt.show()