# Preprocessing of the CIFAR-10 dataset images

## 1. Loading the dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical

# Load the CIFAR-10 dataset through the Keras dataset helper.
# The call returns a train split and a test split; each one is unpacked here
# into an (inputs, labels) pair. The recorded cell output shows the archive
# being downloaded from the cs.toronto.edu host when it is fetched.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
170498071/170498071 ━━━━━━━━━━━━━━━━━━━━ 11s 0us/step


## 2. Defining the number of output classes (labels), the input shape used by the models, and the class-name mapping

In [None]:
# Constants shared by the models: number of target classes and the shape of
# a single input sample.
num_classes = 10
input_shape = (32, 32, 3)

# Lookup from integer class id to its human-readable class name; the id of
# each name is its position in the tuple below (0 = "airplane", ..., 9 = "truck").
class_mapper = dict(enumerate((
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)))


## 3. Converting labels to one-hot encoded form

In [None]:
# One-hot encode the labels of both splits with `num_classes` columns.
# NOTE: the label arrays are overwritten in place, so re-running this cell
# without first re-running the loading cell applies the encoding a second time.
y_train, y_test = (
    to_categorical(labels, num_classes) for labels in (y_train, y_test)
)

## 4. Normalizing the input data

In [None]:
# Cast the images of both splits to float32 and divide by 255 (the maximum
# 8-bit colour value) to rescale the pixel intensities.
# NOTE: the image arrays are overwritten in place, so re-running this cell
# without first re-running the loading cell divides by 255 a second time.
x_train, x_test = (
    images.astype("float32") / 255 for images in (x_train, x_test)
)

## 5. Data visualization

In [None]:
# Plot a grid of random training images: one row per class and
# `samples_per_class` columns, with the class name shown above the first
# image of each row.
samples_per_class = 10

# `y_train` is one-hot encoded earlier in the notebook, so comparing it
# directly against a class id does not yield sample indices. Recover the
# integer class id of every sample first; if the labels are still in their
# integer form (encoding cell not run), simply flatten them.
if y_train.ndim == 2 and y_train.shape[1] > 1:
    class_ids = y_train.argmax(axis=1)
else:
    class_ids = y_train.ravel()

# squeeze=False keeps `axes` two-dimensional even for a single row or column,
# so `axes[row, col]` indexing below is always valid.
fig, axes = plt.subplots(
    num_classes, samples_per_class, figsize=(12, 12), squeeze=False
)
fig.suptitle(
    f"{samples_per_class} Random Samples from Each CIFAR-10 Class", fontsize=16
)

for class_id in range(num_classes):
    # Indices of all training samples that belong to this class.
    idxs = np.flatnonzero(class_ids == class_id)
    # Draw distinct samples so that no image is repeated within a row.
    selected = np.random.choice(idxs, samples_per_class, replace=False)
    for col, idx in enumerate(selected):
        ax = axes[class_id, col]
        ax.imshow(x_train[idx])
        ax.axis("off")
        if col == 0:
            # Label each row once, using the id -> name mapping defined above.
            ax.set_title(class_mapper[class_id], fontsize=8)

plt.tight_layout()
# Leave room at the top of the figure for the suptitle.
plt.subplots_adjust(top=0.95)
plt.show()

## 6. Augmentation

In [None]:
from tensorflow.keras import Sequential
from tensorflow.keras import layers

# Augmentation pipeline, built layer by layer: a random horizontal flip
# followed by a random rotation with factor 0.2.
augmentation_layers = Sequential()
augmentation_layers.add(layers.RandomFlip("horizontal"))
augmentation_layers.add(layers.RandomRotation(0.2))