<a href="https://colab.research.google.com/github/Reyhaneh2001/DL_CIFAR10/blob/main/DL_CIFAR10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, datasets
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score
from keras.utils import to_categorical
import keras_tuner as kt

In [None]:
!pip install keras_tuner

Collecting keras_tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras_tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras_tuner
Successfully installed keras_tuner-1.4.7 kt-legacy-1.0.5


In [None]:
# 1. Load the CIFAR-10 images and labels through the Keras dataset helper
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# 2. Report the array dimensions of the inputs and of the targets
for _caption, _array in (
    ("Training images shape:", x_train),
    ("Training labels shape:", y_train),
    ("Test images shape:", x_test),
    ("Test labels shape:", y_test),
):
    print(_caption, _array.shape)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 0us/step
Training images shape: (50000, 32, 32, 3)
Training labels shape: (50000, 1)
Test images shape: (10000, 32, 32, 3)
Test labels shape: (10000, 1)


In [None]:

# 3. Re-partition the data: 15% held out for testing, the rest for train/validation.
# NOTE: this cell rebinds x_train / y_train / x_test / y_test, so executing it a second
# time without first re-running the loading cell would pool the already-split arrays.

# Pool the original train and test portions into one set
x_all = np.concatenate((x_train, x_test), axis=0)
y_all = np.concatenate((y_train, y_test), axis=0)

# Stratified hold-out: 15% test, 85% train+validation
x_train_val, x_test, y_train_val, y_test = train_test_split(
    x_all,
    y_all,
    test_size=0.15,
    stratify=y_all,
    random_state=42,
)

# Stratified split of that 85% into 80% train and 20% validation
x_train, x_val, y_train, y_val = train_test_split(
    x_train_val,
    y_train_val,
    test_size=0.2,
    stratify=y_train_val,
    random_state=42,
)

print("\nAfter splitting:")
for _caption, _array in (
    ("Train images shape:", x_train),
    ("Train labels shape:", y_train),
    ("Validation images shape:", x_val),
    ("Validation labels shape:", y_val),
    ("Test images shape:", x_test),
    ("Test labels shape:", y_test),
):
    print(_caption, _array.shape)


After splitting:
Train images shape: (40800, 32, 32, 3)
Train labels shape: (40800, 1)
Validation images shape: (10200, 32, 32, 3)
Validation labels shape: (10200, 1)
Test images shape: (9000, 32, 32, 3)
Test labels shape: (9000, 1)


In [None]:
# 5. One-hot encode the integer labels of every split (10 classes) so they can be
#    used with the categorical cross-entropy loss
y_train_cat, y_val_cat, y_test_cat = (
    to_categorical(_labels, num_classes=10)
    for _labels in (y_train, y_val, y_test)
)

print("\nOne-hot encoded labels (sample):")
print(y_train_cat[0:5])


One-hot encoded labels (sample):
[[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]]


In [None]:
def _scale_pixels(images):
    """Return `images` as a float32 array with pixel values scaled by 1/255.

    Only integer-typed arrays are divided by 255 (assumes 8-bit pixel values,
    0-255, which maps them into [0, 1]). An array that is already floating
    point is taken to be normalized and is only cast to float32, so running
    this cell again does not shrink the pixel values a second time.
    """
    if np.issubdtype(images.dtype, np.integer):
        return images.astype('float32') / 255
    return images.astype('float32')


# Normalize pixel values to be between 0 and 1
x_train = _scale_pixels(x_train)
x_val = _scale_pixels(x_val)
x_test = _scale_pixels(x_test)

In [None]:
# 6. Build a model with up to 5 hidden layers
def build_basic_model():
    """Create and compile the baseline CNN for 32x32 RGB inputs.

    Three 3x3 convolutions (32, 64 and 128 filters, with 2x2 max-pooling after
    the first two) feed a dense head of 128 and 64 ReLU units with 30% dropout
    in between, ending in a 10-way softmax. The model is compiled with Adam,
    categorical cross-entropy and the accuracy metric.

    Returns:
        The compiled `keras.Sequential` model.
    """
    image_input = layers.Input(shape=(32, 32, 3))

    # Hidden layers, part 1: convolutional feature extractor
    conv_stack = [
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    ]

    # Hidden layers, part 2: fully connected head, then the softmax output layer
    dense_head = [
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax'),
    ]

    model = keras.Sequential([image_input, *conv_stack, *dense_head])
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model


basic_model = build_basic_model()
basic_model.summary()

In [None]:
# Fit the baseline CNN for 15 epochs in mini-batches of 64, scoring the
# validation split after every epoch
history = basic_model.fit(
    x=x_train,
    y=y_train_cat,
    validation_data=(x_val, y_val_cat),
    batch_size=64,
    epochs=15,
)

Epoch 1/15
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 146ms/step - accuracy: 0.3209 - loss: 1.8253 - val_accuracy: 0.5441 - val_loss: 1.2599
Epoch 2/15
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 144ms/step - accuracy: 0.5420 - loss: 1.2753 - val_accuracy: 0.6303 - val_loss: 1.0485
Epoch 3/15
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 145ms/step - accuracy: 0.6261 - loss: 1.0548 - val_accuracy: 0.6606 - val_loss: 0.9649
Epoch 4/15
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 149ms/step - accuracy: 0.6787 - loss: 0.9182 - val_accuracy: 0.6786 - val_loss: 0.9174
Epoch 5/15
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 153ms/step - accuracy: 0.7133 - loss: 0.8207 - val_accuracy: 0.7109 - val_loss: 0.8473
Epoch 6/15
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 147ms/step - accuracy: 0.7525 - loss: 0.7022 - val_accuracy: 0.7197 - val_loss: 0.8222
Epoch 7

In [None]:
# Score the baseline CNN on the held-out test split; with the single 'accuracy'
# metric it was compiled with, evaluate() yields the pair (loss, accuracy)
basic_test_scores = basic_model.evaluate(x_test, y_test_cat)
test_loss, test_acc = basic_test_scores
print(f"\nBasic Model Test Accuracy: {test_acc:.4f}")

[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 24ms/step - accuracy: 0.7378 - loss: 1.0779

Basic Model Test Accuracy: 0.7392


In [None]:
# 7. Hyperparameter tuning with Keras Tuner
def build_model(hp):
    """Build and compile one candidate CNN from the hyperparameters in `hp`.

    Search space:
      * num_conv_layers: 1-3 blocks of Conv2D (3x3, ReLU, same padding) followed by
        2x2 max-pooling; block i uses conv_{i}_units filters (32-128, step 32).
      * num_dense_layers: 1-2 Dense (ReLU) layers; layer i has dense_{i}_units units
        (32-256, step 32) and is followed by Dropout at dropout_{i}_rate
        (0.0-0.5, step 0.1).
      * learning_rate: Adam learning rate, one of 1e-2, 1e-3, 1e-4.

    Args:
        hp: hyperparameter container handed in by the tuner; its Int / Float /
            Choice methods are used to declare and read the values above.

    Returns:
        The compiled `keras.Sequential` classifier with a 10-way softmax output,
        trained with categorical cross-entropy and reporting accuracy.
    """
    model = keras.Sequential()
    model.add(layers.Input(shape=(32, 32, 3)))

    # Convolutional part: both the depth and each layer's width are tuned
    n_conv_blocks = hp.Int('num_conv_layers', 1, 3)
    for i in range(n_conv_blocks):
        n_filters = hp.Int(f'conv_{i}_units', min_value=32, max_value=128, step=32)
        model.add(layers.Conv2D(n_filters, (3, 3), activation='relu', padding='same'))
        model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.Flatten())

    # Dense part: tuned depth, width and dropout rate per layer
    n_dense_blocks = hp.Int('num_dense_layers', 1, 2)
    for i in range(n_dense_blocks):
        n_units = hp.Int(f'dense_{i}_units', min_value=32, max_value=256, step=32)
        model.add(layers.Dense(n_units, activation='relu'))
        drop_rate = hp.Float(f'dropout_{i}_rate', min_value=0.0, max_value=0.5, step=0.1)
        model.add(layers.Dropout(drop_rate))

    model.add(layers.Dense(10, activation='softmax'))

    # The optimizer's learning rate is the last tuned value
    lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    adam = keras.optimizers.Adam(learning_rate=lr)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=adam,
        metrics=['accuracy'],
    )
    return model

In [None]:
# Hyperband search over the space declared in build_model, ranked by validation
# accuracy. Trial state is written under tuning/cifar10; KerasTuner reloads an
# existing project with that name unless overwrite=True is passed.
# The seed makes the sequence of sampled configurations repeatable.
tuner = kt.Hyperband(build_model,
                     objective='val_accuracy',
                     max_epochs=10,
                     factor=3,
                     seed=42,
                     directory='tuning',
                     project_name='cifar10')

# No `epochs` argument here: Hyperband assigns the epoch budget of every trial itself
# (capped by max_epochs above), so a value passed to search() would not take effect.
# EarlyStopping (default monitor: val_loss) ends a trial after 2 epochs without improvement.
tuner.search(x_train, y_train_cat,
             validation_data=(x_val, y_val_cat),
             callbacks=[keras.callbacks.EarlyStopping(patience=2)])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
Optimal hyperparameters:
- Number of convolutional layers: {best_hps.get('num_conv_layers')}
- Number of dense layers: {best_hps.get('num_dense_layers')}
- Learning rate: {best_hps.get('learning_rate')}
""")


Trial 30 Complete [00h 03m 42s]
val_accuracy: 0.10000000149011612

Best val_accuracy So Far: 0.7071568369865417
Total elapsed time: 04h 47m 04s

Optimal hyperparameters:
- Number of convolutional layers: 2
- Number of dense layers: 1
- Learning rate: 0.001



In [None]:
# Build a fresh (untrained) model from the best hyperparameters found by the tuner
optimized_model = tuner.hypermodel.build(best_hps)

# Train it with early stopping on val_loss (the callback's default monitor).
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss when training is stopped; without it the model keeps the weights of the
# last epoch run, i.e. `patience` epochs past the best one, and those weights
# are what the later evaluation cells would score.
optimized_history = optimized_model.fit(x_train, y_train_cat,
                                       epochs=30,
                                       batch_size=64,
                                       validation_data=(x_val, y_val_cat),
                                       callbacks=[keras.callbacks.EarlyStopping(
                                           patience=3,
                                           restore_best_weights=True)])

Epoch 1/30
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m213s[0m 324ms/step - accuracy: 0.3240 - loss: 1.8410 - val_accuracy: 0.5603 - val_loss: 1.2594
Epoch 2/30
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m235s[0m 283ms/step - accuracy: 0.5318 - loss: 1.3053 - val_accuracy: 0.6088 - val_loss: 1.0991
Epoch 3/30
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 279ms/step - accuracy: 0.6086 - loss: 1.1050 - val_accuracy: 0.6366 - val_loss: 1.0540
Epoch 4/30
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 305ms/step - accuracy: 0.6395 - loss: 1.0260 - val_accuracy: 0.6684 - val_loss: 0.9460
Epoch 5/30
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 280ms/step - accuracy: 0.6728 - loss: 0.9348 - val_accuracy: 0.6687 - val_loss: 0.9395
Epoch 6/30
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m219s[0m 307ms/step - accuracy: 0.6921 - loss: 0.8717 - val_accuracy: 0.6903 - val_loss: 0.8943
Epoc

In [None]:
# Score the tuned model on the held-out test split; evaluate() yields the pair
# (loss, accuracy) for the single 'accuracy' metric the model was compiled with
optimized_test_scores = optimized_model.evaluate(x_test, y_test_cat)
test_loss, test_acc = optimized_test_scores
print(f"\nOptimized Model Test Accuracy: {test_acc:.4f}")

# 8. Calculate metrics for both models
def calculate_metrics(model, x, y_true, y_true_cat):
    """Compute accuracy, macro F1 and ROC AUC of `model` on one data split.

    Args:
        model: object with a `predict(x)` method returning one score per class
            for each sample (the notebook's models end in a softmax layer).
        x: inputs passed to `model.predict`.
        y_true: integer class labels, compared against the arg-max prediction.
        y_true_cat: one-hot encoding of the same labels, scored against the raw
            per-class outputs for ROC AUC.

    Returns:
        dict with the keys 'Accuracy', 'F1-Score' (macro average) and 'ROC AUC'.
    """
    class_scores = model.predict(x)

    # Hard decision per sample: the index of the highest-scoring class
    predicted_labels = np.argmax(class_scores, axis=1)

    # Accuracy and macro-averaged F1 both come from the same report dictionary
    summary = classification_report(y_true, predicted_labels, output_dict=True)

    return {
        'Accuracy': summary['accuracy'],
        'F1-Score': summary['macro avg']['f1-score'],
        # ROC AUC is computed from the per-class scores, not the hard labels
        'ROC AUC': roc_auc_score(y_true_cat, class_scores, multi_class='ovr'),
    }

[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 39ms/step - accuracy: 0.7048 - loss: 0.8694

Optimized Model Test Accuracy: 0.7070


In [None]:
# The three data splits, each as (inputs, integer labels, one-hot labels),
# in the order train, validation, test
_metric_splits = (
    (x_train, y_train, y_train_cat),
    (x_val, y_val, y_val_cat),
    (x_test, y_test, y_test_cat),
)

# Metrics of the basic model on every split
basic_train_metrics, basic_val_metrics, basic_test_metrics = (
    calculate_metrics(basic_model, *_split) for _split in _metric_splits
)

# Metrics of the optimized model on every split
optimized_train_metrics, optimized_val_metrics, optimized_test_metrics = (
    calculate_metrics(optimized_model, *_split) for _split in _metric_splits
)


[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 21ms/step
[1m319/319[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 22ms/step
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 24ms/step
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 36ms/step
[1m319/319[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 36ms/step
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 37ms/step


In [None]:
import pandas as pd

# One entry per table row, in display order: (model label, split label, metrics dict)
_comparison_rows = [
    ('Basic', 'Train', basic_train_metrics),
    ('Basic', 'Validation', basic_val_metrics),
    ('Basic', 'Test', basic_test_metrics),
    ('Optimized', 'Train', optimized_train_metrics),
    ('Optimized', 'Validation', optimized_val_metrics),
    ('Optimized', 'Test', optimized_test_metrics),
]

# Columns: the two label columns first, then one column per metric
metrics_df = pd.DataFrame({
    'Model': [model_label for model_label, _, _ in _comparison_rows],
    'Dataset': [split_label for _, split_label, _ in _comparison_rows],
    **{
        metric: [scores[metric] for _, _, scores in _comparison_rows]
        for metric in ('Accuracy', 'F1-Score', 'ROC AUC')
    },
})

print("\nMetrics Comparison:")
print(metrics_df)


Metrics Comparison:
       Model     Dataset  Accuracy  F1-Score   ROC AUC
0      Basic       Train  0.970564  0.970567  0.999431
1      Basic  Validation  0.746078  0.746612  0.961962
2      Basic        Test  0.739222  0.739686  0.962295
3  Optimized       Train  0.831225  0.831329  0.986348
4  Optimized  Validation  0.703529  0.704012  0.955030
5  Optimized        Test  0.707000  0.707462  0.957478


In [None]:
# 9. Wide and Deep Network: two convolutional branches over the same image input,
#    flattened, concatenated and classified by a shared dense head
input_layer = layers.Input(shape=(32, 32, 3))

# Wide path (few layers, more units): one 5x5 convolution per stage, each stage
# followed by 2x2 max-pooling
wide_path = input_layer
for _n_filters in (128, 256):
    wide_path = layers.Conv2D(_n_filters, (5, 5), activation='relu', padding='same')(wide_path)
    wide_path = layers.MaxPooling2D((2, 2))(wide_path)
wide_path = layers.Flatten()(wide_path)

# Deep path (more layers, fewer units): two 3x3 convolutions per stage, each stage
# followed by 2x2 max-pooling
deep_path = input_layer
for _n_filters in (32, 64):
    for _ in range(2):
        deep_path = layers.Conv2D(_n_filters, (3, 3), activation='relu', padding='same')(deep_path)
    deep_path = layers.MaxPooling2D((2, 2))(deep_path)
deep_path = layers.Flatten()(deep_path)

# Join the two flattened branches and classify
merged = layers.concatenate([wide_path, deep_path])
merged = layers.Dense(256, activation='relu')(merged)
merged = layers.Dropout(0.5)(merged)
output_layer = layers.Dense(10, activation='softmax')(merged)

wide_deep_model = keras.Model(inputs=input_layer, outputs=output_layer)
wide_deep_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

wide_deep_model.summary()

In [None]:
# Train wide and deep model with early stopping on val_loss (the callback's default
# monitor). With patience=1 training halts at the first epoch whose val_loss fails
# to improve; restore_best_weights=True then rolls the model back to the best
# epoch, so the evaluation below does not score the weights of that worse, final epoch.
wide_deep_history = wide_deep_model.fit(x_train, y_train_cat,
                                       epochs=15,
                                       batch_size=64,
                                       validation_data=(x_val, y_val_cat),
                                       callbacks=[keras.callbacks.EarlyStopping(
                                           patience=1,
                                           restore_best_weights=True)])


Epoch 1/15
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1422s[0m 2s/step - accuracy: 0.3964 - loss: 1.6576 - val_accuracy: 0.5803 - val_loss: 1.1858
Epoch 2/15
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1509s[0m 2s/step - accuracy: 0.5679 - loss: 1.2175 - val_accuracy: 0.6676 - val_loss: 0.9473
Epoch 3/15
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1530s[0m 2s/step - accuracy: 0.6482 - loss: 0.9963 - val_accuracy: 0.6917 - val_loss: 0.8657
Epoch 4/15
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1474s[0m 2s/step - accuracy: 0.6974 - loss: 0.8643 - val_accuracy: 0.7049 - val_loss: 0.8312
Epoch 5/15
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1524s[0m 2s/step - accuracy: 0.7229 - loss: 0.7948 - val_accuracy: 0.7248 - val_loss: 0.7816
Epoch 6/15
[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1514s[0m 2s/step - accuracy: 0.7516 - loss: 0.7060 - val_accuracy: 0.7325 - val_loss: 0.7630
Epoch 7/15
[1m6

In [None]:
# Score the wide & deep model on the held-out test split; evaluate() yields the
# pair (loss, accuracy) for the single 'accuracy' metric it was compiled with
wide_deep_test_scores = wide_deep_model.evaluate(x_test, y_test_cat)
test_loss, test_acc = wide_deep_test_scores
print(f"\nWide & Deep Model Test Accuracy: {test_acc:.4f}")

# Accuracy / F1 / ROC AUC of the wide & deep model on train, validation and test
wide_deep_train_metrics, wide_deep_val_metrics, wide_deep_test_metrics = (
    calculate_metrics(wide_deep_model, *_split)
    for _split in (
        (x_train, y_train, y_train_cat),
        (x_val, y_val, y_val_cat),
        (x_test, y_test, y_test_cat),
    )
)

# Rows for the comparison table, in display order: (split label, metrics dict)
_wide_deep_rows = [
    ('Train', wide_deep_train_metrics),
    ('Validation', wide_deep_val_metrics),
    ('Test', wide_deep_test_metrics),
]
wide_deep_df = pd.DataFrame({
    'Model': ['Wide & Deep'] * len(_wide_deep_rows),
    'Dataset': [split_label for split_label, _ in _wide_deep_rows],
    **{
        metric: [scores[metric] for _, scores in _wide_deep_rows]
        for metric in ('Accuracy', 'F1-Score', 'ROC AUC')
    },
})

# Append the new rows below the basic / optimized rows with a fresh 0..n-1 index
full_metrics_df = pd.concat([metrics_df, wide_deep_df], ignore_index=True)
print("\nFull Metrics Comparison:")
print(full_metrics_df)

[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 276ms/step - accuracy: 0.7387 - loss: 0.7463

Wide & Deep Model Test Accuracy: 0.7336
[1m1275/1275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m345s[0m 271ms/step
[1m319/319[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 275ms/step
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 271ms/step

Full Metrics Comparison:
         Model     Dataset  Accuracy  F1-Score   ROC AUC
0        Basic       Train  0.970564  0.970567  0.999431
1        Basic  Validation  0.746078  0.746612  0.961962
2        Basic        Test  0.739222  0.739686  0.962295
3    Optimized       Train  0.831225  0.831329  0.986348
4    Optimized  Validation  0.703529  0.704012  0.955030
5    Optimized        Test  0.707000  0.707462  0.957478
6  Wide & Deep       Train  0.837672  0.838883  0.986931
7  Wide & Deep  Validation  0.728529  0.731519  0.964473
8  Wide & Deep        Test  0.733556  0.736224  0.965695
