In [3]:
import  numpy as np
import tensorflow as tf
import pandas as pd
from pathlib import Path
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
#import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score, confusion_matrix,classification_report




In [4]:
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

DATA VISUALIZATION

In [5]:
#import tensorflow as tf
#print(tf.__version__)

In [6]:
def show_image(img: np.ndarray, ax: plt.Axes=None, title: str=None) -> None:
    """ Show array as image """
    if ax is None:
        ax = plt.subplot(111)
    ax.imshow(img)
    ax.axis("off")
    if title:
        ax.set_title(title)

NameError: name 'plt' is not defined

LABELS OF DATASET

In [None]:
labels = ['airplane', 'automobile', 'bird', 'cat', 'deer',
          'dog', 'frog', 'horse', 'ship', 'truck']


In [None]:
#Visualization
def get_rgb_image(file_var):
    R = file_var[:, :1024].reshape(-1, 32, 32)
    G = file_var[:, 1024:2048].reshape(-1, 32, 32)
    B = file_var[:, 2048:].reshape(-1, 32, 32)
    rgb = np.stack((R, G, B), axis=-1)
    return rgb 

LOADING OF BATCH FILE

In [None]:
def process_batch(file_path):
    data = unpickle(file_path)
    uncleanedX = data[b'data']
    x = get_rgb_image(uncleanedX)
    y = data[b'labels']
    return x, y

LOADING OF BATCHES

In [None]:
#Load Batches
data_dir = "../data/cifar-10-batches-py/"
files = [i for i in Path(data_dir).glob("data_batch*")]
files


[WindowsPath('../data/cifar-10-batches-py/data_batch_1'),
 WindowsPath('../data/cifar-10-batches-py/data_batch_2'),
 WindowsPath('../data/cifar-10-batches-py/data_batch_3'),
 WindowsPath('../data/cifar-10-batches-py/data_batch_4'),
 WindowsPath('../data/cifar-10-batches-py/data_batch_5')]

TRAINING SET

In [None]:
x, y = [], []
for i in files:
    xi, yi = process_batch(i)
    x.append(xi)
    y.append(yi)

X_train = np.concatenate(x)
y_train = np.concatenate(y)
X_val = np.concatenate(x)
y_val = np.concatenate(y)
X_train.shape
X_val.shape


    

(50000, 32, 32, 3)

TEST SET

In [None]:
X_test, y_test = process_batch("../data/cifar-10-batches-py/test_batch")
y_test = np.array(y_test)
y_test.shape

(10000,)

NORMALIZATION

In [None]:
X_train = X_train.astype("float32") / 255
X_test = X_test.astype("float32") / 255

In [None]:
y_train = y_train.astype(int)
y_test = y_test.astype(int)

SPLITTING OF TRAIN SET

In [None]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0) 
X_train.shape, X_val.shape, y_val.shape

((40000, 32, 32, 3), (10000, 32, 32, 3), (10000,))

BUILDING AND TRAINING OF MODEL

In [None]:
inputs = tf.keras.Input(shape=(32,32,3))
#x = data_augmentation(inputs)

x = layers.Conv2D(32, (3,3), padding='same', activation='relu',kernel_regularizer=regularizers.l2(1e-4))(inputs)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D((2,2))(x)

x = layers.Conv2D(64, (3,3), padding='same', activation='relu',kernel_regularizer=regularizers.l2(1e-4))(x)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D((2,2))(x)


x = layers.Conv2D(128, (3,3), padding='same', activation='relu',kernel_regularizer=regularizers.l2(1e-4))(x)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D((2,2))(x)

x = layers.Flatten()(x)
x = layers.Dense(128, activation='relu',kernel_regularizer=regularizers.l2(1e-4))(x)
x = layers.Dropout(0.5)(x)

outputs = layers.Dense(10, activation='softmax',kernel_regularizer=regularizers.l2(1e-4))(x)


model = models.Model(inputs, outputs)


# Summary
model.summary()
print("Total params:", model.count_params())



Total params: 357706


Overfitting and Training Time

In [None]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

early_stop = EarlyStopping(
    monitor='val_loss',   # watch validation loss
    patience=5,           # stop if no improvement for 3 epochs
    restore_best_weights=True,
    verbose = 1
)

LEARNING RATE

In [None]:
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-7,
    verbose=1
)

Saves the best Model

In [None]:
checkpoint = ModelCheckpoint(
    'best_model.keras',
    monitor='val_loss',
    save_best_only=True,
    verbose = 1
    
)

In [None]:
history = model.fit(
    X_train, y_train,
     batch_size=32,
    epochs=50,
    validation_data=(X_val, y_val),
     callbacks=[early_stop, reduce_lr, checkpoint]
)

Epoch 1/50
[1m1249/1250[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 22ms/step - accuracy: 0.3120 - loss: 1.9861
Epoch 1: val_loss improved from None to 1.39711, saving model to best_model.keras
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 25ms/step - accuracy: 0.3758 - loss: 1.7602 - val_accuracy: 0.5093 - val_loss: 1.3971 - learning_rate: 0.0010
Epoch 2/50
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.4914 - loss: 1.4440
Epoch 2: val_loss improved from 1.39711 to 1.19926, saving model to best_model.keras
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 26ms/step - accuracy: 0.5156 - loss: 1.3935 - val_accuracy: 0.6077 - val_loss: 1.1993 - learning_rate: 0.0010
Epoch 3/50
[1m1248/1250[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 22ms/step - accuracy: 0.6007 - loss: 1.2044
Epoch 3: val_loss improved from 1.19926 to 1.10343, saving model to best_model.keras
[1m1250/1250[0m [3

Train And Validation Losses

In [None]:
df_hist = pd.DataFrame(history.history)
df_hist[["loss", "val_loss"]].plot()

ImportError: matplotlib is required for plotting when the default backend "matplotlib" is selected.

OVERALL ACCURACY OF TEST SET

In [None]:
y_pred_probs = model.predict(X_test)
y_preds = np.argmax(y_pred_probs, axis=1)
overall_accuracy = accuracy_score(y_preds,y_test)
print(f"Overall Accuracy: {overall_accuracy:.4f}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Overall Accuracy: 0.7763


In [None]:
labels = ['airplane', 'automobile', 'bird', 'cat', 'deer',
          'dog', 'frog', 'horse', 'ship', 'truck']

print(classification_report(y_test, y_preds, target_names=labels))

f1 = f1_score(y_test, y_preds, average='weighted')
print(f" F1 Score: {f1:.4f}")


              precision    recall  f1-score   support

    airplane       0.81      0.80      0.80      1000
  automobile       0.89      0.88      0.88      1000
        bird       0.70      0.67      0.68      1000
         cat       0.62      0.60      0.61      1000
        deer       0.70      0.77      0.73      1000
         dog       0.72      0.63      0.67      1000
        frog       0.77      0.85      0.81      1000
       horse       0.82      0.82      0.82      1000
        ship       0.88      0.89      0.89      1000
       truck       0.85      0.85      0.85      1000

    accuracy                           0.78     10000
   macro avg       0.78      0.78      0.78     10000
weighted avg       0.78      0.78      0.78     10000

 F1 Score: 0.7753


CONFUSION MATRIX

In [None]:
# Generate Confusion Matrix
labels = ['airplane', 'automobile', 'bird', 'cat', 'deer',
          'dog', 'frog', 'horse', 'ship', 'truck']
cm = confusion_matrix(y_test, y_preds)

# Plot Confusion Matrix
plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=labels, yticklabels=labels)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix - CIFAR-10")
plt.show()


In [None]:
misclassified_idx = np.where(y_preds != y_test)[0]
print("Total misclassified samples:", len(misclassified_idx))

# Show first 9 misclassified samples
plt.figure(figsize=(12,12))

for i, idx in enumerate(misclassified_idx[:9]):
    plt.subplot(3,3,i+1)
    plt.imshow(X_test[idx])  # show image
    plt.title(f"True: {labels[y_true[idx]]}\nPred: {labels[y_preds[idx]]}")
    plt.axis("off")

plt.tight_layout()
plt.show()

NameError: name 'y_preds' is not defined