# Vision AI – End-to-End Notebook

1) Load & preprocess CIFAR-10

2) Train a simple CNN

3) Add data augmentation + evaluate (accuracy, precision, recall, F1, confusion matrix)

4) Transfer Learning with MobileNetV2 (fine-tune a few layers)

5) Save plots, model, and demo predictions




In [None]:
import sys, os, time, random
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix

# Record which TensorFlow build this run is using.
print('TensorFlow:', tf.__version__)

In [None]:

from tensorflow.keras.datasets import cifar10

# Fetch the CIFAR-10 train/test arrays.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Collapse the label arrays to 1-D vectors.
y_train, y_test = y_train.flatten(), y_test.flatten()

# Position i holds the human-readable name used for class id i.
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Convert images to float32 and divide by 255
# (presumably mapping 8-bit pixels onto [0, 1]).
x_train, x_test = (
    pixels.astype('float32') / 255.0 for pixels in (x_train, x_test)
)

# Carve the last 5000 training samples off as the validation split.
x_train, x_val = x_train[:-5000], x_train[-5000:]
y_train, y_val = y_train[:-5000], y_train[-5000:]

# Cell output: shapes of the train / validation / test image arrays.
x_train.shape, x_val.shape, x_test.shape

In [None]:

# Preview the first nine training images in a 3x3 grid, captioning each
# with its class name, then write the figure to samples_grid.png.
plt.figure()
preview = zip(x_train[:9], y_train[:9])
for slot, (sample, label) in enumerate(preview, start=1):
    plt.subplot(3, 3, slot)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(sample)
    plt.xlabel(class_names[label])
plt.tight_layout()
plt.savefig('samples_grid.png', dpi=160)
plt.show()

In [None]:

def build_simple_cnn(input_shape=(32,32,3), num_classes=10):
    """Build and compile a small convolutional image classifier.

    The network stacks three 3x3 convolutions (32, 64, 128 filters) with
    2x2 max-pooling after the first two, followed by a 128-unit dense layer
    and a softmax output.

    Args:
        input_shape: Shape of one input image, without the batch axis.
        num_classes: Number of output classes (width of the softmax layer).

    Returns:
        A compiled Sequential model using the Adam optimizer,
        sparse categorical cross-entropy loss (integer labels) and an
        accuracy metric.
    """
    model = models.Sequential([
        # Declare the input explicitly instead of passing `input_shape=` to
        # the first Conv2D, which current Keras flags as deprecated.
        layers.Input(shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ])
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Instantiate the baseline CNN with its default arguments and print the
# layer-by-layer summary.
cnn = build_simple_cnn()
cnn.summary()

In [None]:

# Training hyper-parameters; the trailing #@param comments are form-field
# annotations and are kept verbatim.
EPOCHS = 3 #@param {type:"slider", min:2, max:30, step:1}
BATCH_SIZE = 32 #@param {type:"slider", min:32, max:256, step:32}

# Fit the baseline CNN, validating on the held-out split after every epoch.
history = cnn.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

# Draw one figure per tracked quantity (accuracy first, then loss), each
# showing the train and validation curves, and save it to disk.
curve_specs = (
    ('accuracy', 'val_accuracy', 'train_acc', 'val_acc',
     'Accuracy', 'CNN Accuracy', 'cnn_accuracy.png'),
    ('loss', 'val_loss', 'train_loss', 'val_loss',
     'Loss', 'CNN Loss', 'cnn_loss.png'),
)
for (train_key, val_key, train_label, val_label,
     y_label, fig_title, out_file) in curve_specs:
    plt.figure()
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(fig_title)
    plt.savefig(out_file, dpi=160)
    plt.show()

In [None]:
# Score the baseline CNN on the test split: print the per-class report and
# save a confusion-matrix heatmap.
test_scores = cnn.predict(x_test)
y_pred = test_scores.argmax(axis=1)  # highest-scoring class per sample
print(classification_report(y_test, y_pred, target_names=class_names))

cm = confusion_matrix(y_test, y_pred)
tick_marks = np.arange(len(class_names))

plt.figure(figsize=(6, 5))
plt.imshow(cm, interpolation='nearest')
plt.title('Confusion Matrix (CNN)')
plt.colorbar()
plt.xticks(tick_marks, class_names, rotation=45, ha='right')
plt.yticks(tick_marks, class_names)
# Layout is tightened before the axis labels are attached; this call order
# is kept deliberately.
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.savefig('cnn_confusion_matrix.png', dpi=200, bbox_inches='tight')
plt.show()

In [None]:

# Train a fresh copy of the baseline CNN on randomly augmented batches and
# plot its train/validation accuracy.

# A single batch size drives both the generator and the step count, so
# that one epoch really covers the training set once.
AUG_BATCH_SIZE = 32
AUG_EPOCHS = 3

datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)
# datagen.fit(x_train) is intentionally not called: it only computes
# statistics for the featurewise_* / zca_whitening options, none of which
# are enabled above.

aug_cnn = build_simple_cnn()
# Previously computed as len(x_train)//64 while batches held 32 samples,
# which showed the model only about half of the data per epoch.
steps = len(x_train) // AUG_BATCH_SIZE
aug_history = aug_cnn.fit(
    datagen.flow(x_train, y_train, batch_size=AUG_BATCH_SIZE),
    validation_data=(x_val, y_val),
    epochs=AUG_EPOCHS,
    steps_per_epoch=steps,
)

plt.figure()
plt.plot(aug_history.history['accuracy'], label='train_acc')
plt.plot(aug_history.history['val_accuracy'], label='val_acc')
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Aug CNN Accuracy')
plt.savefig('aug_accuracy.png', dpi=160)
plt.show()

In [None]:
# Transfer learning with an ImageNet-pretrained MobileNetV2 backbone:
# (1) train a new classification head on the frozen backbone, then
# (2) unfreeze the last layers and fine-tune at a lower learning rate.

# NOTE(review): 64 is not one of the resolutions MobileNetV2 ships ImageNet
# weights for; Keras is expected to warn and load the 224x224 weights.
# Confirm this is acceptable.
IMG_SIZE = 64


def upscale(images):
    """Resize a batch of images to IMG_SIZE x IMG_SIZE via tf.image.resize."""
    return tf.image.resize(images, (IMG_SIZE, IMG_SIZE))


# Upscaled copies of each split, converted back to NumPy arrays.
x_train_up = upscale(x_train).numpy()
x_val_up = upscale(x_val).numpy()
x_test_up = upscale(x_test).numpy()

base = tf.keras.applications.MobileNetV2(
    input_shape=(IMG_SIZE, IMG_SIZE, 3), include_top=False, weights='imagenet')
base.trainable = False  # freeze the backbone while the new head is trained

inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
# The images were already divided by 255 when the dataset was loaded, so
# they lie in [0, 1]. mobilenet_v2.preprocess_input expects [0, 255] and
# would squash them into a sliver just above -1. Map [0, 1] -> [-1, 1]
# directly instead, which is the range the pretrained weights expect.
x = layers.Rescaling(scale=2.0, offset=-1.0)(inputs)
# training=False keeps the backbone in inference mode even after it is
# unfrozen for fine-tuning below.
x = base(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(64, activation='relu')(x)
outputs = layers.Dense(10, activation='softmax')(x)
tl_model = tf.keras.Model(inputs, outputs)

tl_model.compile(optimizer='adam',
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy'])

# Stage 1: train only the new head (short run, small batches).
tl_hist = tl_model.fit(
    x_train_up, y_train,
    validation_data=(x_val_up, y_val),
    epochs=3,
    batch_size=32,
    verbose=1
)

# Stage 2: light fine-tuning. Unfreeze the backbone, then re-freeze
# everything except its last 50 layers.
base.trainable = True
for layer in base.layers[:-50]:
    layer.trainable = False

# Recompile after changing the trainable flags, with a lower learning rate
# (1e-4) for the fine-tuning stage.
tl_model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy'])

ft_hist = tl_model.fit(
    x_train_up, y_train,
    validation_data=(x_val_up, y_val),
    epochs=2,
    batch_size=32,
    verbose=1
)

# Plot validation accuracy across both stages as one continuous curve.
plt.figure()
plt.plot(tl_hist.history['val_accuracy'] + ft_hist.history['val_accuracy'])
plt.xlabel('Epoch')
plt.ylabel('Val Accuracy')
plt.title('Transfer Learning Val Accuracy')
plt.savefig('tl_val_acc.png', dpi=160)
plt.show()


In [None]:
# Score the transfer-learning model on the upscaled test split: print the
# per-class report and save a confusion-matrix heatmap.
tl_scores = tl_model.predict(x_test_up)
y_pred_tl = tl_scores.argmax(axis=1)  # highest-scoring class per sample
print(classification_report(y_test, y_pred_tl, target_names=class_names))

cm_tl = confusion_matrix(y_test, y_pred_tl)
tick_marks = np.arange(len(class_names))

plt.figure(figsize=(6, 5))
plt.imshow(cm_tl, interpolation='nearest')
plt.title('Confusion Matrix (Transfer Learning)')
plt.colorbar()
plt.xticks(tick_marks, class_names, rotation=45, ha='right')
plt.yticks(tick_marks, class_names)
# Layout is tightened before the axis labels are attached; this call order
# is kept deliberately.
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.savefig('tl_confusion_matrix.png', dpi=200, bbox_inches='tight')
plt.show()

# Write each trained model to its own .h5 file, in the original order.
trained_models = (
    (cnn, 'cnn_cifar10.h5'),
    (aug_cnn, 'aug_cnn_cifar10.h5'),
    (tl_model, 'mobilenetv2_cifar10.h5'),
)
for trained, out_path in trained_models:
    trained.save(out_path)

In [None]:

from tensorflow.keras.preprocessing import image
from google.colab import files
import numpy as np

def predict_image(img_path, model='cnn'):
    """Classify one image file and return the predicted class name.

    Args:
        img_path: Path of the image file to load.
        model: 'cnn' selects the baseline CNN (32x32 input); any other
            value selects the transfer-learning model (64x64 input).

    Returns:
        The entry of ``class_names`` with the highest predicted score.
    """
    if model == 'cnn':
        model_obj, side = cnn, 32
    else:
        model_obj, side = tl_model, 64

    img = tf.keras.utils.load_img(img_path, target_size=(side, side))
    # Both models were trained on pixels scaled to [0, 1], and the
    # transfer-learning model normalises for MobileNetV2 inside its own
    # graph. Calling preprocess_input here as well would feed it inputs in
    # a different range from the one it was trained on.
    arr = tf.keras.utils.img_to_array(img) / 255.0
    batch = np.expand_dims(arr, axis=0)  # add the leading batch axis
    pred = np.argmax(model_obj.predict(batch), axis=1)[0]
    return class_names[pred]

# Interactive demo: prompt for an upload and classify every uploaded file
# with the transfer-learning model.
print('Upload an image for a quick demo .')
# NOTE: files.upload() comes from google.colab, so this cell only works in
# a Colab runtime. A try/except guard for other environments was sketched
# here but left disabled.
uploaded = files.upload()
# Presumably `uploaded` is keyed by the uploaded file names, which are then
# usable as paths (verify against the Colab docs).
for fname in uploaded.keys():
    print('Predicting with TL model:', fname, '->', predict_image(fname, model='tl'))

Upload an image for a quick demo .
