In [21]:
import os
import shutil
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist, cifar10 
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [22]:
# Define dataset paths for folder-based data.
# os.path.join / os.listdir / os.makedirs never expand '~', so the home
# directory is resolved once here; without this every path below would point
# at a literal directory named '~' relative to the current working directory.
base_dir = os.path.expanduser('~/notebook')
src_directory = os.path.join(base_dir, 'dtd', 'images')
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

In [23]:
# Target (height, width) every image is resized to before being fed to the model.
img_size = (32, 32)

# Dataset names understood by load_and_process_image_data.
_SUPPORTED_DATASETS = ('MNIST', 'CIFAR-10')


def _resize_images(images, size, chunk_size=1024):
    """Resize a 4-D (N, H, W, C) image array to ``size`` chunk by chunk.

    Resizing in chunks avoids one eager TensorFlow call per image while
    keeping the peak size of each intermediate tensor bounded.
    """
    if len(images) == 0:
        return np.empty((0,) + tuple(size) + images.shape[3:], dtype='float32')
    resized_chunks = [
        tf.image.resize(images[start:start + chunk_size], size).numpy()
        for start in range(0, len(images), chunk_size)
    ]
    return np.concatenate(resized_chunks, axis=0)


# Data loaders
def load_and_process_image_data(dataset_name):
    """Load a built-in Keras dataset and prepare it for training.

    Args:
        dataset_name: Either 'MNIST' or 'CIFAR-10'.

    Returns:
        A 6-tuple ``(train_images, train_labels, val_images, val_labels,
        test_images, test_labels)``. Images are float32, divided by 255 and
        resized to ``img_size`` with 3 channels; labels are one-hot encoded.
        The dataset's original test split is divided 50/50 into the returned
        validation and test sets.

    Raises:
        ValueError: If ``dataset_name`` is not a supported dataset.
    """
    # Validate first: otherwise an unknown name would only surface later as
    # an UnboundLocalError on `train_images`.
    if dataset_name not in _SUPPORTED_DATASETS:
        raise ValueError(
            f"Unsupported dataset_name {dataset_name!r}; "
            f"expected one of {_SUPPORTED_DATASETS}"
        )

    if dataset_name == 'MNIST':
        (train_images, train_labels), (test_images, test_labels) = mnist.load_data()
        # Convert grayscale to RGB by repeating the single channel three times
        train_images = np.stack([train_images] * 3, axis=-1)
        test_images = np.stack([test_images] * 3, axis=-1)
    else:  # 'CIFAR-10'
        (train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

    # Convert types and normalize
    train_images = train_images.astype('float32') / 255
    test_images = test_images.astype('float32') / 255

    # Resize images to img_size
    train_images = _resize_images(train_images, img_size)
    test_images = _resize_images(test_images, img_size)

    # Convert labels to one-hot encoding. A shared class count keeps the
    # train and test label matrices the same width even if one split happens
    # to be missing the highest class index.
    num_classes = int(max(train_labels.max(), test_labels.max())) + 1
    train_labels = to_categorical(train_labels, num_classes=num_classes)
    test_labels = to_categorical(test_labels, num_classes=num_classes)

    # Split test into validation and test
    test_images, val_images, test_labels, val_labels = train_test_split(
        test_images, test_labels, test_size=0.5, random_state=42
    )

    return train_images, train_labels, val_images, val_labels, test_images, test_labels

In [24]:
def prepare_directory_data(src_directory, train_dir, val_dir, test_dir):
    """Copy a class-per-folder image dataset into train/validation/test trees.

    Every sub-directory of ``src_directory`` is treated as one class. Its
    files are split 70% / 15% / 15% (train / validation / test) with a fixed
    random state and copied into ``<split_dir>/<class_name>/``. Source files
    are left in place.

    Args:
        src_directory: Directory containing one sub-directory per class.
        train_dir: Destination root for the training split.
        val_dir: Destination root for the validation split.
        test_dir: Destination root for the test split.

    Raises:
        ValueError: If a class directory holds too few files to be split.
    """
    # None of the os / shutil calls below expand '~', so do it explicitly.
    src_directory, train_dir, val_dir, test_dir = (
        os.path.expanduser(path)
        for path in (src_directory, train_dir, val_dir, test_dir)
    )
    split_dirs = (train_dir, val_dir, test_dir)
    for split_dir in split_dirs:
        os.makedirs(split_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting makes the
    # fixed-seed split below reproducible across machines and filesystems.
    for class_name in sorted(os.listdir(src_directory)):
        class_dir = os.path.join(src_directory, class_name)
        if not os.path.isdir(class_dir):
            continue

        # Create directories for each class within train, validation, and test directories
        for split_dir in split_dirs:
            os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)

        # Get all file paths from the class directory
        files = sorted(
            path
            for path in (os.path.join(class_dir, entry) for entry in os.listdir(class_dir))
            if os.path.isfile(path)
        )

        try:
            # Split files into training and temporary validation/test files
            train_files, val_test_files = train_test_split(files, test_size=0.3, random_state=42)
            # Further split the temporary validation/test files into validation and test sets
            val_files, test_files = train_test_split(val_test_files, test_size=0.5, random_state=42)
        except ValueError as exc:
            # Re-raise with the offending class so the failure is actionable.
            raise ValueError(
                f"Cannot split class '{class_name}' ({len(files)} files): {exc}"
            ) from exc

        # Copy files to their respective directories
        for split_dir, split_files in zip(split_dirs, (train_files, val_files, test_files)):
            destination = os.path.join(split_dir, class_name)
            for file_path in split_files:
                shutil.copy(file_path, destination)

In [25]:
# Data generators for folder-based data
def create_generators(train_dir, val_dir, test_dir, batch_size=32, target_size=(32, 32)):
    """Build Keras directory iterators for the train/validation/test folders.

    The training generator applies random augmentation (rotation, shifts,
    shear, zoom, horizontal flip); the validation and test generators only
    rescale pixel values by 1/255.

    Args:
        train_dir: Root folder of the training split (one sub-folder per class).
        val_dir: Root folder of the validation split.
        test_dir: Root folder of the test split.
        batch_size: Number of images per batch for all three generators.
        target_size: (height, width) every image is resized to.

    Returns:
        A tuple ``(train_generator, val_generator, test_generator)``.
    """
    # The paths are passed straight to Keras, which does not expand '~'.
    train_dir, val_dir, test_dir = (
        os.path.expanduser(path) for path in (train_dir, val_dir, test_dir)
    )

    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    # No augmentation for evaluation data, only rescaling.
    eval_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical'
    )
    # shuffle=False keeps the batch order aligned with `generator.classes` /
    # `generator.filenames`, so predictions can be matched to ground truth.
    val_generator = eval_datagen.flow_from_directory(
        val_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False
    )
    test_generator = eval_datagen.flow_from_directory(
        test_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False
    )
    return train_generator, val_generator, test_generator

In [26]:
# Model setup
def setup_model(output_classes, input_shape=None, learning_rate=0.00005, dropout_rate=0.25):
    """Build and compile a frozen-VGG16 transfer-learning classifier.

    The ImageNet-pretrained VGG16 convolutional base is frozen and topped
    with Flatten -> Dense(512, relu) -> Dropout -> Dense(softmax).

    Args:
        output_classes: Number of target classes (size of the softmax layer).
        input_shape: (height, width, channels) of the input images. Defaults
            to ``img_size + (3,)``.
        learning_rate: Learning rate for the Adam optimizer.
        dropout_rate: Dropout rate applied after the 512-unit dense layer.

    Returns:
        A compiled Keras ``Model`` using categorical cross-entropy loss and
        an accuracy metric.

    Raises:
        ValueError: If ``output_classes`` is smaller than 1.
    """
    if output_classes < 1:
        raise ValueError(f"output_classes must be >= 1, got {output_classes!r}")
    if input_shape is None:
        input_shape = img_size + (3,)

    # NOTE(review): the ImageNet VGG16 weights were trained on inputs passed
    # through keras.applications.vgg16.preprocess_input, whereas this notebook
    # feeds images scaled by 1/255 — confirm whether that mismatch is intended.
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    # Freeze the base model layers so only the new head is trained
    for layer in base_model.layers:
        layer.trainable = False

    x = Flatten()(base_model.output)
    x = Dense(512, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    predictions = Dense(output_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

In [27]:
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout and
# batch shuffling are repeatable across "Restart & Run All".
tf.keras.utils.set_random_seed(42)

# Choose dataset and prepare data
dataset_name = 'CIFAR-10'
train_images, train_labels, val_images, val_labels, test_images, test_labels = load_and_process_image_data(dataset_name)

# The label matrices are one-hot encoded, so their width is the class count.
model = setup_model(train_labels.shape[1])
model.summary()

In [28]:
# Train the classification head; validation metrics are reported after every epoch.
history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=32,
    epochs=20,
    validation_data=(val_images, val_labels),
)

Epoch 1/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 121s 77ms/step - accuracy: 0.2814 - loss: 2.0254 - val_accuracy: 0.4838 - val_loss: 1.5197
Epoch 2/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 121s 78ms/step - accuracy: 0.4836 - loss: 1.4981 - val_accuracy: 0.5172 - val_loss: 1.3953
Epoch 3/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 120s 77ms/step - accuracy: 0.5162 - loss: 1.3922 - val_accuracy: 0.5384 - val_loss: 1.3366
Epoch 4/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 121s 77ms/step - accuracy: 0.5378 - loss: 1.3335 - val_accuracy: 0.5534 - val_loss: 1.3012
Epoch 5/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 122s 78ms/step - accuracy: 0.5536 - loss: 1.2902 - val_accuracy: 0.5566 - val_loss: 1.2759
Epoch 6/20
1563/1563 ━━━━━━━━━━━━━━━━━━━━ 122s 78ms/step - accuracy: 0.5660 - loss: 1.2504 - val_accuracy: 0.5638 - val_loss: 1.253

In [29]:
# Score the trained model on the held-out test split.
results = model.evaluate(
    x=test_images,
    y=test_labels,
)

157/157 ━━━━━━━━━━━━━━━━━━━━ 11s 69ms/step - accuracy: 0.5959 - loss: 1.1572
