In [8]:
import os
import shutil
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist, cifar10, 
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define dataset paths for folder-based data
base_dir = '/Users/joanna/Computing_studies/CV/wk_proj3'
src_directory = os.path.join(base_dir, 'dtd/images')
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

# Data loaders
def load_and_process_image_data(dataset_name):
    if dataset_name == 'MNIST':
        (train_images, train_labels), (test_images, test_labels) = mnist.load_data()
        # Convert grayscale to RGB
        train_images = np.stack([train_images] * 3, axis=-1)
        test_images = np.stack([test_images] * 3, axis=-1)
    elif dataset_name == 'CIFAR-10':
        (train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
    elif dataset_name == 'Imagenet':
        (train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

    # Convert types and normalize
    train_images = train_images.astype('float32') / 255
    test_images = test_images.astype('float32') / 255
    # Resize images to (224, 224)
    train_images = tf.image.resize(train_images, (224, 224))
    test_images = tf.image.resize(test_images, (224, 224))
    # Convert labels to one-hot encoding
    train_labels = to_categorical(train_labels)
    test_labels = to_categorical(test_labels)
    # Split test into validation and test
    test_images, val_images, test_labels, val_labels = train_test_split(test_images, test_labels, test_size=0.5, random_state=42)
    
    return train_images, train_labels, val_images, val_labels, test_images, test_labels

# Prepare folder-based data if necessary
def prepare_directory_data(src_directory, train_dir, val_dir, test_dir):
    if not os.path.exists(train_dir):
        os.makedirs(train_dir)
    if not os.path.exists(val_dir):
        os.makedirs(val_dir)
    if not os.path.exists(test_dir):
        os.makedirs(test_dir)

        for class_name in os.listdir(src_directory):
            class_dir = os.path.join(src_directory, class_name)
            if os.path.isdir(class_dir):
                os.makedirs(os.path.join(train_dir, class_name), exist_ok=True)
                os.makedirs(os.path.join(val_dir, class_name), exist_ok=True)
                os.makedirs(os.path.join(test_dir, class_name), exist_ok=True)
                files = [os.path.join(class_dir, f) for f in os.listdir(class_dir) if os.path.isfile(os.path.join(class_dir, f))]
                train_files, val_test_files = train_test_split(files, test_size=0.3, random_state=42)
                val_files, test_files = train_test_split(val_test_files, test_size=0.5, random_state=42)
                for f in train_files:
                    shutil.copy(f, os.path.join(train_dir, class_name))
                for f in val_files:
                    shutil.copy(f, os.path.join(val_dir, class_name))
                for f in test_files:
                    shutil.copy(f, os.path.join(test_dir, class_name))


# Data generators for folder-based data
def create_generators(train_dir, val_dir, test_dir, batch_size=32):
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    test_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical'
    )
    val_generator = test_datagen.flow_from_directory(
        val_dir,
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical'
    )
    test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical'
    )
    return train_generator, val_generator, test_generator

# Model setup
def setup_model(output_classes):
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    # Freeze the base model layers
    for layer in base_model.layers:
        layer.trainable = False

    x = Flatten()(base_model.output)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.25)(x)
    predictions = Dense(output_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Choose dataset and prepare data
dataset_name = 'DTD'  # Change as needed: 'MNIST', 'CIFAR-10', 'DTD'
if dataset_name in ['MNIST', 'CIFAR-10']:
    train_images, train_labels, val_images, val_labels, test_images, test_labels = load_and_process_image_data(dataset_name)
    model = setup_model(train_labels.shape[1])
    history = model.fit(train_images, train_labels, batch_size=32, epochs=20, validation_data=(val_images, val_labels))
    results = model.evaluate(test_images, test_labels)
else:
    prepare_directory_data(src_directory, train_dir, val_dir, test_dir)
    train_generator, val_generator, test_generator = create_generators(train_dir, val_dir, test_dir)
    model = setup_model(len(os.listdir(train_dir)))  # Number of classes
    history = model.fit(train_generator, epochs=20, validation_data=val_generator)
    results = model.evaluate(test_generator, steps=test_generator.samples // test_generator.batch_size)  # Adjusted line

print("Test Accuracy: ", results[1])


Found 3948 images belonging to 47 classes.
Found 846 images belonging to 47 classes.
Found 846 images belonging to 47 classes.
Epoch 1/20
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m417s[0m 3s/step - accuracy: 0.1056 - loss: 4.2682 - val_accuracy: 0.3274 - val_loss: 2.5562
Epoch 2/20
[1m 47/124[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m3:36[0m 3s/step - accuracy: 0.2711 - loss: 2.7583

KeyboardInterrupt: 

In [10]:
image,labels = next(train_generator)

In [11]:
image.shape, labels.shape

((32, 224, 224, 3), (32, 47))

In [3]:
model.summary()

In [6]:
import os

def count_classes(directory):
    # List all items in the directory
    items = os.listdir(directory)
    
    # Filter out only directories
    classes = [item for item in items if os.path.isdir(os.path.join(directory, item))]
    
    # Return the number of directories
    return len(classes)

# Example usage
dataset_directory = 'dtd'  # Update this to your dataset path
num_classes = count_classes(dataset_directory)
print("Number of classes in the dataset:", num_classes)


Number of classes in the dataset: 3
