In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Lambda, Rescaling, Input, Flatten
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.applications.efficientnet import preprocess_input
from keras.callbacks import EarlyStopping
from tensorflow.keras.models import Model
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from glob import glob


In [2]:
base_dir = '../data'
batch_size = 16
img_size = (224, 224)

This exercise employs unsupervised techniques to boost or enhance the performance of a supervised classifier.\
Essentially there are two paths forward:\
\
1. Data Augmentaiton: This involves generating new pictures that can then be added to the dataset. This can help boost performance on a supervised classifier due to the increased diversity in the dataset, especially useful for smaller datasets.
2. Feature Extraction: This involves using an unsupervised model to understand and extract meaningful features from the dataset without the labels. These features can be used as a compressed representation to be fed into a supervised learning model, cutting down training time and complexity.
\
\
Given the constraints we ran into with time and complexity in exercise number 1, Data augmentation will probably be less useful here. The dataset is already quite large and seems to be quite diverse already. Simple data augmentation like vertical / horizontal flips, darkening, etc. can be done easily without adding much computational load, but more robust augmentation tehcniques (like using a GAN) introduce more complexity and computational load into a dataset where we are already struggling with complexity.\
\
Therefore we will go the feature extraction route as it promises to help reduce the complexity of the feature space, and will likely help make the training process much easier. Futhermore, we still have the option to do some simple data augmentations that can potentially help boost performance as well without adding complexity in a somewhat hybrid approach. We will save that for after.

We will start with feature extraction using ResNet50 model, which is 

In [3]:
# include_top = False tells it that we do not want the final (top) output / classification layer since we are only interested in the feaeture maps and not predictions.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# preprocess the input to match the format that ResNet50 model expects
preprocess_input = tf.keras.applications.resnet50.preprocess_input

# create a data preprocessing model
preprocess_model = Sequential([
    tf.keras.layers.experimental.preprocessing.Rescaling(1./255),  # rescale the pixel values
    tf.keras.layers.Lambda(preprocess_input),  # apply ResNet50 preprocessing
])

# define a model that includes preprocessing anf feature extrraction
feature_extractor = Sequential([
    preprocess_model,
    base_model,
    GlobalAveragePooling2D() 
])

# Prepare datasets
train_dataset = image_dataset_from_directory(
    directory=f"{base_dir}/train",
    image_size=img_size,
    batch_size=batch_size,
    label_mode='categorical',
    shuffle=True,
).map(lambda x, y: (preprocess_model(x), y))  

validation_dataset = image_dataset_from_directory(
    directory=f"{base_dir}/valid",
    image_size=img_size,
    batch_size=batch_size,
    label_mode='categorical',
    shuffle=True
).map(lambda x, y: (preprocess_model(x), y))  

test_dataset = image_dataset_from_directory(
    directory=f"{base_dir}/test",
    image_size=img_size,
    batch_size=batch_size,
    label_mode='categorical',
    shuffle=True
).map(lambda x, y: (preprocess_model(x), y)) 


Found 84635 files belonging to 525 classes.
Found 2625 files belonging to 525 classes.
Found 2625 files belonging to 525 classes.


In [4]:
def extract_features(dataset, extractor_model):
    """
    This function extracts features for a given dataset using the specified extractor_model.
    """
    all_features = []
    all_labels = []

    for images, labels in dataset:
        features = extractor_model.predict(images)
        all_features.append(features)
        all_labels.append(labels)

    return np.vstack(all_features), np.vstack(all_labels)

# extract features
train_features, train_labels = extract_features(train_dataset, feature_extractor)
val_features, val_labels = extract_features(validation_dataset, feature_extractor)
test_features, test_labels = extract_features(test_dataset, feature_extractor)

# save features for easy lookup
np.save(f'{base_dir}/ResNet50_features/train_features.npy', train_features)
np.save(f'{base_dir}/ResNet50_features/train_labels.npy', train_labels)
np.save(f'{base_dir}/ResNet50_features/val_features.npy', val_features)
np.save(f'{base_dir}/ResNet50_features/val_labels.npy', val_labels)
np.save(f'{base_dir}/ResNet50_features/test_features.npy', test_features)
np.save(f'{base_dir}/ResNet50_features/test_labels.npy', test_labels)



In [11]:
# load features and labels
train_features = np.load(f'{base_dir}/ResNet50_features/train_features.npy')
train_labels = np.load(f'{base_dir}/ResNet50_features/train_labels.npy')
val_features = np.load(f'{base_dir}/ResNet50_features/val_features.npy')
val_labels = np.load(f'{base_dir}/ResNet50_features/val_labels.npy')
test_features = np.load(f'{base_dir}/ResNet50_features/test_features.npy')
test_labels = np.load(f'{base_dir}/ResNet50_features/test_labels.npy')

# define the arch
model = Sequential([
    Dense(512, activation='relu', input_shape=(train_features.shape[1],)),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(train_labels.shape[1], activation='softmax')  
])

# compile and train
model.compile(optimizer=Adam(lr=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
history = model.fit(train_features, train_labels,
                    epochs=20,
                    batch_size=32,
                    validation_data=(val_features, val_labels),
                    verbose=2)

test_loss, test_acc = model.evaluate(test_features, test_labels, verbose=2)
print(f'Test accuracy: {test_acc}, Test loss: {test_loss}')


  super().__init__(name, **kwargs)


Epoch 1/20
2645/2645 - 15s - loss: 6.2715 - accuracy: 0.0023 - val_loss: 6.2635 - val_accuracy: 0.0019 - 15s/epoch - 6ms/step
Epoch 2/20
2645/2645 - 14s - loss: 6.2616 - accuracy: 0.0028 - val_loss: 6.2642 - val_accuracy: 0.0019 - 14s/epoch - 5ms/step
Epoch 3/20
2645/2645 - 14s - loss: 6.2602 - accuracy: 0.0027 - val_loss: 6.2651 - val_accuracy: 0.0019 - 14s/epoch - 5ms/step
Epoch 4/20
2645/2645 - 14s - loss: 6.2590 - accuracy: 0.0030 - val_loss: 6.2660 - val_accuracy: 0.0019 - 14s/epoch - 5ms/step
Epoch 5/20
2645/2645 - 14s - loss: 6.2585 - accuracy: 0.0030 - val_loss: 6.2664 - val_accuracy: 0.0019 - 14s/epoch - 5ms/step
Epoch 6/20
2645/2645 - 14s - loss: 6.2579 - accuracy: 0.0032 - val_loss: 6.2659 - val_accuracy: 0.0019 - 14s/epoch - 5ms/step
Epoch 7/20
2645/2645 - 14s - loss: 6.2574 - accuracy: 0.0031 - val_loss: 6.2667 - val_accuracy: 0.0019 - 14s/epoch - 5ms/step
Epoch 8/20
2645/2645 - 14s - loss: 6.2570 - accuracy: 0.0030 - val_loss: 6.2682 - val_accuracy: 0.0019 - 14s/epoch - 5

In [12]:
# trying with a simpler archtiecture (since we have asubstantially reduced dimensions)
model = Sequential([
    Dense(256, activation='relu', input_shape=(train_features.shape[1],)),
    Dropout(0.3),  # slightly reduced dropout
    Dense(train_labels.shape[1], activation='softmax') 
])

#compile and train
model.compile(optimizer=Adam(lr=0.001),  # adjusting learning rate to prevent overfitting 
              loss='categorical_crossentropy',
              metrics=['accuracy'])
history = model.fit(train_features, train_labels,
                    epochs=20,
                    batch_size=32,
                    validation_data=(val_features, val_labels),
                    verbose=2)

test_loss, test_acc = model.evaluate(test_features, test_labels, verbose=2)
print(f'Test accuracy: {test_acc}, Test loss: {test_loss}')


Epoch 1/20
2645/2645 - 13s - loss: 6.2629 - accuracy: 0.0027 - val_loss: 6.2653 - val_accuracy: 0.0019 - 13s/epoch - 5ms/step
Epoch 2/20
2645/2645 - 12s - loss: 6.2584 - accuracy: 0.0031 - val_loss: 6.2676 - val_accuracy: 0.0019 - 12s/epoch - 5ms/step
Epoch 3/20
2645/2645 - 12s - loss: 6.2576 - accuracy: 0.0031 - val_loss: 6.2692 - val_accuracy: 0.0019 - 12s/epoch - 5ms/step
Epoch 4/20
2645/2645 - 12s - loss: 6.2574 - accuracy: 0.0031 - val_loss: 6.2702 - val_accuracy: 0.0019 - 12s/epoch - 5ms/step
Epoch 5/20
2645/2645 - 12s - loss: 6.2573 - accuracy: 0.0031 - val_loss: 6.2706 - val_accuracy: 0.0019 - 12s/epoch - 5ms/step
Epoch 6/20
2645/2645 - 12s - loss: 6.2573 - accuracy: 0.0031 - val_loss: 6.2709 - val_accuracy: 0.0019 - 12s/epoch - 5ms/step
Epoch 7/20
2645/2645 - 12s - loss: 6.2573 - accuracy: 0.0029 - val_loss: 6.2710 - val_accuracy: 0.0019 - 12s/epoch - 5ms/step
Epoch 8/20
2645/2645 - 12s - loss: 6.2573 - accuracy: 0.0031 - val_loss: 6.2711 - val_accuracy: 0.0019 - 12s/epoch - 5

In [13]:
# trying am more complex archtiecture (just as a sanity check)
model = Sequential([
    Dense(1024, activation='relu', input_shape=(train_features.shape[1],)),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(train_labels.shape[1], activation='softmax')  # out
])

# comile and train 
model.compile(optimizer=Adam(lr=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
history = model.fit(train_features, train_labels,
                    epochs=20,
                    batch_size=32,
                    validation_data=(val_features, val_labels),
                    verbose=2)

# eval
test_loss, test_acc = model.evaluate(test_features, test_labels, verbose=2)
print(f'Test accuracy: {test_acc}, Test loss: {test_loss}')

Epoch 1/20
2645/2645 - 16s - loss: 6.2754 - accuracy: 0.0017 - val_loss: 6.2636 - val_accuracy: 0.0019 - 16s/epoch - 6ms/step
Epoch 2/20
2645/2645 - 15s - loss: 6.2622 - accuracy: 0.0025 - val_loss: 6.2637 - val_accuracy: 0.0019 - 15s/epoch - 6ms/step
Epoch 3/20
2645/2645 - 15s - loss: 6.2604 - accuracy: 0.0027 - val_loss: 6.2641 - val_accuracy: 0.0019 - 15s/epoch - 6ms/step
Epoch 4/20
2645/2645 - 15s - loss: 6.2594 - accuracy: 0.0031 - val_loss: 6.2648 - val_accuracy: 0.0019 - 15s/epoch - 6ms/step
Epoch 5/20
2645/2645 - 15s - loss: 6.2584 - accuracy: 0.0030 - val_loss: 6.2666 - val_accuracy: 0.0019 - 15s/epoch - 6ms/step
Epoch 6/20
2645/2645 - 15s - loss: 6.2577 - accuracy: 0.0030 - val_loss: 6.2675 - val_accuracy: 0.0019 - 15s/epoch - 6ms/step
Epoch 7/20
2645/2645 - 15s - loss: 6.2573 - accuracy: 0.0031 - val_loss: 6.2681 - val_accuracy: 0.0019 - 15s/epoch - 6ms/step
Epoch 8/20
2645/2645 - 16s - loss: 6.2572 - accuracy: 0.0032 - val_loss: 6.2685 - val_accuracy: 0.0019 - 16s/epoch - 6

In [5]:
# loadefficientNet Pretrain
base_model = EfficientNetB0(include_top=False, weights='imagenet', pooling='avg') # pooling avg will output a 2d tensor with pooled features

# feature ext
feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)

In [9]:
# create a data preprocessing model 
preprocess_model = Sequential([
    Rescaling(1./255),  # rescale
    Lambda(preprocess_input) # preprocesng
])

# feature extractor
feature_extractor = Sequential([
    preprocess_model,
    base_model  # no globalPooling ndeeeded this time
])

In [10]:
# extract features
train_features, train_labels = extract_features(train_dataset, feature_extractor)
val_features, val_labels = extract_features(validation_dataset, feature_extractor)
test_features, test_labels = extract_features(test_dataset, feature_extractor)

# save the features and labels to disk for future use
np.save(f'{base_dir}/EfficientNet_features/train_features.npy', train_features)
np.save(f'{base_dir}/EfficientNet_features/train_labels.npy', train_labels)
np.save(f'{base_dir}/EfficientNet_features/val_features.npy', val_features)
np.save(f'{base_dir}/EfficientNet_features/val_labels.npy', val_labels)
np.save(f'{base_dir}/EfficientNet_features/test_features.npy', test_features)
np.save(f'{base_dir}/EfficientNet_features/test_labels.npy', test_labels)



In [11]:
# load the features and labels
train_features = np.load(f'{base_dir}/EfficientNet_features/train_features.npy')
train_labels = np.load(f'{base_dir}/EfficientNet_features/train_labels.npy')
val_features = np.load(f'{base_dir}/EfficientNet_features/val_features.npy')
val_labels = np.load(f'{base_dir}/EfficientNet_features/val_labels.npy')
test_features = np.load(f'{base_dir}/EfficientNet_features/test_features.npy')
test_labels = np.load(f'{base_dir}/EfficientNet_features/test_labels.npy')

model = Sequential([
    Dense(512, activation='relu', input_shape=(train_features.shape[1],)),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(train_labels.shape[1], activation='softmax')  # Output layer nodes equal to the number of classes
])

# compile and train
model.compile(optimizer=Adam(lr=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
history = model.fit(train_features, train_labels,
                    epochs=20,
                    batch_size=32,
                    validation_data=(val_features, val_labels),
                    verbose=2)

# eval
test_loss, test_acc = model.evaluate(test_features, test_labels, verbose=2)
print(f'Test accuracy: {test_acc}, Test loss: {test_loss}')

  super().__init__(name, **kwargs)


Epoch 1/20
2645/2645 - 17s - loss: 6.2645 - accuracy: 0.0023 - val_loss: 6.2635 - val_accuracy: 0.0019 - 17s/epoch - 6ms/step
Epoch 2/20
2645/2645 - 15s - loss: 6.2614 - accuracy: 0.0029 - val_loss: 6.2638 - val_accuracy: 0.0019 - 15s/epoch - 6ms/step
Epoch 3/20
2645/2645 - 16s - loss: 6.2598 - accuracy: 0.0028 - val_loss: 6.2647 - val_accuracy: 0.0019 - 16s/epoch - 6ms/step
Epoch 4/20
2645/2645 - 14s - loss: 6.2587 - accuracy: 0.0030 - val_loss: 6.2656 - val_accuracy: 0.0019 - 14s/epoch - 5ms/step
Epoch 5/20
2645/2645 - 15s - loss: 6.2579 - accuracy: 0.0031 - val_loss: 6.2663 - val_accuracy: 0.0019 - 15s/epoch - 6ms/step
Epoch 6/20
2645/2645 - 15s - loss: 6.2575 - accuracy: 0.0031 - val_loss: 6.2676 - val_accuracy: 0.0019 - 15s/epoch - 6ms/step
Epoch 7/20
2645/2645 - 15s - loss: 6.2570 - accuracy: 0.0031 - val_loss: 6.2687 - val_accuracy: 0.0019 - 15s/epoch - 6ms/step
Epoch 8/20
2645/2645 - 16s - loss: 6.2568 - accuracy: 0.0031 - val_loss: 6.2692 - val_accuracy: 0.0019 - 16s/epoch - 6

In [13]:
# load pre-trained
base_model = EfficientNetB0(weights='imagenet', include_top=False,
                            input_tensor=Input(shape=(224, 224, 3)))

# freeze base
base_model.trainable = False

# create the custom
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)  # Example: one dense layer
predictions = Dense(train_labels.shape[1], activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# compile and train
model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(train_dataset, validation_data=validation_dataset, epochs=10, verbose=2)

Epoch 1/10


  super().__init__(name, **kwargs)


5290/5290 - 403s - loss: 6.3131 - accuracy: 0.0022 - val_loss: 6.2815 - val_accuracy: 0.0019 - 403s/epoch - 76ms/step
Epoch 2/10
5290/5290 - 361s - loss: 6.2718 - accuracy: 0.0024 - val_loss: 6.2687 - val_accuracy: 0.0015 - 361s/epoch - 68ms/step
Epoch 3/10
5290/5290 - 363s - loss: 6.2679 - accuracy: 0.0026 - val_loss: 6.2739 - val_accuracy: 0.0027 - 363s/epoch - 69ms/step
Epoch 4/10
5290/5290 - 368s - loss: 6.2661 - accuracy: 0.0029 - val_loss: 6.2824 - val_accuracy: 0.0019 - 368s/epoch - 70ms/step
Epoch 5/10
5290/5290 - 350s - loss: 6.2649 - accuracy: 0.0029 - val_loss: 6.2680 - val_accuracy: 0.0019 - 350s/epoch - 66ms/step
Epoch 6/10
5290/5290 - 348s - loss: 6.2635 - accuracy: 0.0029 - val_loss: 6.2653 - val_accuracy: 0.0019 - 348s/epoch - 66ms/step
Epoch 7/10
5290/5290 - 358s - loss: 6.2621 - accuracy: 0.0029 - val_loss: 6.2645 - val_accuracy: 0.0019 - 358s/epoch - 68ms/step
Epoch 8/10
5290/5290 - 349s - loss: 6.2620 - accuracy: 0.0028 - val_loss: 6.2693 - val_accuracy: 0.0019 - 34

<keras.callbacks.History at 0x1717039d520>