In [11]:
import keras

In [12]:
# Loader settings shared by every split: batches of 32 images, loaded at
# 224x224, with labels returned in 'categorical' mode.
batch_size = 32
img_size = (224, 224)
loader_options = dict(
    batch_size=batch_size,
    image_size=img_size,
    label_mode='categorical',
)

# subset='both' returns the (training, validation) pair in a single call;
# 20% of the training directory is held out for validation.
train_dataset, validation_dataset = keras.utils.image_dataset_from_directory(
    'CIFAR-10-images/train',
    validation_split=0.2,
    subset='both',
    seed=123123,
    **loader_options)

test_dataset = keras.utils.image_dataset_from_directory(
    'CIFAR-10-images/test',
    **loader_options)

# Print the three channel values of the first pixel of the first image in one
# training batch. The values are still raw (not preprocessed for VGG16).
first_batch = train_dataset.take(1).get_single_element()
first_image = first_batch[0][0]
print(first_image[0][0])

Found 50000 files belonging to 10 classes.
Using 40000 files for training.
Using 10000 files for validation.
Found 10000 files belonging to 10 classes.
tf.Tensor([128. 172. 121.], shape=(3,), dtype=float32)


In [13]:
from keras.applications import VGG16

# VGG16 without its classification head (include_top=False), so the model
# outputs feature maps. 'imagenet' is the default weight set, spelled out
# here for clarity.
VGG16_model = VGG16(weights='imagenet', include_top=False)

# Freeze the backbone: it is only used as a fixed feature extractor.
VGG16_model.trainable = False

VGG16_model.summary()

In [14]:
from keras.applications.vgg16 import preprocess_input


# Sanity check: push one training batch through the frozen backbone and look
# at what comes out before committing to the full (slow) extraction run.

# get_single_element() returns the one (images, labels) batch directly and
# raises immediately if the dataset is empty, instead of leaving `images`
# undefined for the lines below.
small_batch = train_dataset.take(1)
images, labels = small_batch.get_single_element()

# VGG16 must be fed inputs that went through its own preprocess_input.
preprocessed_images = preprocess_input(images)

# Run the small batch through the pretrained model.
# NOTE(review): despite its name, test_features here holds features of a
# *training* batch; the name is reassigned to the real test split later on.
test_features = VGG16_model.predict(preprocessed_images)

# Print the shape of the output
print("Shape of extracted features:", test_features.shape)

# Inspect a 5x5 spatial slice of feature channel 0 for the first image
print("Sample feature values:", test_features[0, :5, :5, 0])  # Print a small slice

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 953ms/step
Shape of extracted features: (32, 7, 7, 512)
Sample feature values: [[0.        0.        0.        0.        0.       ]
 [0.        0.        0.        0.        0.       ]
 [0.        0.        0.        0.        0.       ]
 [0.        0.        0.        2.658058  0.       ]
 [0.        0.        0.        5.8831825 5.4482813]]


In [None]:
from keras.applications.vgg16 import preprocess_input
import numpy as np
from tqdm import tqdm

# Process datasets with proper preprocessing
def process_dataset(dataset, model, desc="Processing"):
    """Run every batch of ``dataset`` through ``model`` and collect the results.

    Each batch of images is passed through VGG16's ``preprocess_input`` and
    then through ``model``; the per-batch outputs and labels are joined along
    the first (sample) axis.

    Args:
        dataset: Iterable yielding ``(images, labels)`` batches, e.g. the
            datasets returned by ``image_dataset_from_directory``.
        model: Model exposing ``predict_on_batch`` (the frozen VGG16 backbone
            in this notebook).
        desc: Label shown on the tqdm progress bar.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays, one row per sample.

    Raises:
        ValueError: If ``dataset`` yields no batches.
    """
    # Ask the dataset for its batch count instead of iterating it once just
    # to count: a counting pass loads and decodes every image a second time.
    # len() raises TypeError when the size is unknown or infinite; tqdm then
    # simply shows a counter without a total.
    try:
        total_batches = len(dataset)
    except TypeError:
        total_batches = None

    features_list = []
    labels_list = []

    # A dataset starts from the beginning every time it is iterated, so no
    # explicit "reset" is needed before this loop.
    for images, labels in tqdm(dataset, total=total_batches, desc=desc):
        # Apply VGG16 preprocessing to each batch
        preprocessed_images = preprocess_input(images)
        # predict_on_batch is the single-batch entry point; Model.predict is
        # documented as not intended for use inside loops over small batches.
        features_list.append(model.predict_on_batch(preprocessed_images))
        labels_list.append(np.asarray(labels))

    if not features_list:
        raise ValueError("dataset yielded no batches; nothing to extract")

    # concatenate (rather than vstack) joins along the sample axis for any
    # label rank, including 1-D integer labels with a shorter final batch.
    return (np.concatenate(features_list, axis=0),
            np.concatenate(labels_list, axis=0))

# Extract features split by split. Each call returns a (features, labels)
# pair; results are bound as soon as a split finishes.
print("Processing training dataset...")
train_features, train_labels = process_dataset(
    train_dataset, VGG16_model, desc="Training")

print("Processing validation dataset...")
validation_features, validation_labels = process_dataset(
    validation_dataset, VGG16_model, desc="Validation")

print("Processing test dataset...")
test_features, test_labels = process_dataset(
    test_dataset, VGG16_model, desc="Test")

# Report the feature array shapes to confirm the output dimensions
for heading, split_features in (
        ("Training features shape:", train_features),
        ("Validation features shape:", validation_features),
        ("Test features shape:", test_features)):
    print(heading, split_features.shape)

# Persist features and labels as .npy files so the slow extraction step does
# not have to be repeated (features first, then labels).
for filename, payload in (
        ('train_features.npy', train_features),
        ('validation_features.npy', validation_features),
        ('test_features.npy', test_features),
        ('train_labels.npy', train_labels),
        ('validation_labels.npy', validation_labels),
        ('test_labels.npy', test_labels)):
    np.save(filename, payload)

Processing training dataset...


Training: 100%|██████████| 1250/1250 [17:41<00:00,  1.18it/s]


Processing validation dataset...


Validation: 100%|██████████| 313/313 [04:06<00:00,  1.27it/s]


Processing test dataset...


Test: 100%|██████████| 313/313 [04:03<00:00,  1.29it/s]


Training features shape: (40000, 7, 7, 512)
Validation features shape: (10000, 7, 7, 512)
Test features shape: (10000, 7, 7, 512)


In [20]:
import keras
import numpy as np

# Reload the cached features and labels from the .npy files in the working
# directory, so this part of the notebook can run without redoing extraction.
(train_features, validation_features, test_features,
 train_labels, validation_labels, test_labels) = map(np.load, (
    'train_features.npy',
    'validation_features.npy',
    'test_features.npy',
    'train_labels.npy',
    'validation_labels.npy',
    'test_labels.npy',
))

In [24]:
# Classification head trained on top of the cached VGG16 features.
# Input: one (7, 7, 512) feature map per image, averaged over its spatial
# positions and then passed through three Dense -> BatchNorm -> Dropout
# stages before the 10-way softmax output.
fullyConnectedClassifier = keras.Sequential()
fullyConnectedClassifier.add(keras.layers.InputLayer(shape=(7, 7, 512)))
fullyConnectedClassifier.add(keras.layers.GlobalAveragePooling2D())
for hidden_units in (512, 256, 256):
    fullyConnectedClassifier.add(keras.layers.Dense(hidden_units, activation='relu'))
    fullyConnectedClassifier.add(keras.layers.BatchNormalization())
    fullyConnectedClassifier.add(keras.layers.Dropout(0.5))
fullyConnectedClassifier.add(keras.layers.Dense(10, activation='softmax'))

optimizer = keras.optimizers.Adam(learning_rate=0.005)

# Labels are stored as one row of 10 values per sample, hence the
# categorical (not sparse) cross-entropy loss.
fullyConnectedClassifier.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

fullyConnectedClassifier.summary()

# Fit on the cached training features, monitoring the validation split
fullyConnectedClassifier.fit(
    train_features,
    train_labels,
    validation_data=(validation_features, validation_labels),
    batch_size=32,
    epochs=10,
)

# Final score on the held-out test features; evaluate() returns the loss
# followed by the compiled metrics.
test_loss, test_accuracy = fullyConnectedClassifier.evaluate(test_features, test_labels)
print("Test accuracy:", test_accuracy)
print("Test loss:", test_loss)


Epoch 1/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.5461 - loss: 1.4003 - val_accuracy: 0.7279 - val_loss: 0.7712
Epoch 2/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6910 - loss: 0.9115 - val_accuracy: 0.7286 - val_loss: 0.7872
Epoch 3/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7048 - loss: 0.8599 - val_accuracy: 0.7471 - val_loss: 0.7116
Epoch 4/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7186 - loss: 0.8287 - val_accuracy: 0.7356 - val_loss: 0.7439
Epoch 5/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7218 - loss: 0.8218 - val_accuracy: 0.7505 - val_loss: 0.7175
Epoch 6/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7242 - loss: 0.8211 - val_accuracy: 0.7453 - val_loss: 0.7344
Epoch 7/10
[1m1

Aluksi laskin piirteet (featuret) esikoulutetulla mallilla, mutta syötekuvat olivat väärässä muodossa. VGG16 vaatii, että kuvat esikäsitellään `preprocess_input`-funktiolla ennen kuin ne syötetään mallille.