<a href="https://colab.research.google.com/github/LouisVanLangendonck/UPC-AML-ArchitectureClassif/blob/main/feature_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import itertools
import os
import matplotlib.pylab as plt
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import keras
import random
# Report the versions of the deep-learning stack and whether TensorFlow can
# see a GPU, so the captured output documents the runtime environment.
for heading, module in (("TF version:", tf), ("Hub version:", hub), ("keras version:", keras)):
    print(heading, module.__version__)
gpu_state = "available" if tf.config.list_physical_devices('GPU') else "NOT AVAILABLE"
print("GPU is", gpu_state)

TF version: 2.9.2
Hub version: 0.12.0
keras version: 2.9.0
GPU is NOT AVAILABLE


In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem; the dataset and output paths
# used in this notebook live under this mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point, force_remount=True)

Mounted at /content/drive


In [None]:
# Dataset roots on the mounted Drive: one sub-directory per class.
train_data = '/content/drive/MyDrive/FIB-2022-2023/aml/data/unzipped/train+val'
test_data = '/content/drive/MyDrive/FIB-2022-2023/aml/data/unzipped/test'

# List the entries of the training root as a sanity check of the path.
class_dirs = os.listdir(train_data)
print(class_dirs)

['modernism', 'gothic', 'contemporary', 'baroque', 'noucentisme', 'romanesque', 'neoclassicism']


In [None]:
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg19 import preprocess_input

# The ImageNet VGG19 weights expect inputs prepared by `vgg19.preprocess_input`
# (RGB -> BGR and per-channel ImageNet mean subtraction, with no scaling to
# [0, 1]). A plain 1/255 rescale feeds the network a value range it was never
# trained on and degrades the extracted features, so the model's own
# preprocessing is applied to every image instead.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
batch_size = 150
image_size = (224, 224)  # VGG19's native input resolution; yields 7x7x512 feature maps

# Labels are one-hot encoded ('categorical'); class indices follow the
# sub-directory names of each root.
train_generator = datagen.flow_from_directory(
        train_data,
        target_size=image_size,
        batch_size=batch_size,
        class_mode='categorical')

test_generator = datagen.flow_from_directory(
        test_data,
        target_size=image_size,
        batch_size=batch_size,
        class_mode='categorical')

Found 12618 images belonging to 7 classes.
Found 3109 images belonging to 7 classes.


In [None]:
from tensorflow.keras.applications import VGG19
from keras import layers
from keras import models

# VGG19 with ImageNet weights and include_top=False; its output is what
# extract_features stores as the per-image feature map.
feature_extractor = VGG19(include_top=False, weights='imagenet')

In [None]:
# Read the dataset sizes from the directory iterators instead of hard-coding
# them, so the feature/label arrays stay correctly sized if images or classes
# are added to or removed from the dataset folders.
nr_train_images = train_generator.samples
nr_test_images = test_generator.samples
nr_of_target_classes = train_generator.num_classes

In [None]:
from PIL import ImageFile
# Ask Pillow to load image files whose data is truncated rather than failing on
# them (presumably some dataset images are incomplete -- TODO confirm).
ImageFile.LOAD_TRUNCATED_IMAGES = True
# Requires tqdm for the progress bar (`pip install tqdm` if it is missing).
from tqdm import tqdm

def extract_features(generator, sample_count):
    """Run every image of `generator` through `feature_extractor` once.

    Batches are pulled from `generator` until `sample_count` rows have been
    written, then iteration stops (Keras directory iterators loop forever, so
    the function must break out itself).

    Args:
        generator: iterable yielding `(inputs_batch, labels_batch)` pairs.
        sample_count: number of samples to extract; should equal the number of
            images behind `generator`, otherwise the iterator wraps around and
            images are repeated.

    Returns:
        Tuple `(features, labels)` with shapes `(sample_count, 7, 7, 512)` and
        `(sample_count, nr_of_target_classes)`. If `generator` is exhausted
        before `sample_count` rows are filled, the remaining rows stay zero.

    Relies on the notebook-level `batch_size`, `nr_of_target_classes` and
    `feature_extractor`.
    """
    n_batches = int(np.ceil(sample_count / batch_size))
    print('Beginning feature extraction for {} samples in {} batches:'.format(sample_count, n_batches))

    features = np.zeros(shape=(sample_count, 7, 7, 512))
    labels = np.zeros(shape=(sample_count, nr_of_target_classes))

    filled = 0  # number of rows written so far
    with tqdm(total=n_batches, position=0, leave=True) as pbar:
        for inputs_batch, labels_batch in generator:
            features_batch = feature_extractor.predict(inputs_batch, verbose=0)
            # Write at a running offset and clip to the remaining space, so a
            # short final batch (or any oddly sized batch) lands in the right
            # rows. The previous index arithmetic ended one batch early and
            # filled the tail with a copy of the preceding batch.
            take = min(len(features_batch), sample_count - filled)
            features[filled:filled + take] = features_batch[:take]
            labels[filled:filled + take] = labels_batch[:take]
            filled += take
            pbar.update(n=1)
            if filled >= sample_count:
                break

    print('Features extracted!')
    print('Shape of feature vector:{}'.format(features.shape))
    print('Shape of labels vector:{}'.format(labels.shape))
    return features, labels

print('Train Feature Extraction:')
train_features, train_labels = extract_features(train_generator, nr_train_images)
print('Test Feature Extraction:')
test_features, test_labels = extract_features(test_generator, nr_test_images)

Train Feature Extraction:
Beginning feature extraction for 12618 samples in 85 batches:


 99%|█████████▉| 84/85 [2:15:13<01:35, 95.68s/it]

final batch


 99%|█████████▉| 84/85 [2:16:33<01:37, 97.54s/it]


Features extracted!
Shape of feature vector:(12618, 7, 7, 512)
Shape of labels vector:(12618, 7)
Test Feature Extraction:
Beginning feature extraction for 3109 samples in 21 batches:


 95%|█████████▌| 20/21 [31:55<01:38, 98.33s/it]

final batch


 95%|█████████▌| 20/21 [33:14<01:39, 99.75s/it]

Features extracted!
Shape of feature vector:(3109, 7, 7, 512)
Shape of labels vector:(3109, 7)





In [None]:
# Bundle the (features, labels) pairs of both splits into one object array and
# store it on Drive so the extraction does not have to be repeated.
train_pair = (train_features, train_labels)
test_pair = (test_features, test_labels)
all_features = np.asarray([train_pair, test_pair], dtype=object)
np.save('/content/drive/MyDrive/FIB-2022-2023/aml/models/VGG19_features.npy', all_features)

In [None]:
# Flatten each 7x7x512 feature map into one vector per image. The row count is
# taken from the array itself, so this cell no longer depends on image-count
# variables that are not defined anywhere in the notebook.
inp_train_features = train_features.reshape(len(train_features), -1)
inp_test_features = test_features.reshape(len(test_features), -1)
# NOTE(review): no validation features are extracted in this notebook (the
# training directory is the combined 'train+val' set), so the former
# `inp_val_features` line could only raise NameError. A validation matrix has
# to be split off `inp_train_features` instead.