In [2]:
from keras.applications import VGG16

# Build the VGG16 convolutional base for 150x150 RGB inputs.
conv_base = VGG16(
    weights='imagenet',         # use weights trained on ImageNet
    include_top=False,          # leave out the network's top fully connected layers
    input_shape=(150, 150, 3),
)

# Print the layer-by-layer architecture (output shapes and parameter counts).
conv_base.summary()

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 150, 150, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584   

**여기에 fully connected layer를 연결한다.**

- 연결하는 방식은 크게 2가지가 있다.
    - 데이터셋을 conv_base에 한 번만 통과시켜 얻은 output을 저장해 두고, 이를 연결할 fully connected layer의 입력값으로 넣는다.
    - (conv_base + fully connected layer) 전체에 데이터셋을 여러 번 통과시킨다.

In [4]:
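# Approach 1: pass the dataset through conv_base once and reuse the saved outputs.
# Data augmentation cannot be used with this approach, because each image is only processed once.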
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

# Dataset root, plus one sub-directory path per split (train / validation / test).
base_dir = './datasets/cats_and_dogs_small'
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, split_name)
    for split_name in ('train', 'validation', 'test')
)

# Number of images requested per batch when reading from a directory.
batch_size = 20

# Generator that only rescales pixel values by 1/255; no augmentation options are set.
datagen = ImageDataGenerator(rescale=1.0 / 255)

In [5]:
def extraction_features(directory, sample_count):
    """Run images from ``directory`` through ``conv_base`` and collect the outputs.

    Relies on the module-level ``conv_base`` (feature extractor), ``datagen``
    (image generator) and ``batch_size`` defined in earlier cells.

    Args:
        directory: Path handed to ``datagen.flow_from_directory``.
        sample_count: Number of samples to collect.

    Returns:
        A ``(features, labels)`` pair of NumPy arrays. ``features`` has shape
        ``(sample_count,) + conv_base.output_shape[1:]`` and ``labels`` has
        shape ``(sample_count,)``.

    Raises:
        ValueError: If the generator yields an empty batch before
            ``sample_count`` samples have been collected.
    """
    # Take the per-sample feature shape from the model instead of hard-coding it.
    feature_shape = tuple(conv_base.output_shape[1:])
    features = np.zeros(shape=(sample_count,) + feature_shape)
    labels = np.zeros(shape=(sample_count,))
    if sample_count == 0:
        # Nothing to collect; avoid pulling (and predicting on) a batch needlessly.
        return features, labels

    generator = datagen.flow_from_directory(directory=directory,
                                            target_size=(150, 150),
                                            batch_size=batch_size,
                                            class_mode='binary')
    # Each item produced by the generator is an (x, y) pair:
    #   x = batch of images, y = array of labels for that batch.
    filled = 0  # number of rows of `features` / `labels` written so far
    for inputs_batch, labels_batch in generator:
        # A batch may be shorter than `batch_size`, and the last one we use may
        # overshoot `sample_count`, so advance by the number of rows actually
        # stored rather than by a fixed stride.
        take = min(len(inputs_batch), sample_count - filled)
        if take == 0:
            raise ValueError(
                'generator yielded an empty batch for directory %r' % (directory,))
        features[filled:filled + take] = conv_base.predict(inputs_batch)[:take]
        labels[filled:filled + take] = labels_batch[:take]
        filled += take
        if filled >= sample_count:
            # The generator keeps producing data indefinitely inside the loop,
            # so stop once the requested number of samples has been collected.
            break
    return features, labels

# Extract conv_base features and labels for each split. The second argument is
# the number of samples to collect from that directory.
train_features, train_labels = extraction_features(train_dir, 2000)
validation_features, validation_labels = extraction_features(validation_dir, 1000)
test_features, test_labels = extraction_features(test_dir, 1000)

Found 2000 images belonging to 2 classes.


KeyboardInterrupt: 