In [4]:
from google.colab import drive
# Mount Google Drive at /gdrive so the dataset archive stored there can be read.
drive.mount("/gdrive")

Mounted at /gdrive


In [0]:
import zipfile

# Unpack the training archive from the mounted Drive into the current working
# directory. The context manager guarantees the archive handle is closed even
# if extraction fails part-way (e.g. disk full, corrupt member).
with zipfile.ZipFile("/gdrive/My Drive/additional/train.zip") as f:
    f.extractall()

In [7]:
import glob
import pandas as pd
# Collect the image paths per class; the label is encoded in the file name
# prefix ("cat." -> 0, "dog." -> 1).
catf = glob.glob("train/cat.*")
dogf = glob.glob("train/dog.*")

# One label per discovered file, in the same order as the paths.
catt = [0 for _ in catf]
dogt = [1 for _ in dogf]

# Cats first, then dogs: a two-column table of file path and class label.
df = pd.DataFrame(dict(path=catf + dogf, ans=catt + dogt))
df

Unnamed: 0,path,ans
0,train/cat.7482.jpg,0
1,train/cat.5132.jpg,0
2,train/cat.3502.jpg,0
3,train/cat.251.jpg,0
4,train/cat.9148.jpg,0
...,...,...
24995,train/dog.4757.jpg,1
24996,train/dog.9115.jpg,1
24997,train/dog.3008.jpg,1
24998,train/dog.6541.jpg,1


In [0]:
from keras.preprocessing.image import load_img
import numpy as np
# Sanity check: load a single sample image, resized to 224x224 through
# target_size (the same size the VGG16 input below is declared with), and
# convert it to a numpy array.
img = load_img("train/dog.7507.jpg", target_size=(224, 224))
img_np = np.array(img)
# Uncomment to inspect the raw pixel values:
# img_np

In [16]:
from keras.applications.vgg16 import VGG16
# VGG16 convolutional base only: include_top=False leaves out the original
# fully connected classifier so a custom head can be attached instead.
vgg = VGG16(
    input_shape=(224, 224, 3),
    include_top=False,
)
vgg.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [38]:
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import BatchNormalization

# Freeze every layer of the VGG16 base so that only the new head is trained.
for layer in vgg.layers:
    layer.trainable = False

# Classification head, applied in order on top of the VGG16 output:
# normalise -> flatten -> two ReLU dense layers with dropout -> 2-way softmax.
head_layers = [
    BatchNormalization(),
    Flatten(),
    Dense(2048, activation="relu"),
    Dropout(0.25),
    Dense(256, activation="relu"),
    Dropout(0.25),
    Dense(2, activation="softmax"),
]

x = vgg.output
for head_layer in head_layers:
    x = head_layer(x)

# Full model: VGG16 input through the frozen base and the new head.
cnn = Model(inputs=vgg.input, outputs=x)
cnn.summary()


Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0   

In [0]:
# Labels are one-hot encoded and the last layer is a softmax, hence
# categorical cross-entropy; accuracy is reported alongside the loss.
cnn.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [0]:
# Important: once training starts, everything must be a numpy array.
import numpy as np
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split

# Pull the path and label columns out of the DataFrame ...
x = df["path"]
y = df["ans"]
# ... and convert them to plain numpy arrays.
x_np = np.array(x)
y_np = np.array(y)
# One-hot encode the integer labels to match the 2-unit softmax output.
y_np_cat = to_categorical(y_np)
# Hold out 10% of the samples. A fixed random_state makes the split identical
# on every run, so the held-out set stays the same when the notebook is
# re-executed and results remain comparable.
x_train, x_test, y_train, y_test = train_test_split(
    x_np, y_np_cat, test_size=0.1, random_state=42
)

In [0]:
# preprocess: https://github.com/keras-team/keras-applications/blob/master/keras_applications/imagenet_utils.py
from keras.applications.vgg16 import preprocess_input
def get_images(x, y, size):
    """Draw a random batch of preprocessed images and their labels.

    Args:
        x: numpy array of image file paths.
        y: numpy array of labels, indexed in parallel with ``x``.
        size: number of samples to draw. Indices come from
            ``np.random.randint``, so the same sample may be picked more
            than once.

    Returns:
        A tuple ``(images, labels)``: the loaded images (each resized to
        224x224 and passed through VGG16's ``preprocess_input``) stacked
        into one numpy array, and the matching rows of ``y``.
    """
    picks = np.random.randint(0, len(x), size)
    batch = [
        preprocess_input(np.array(load_img(path, target_size=(224, 224))))
        for path in x[picks]
    ]
    return (np.array(batch), y[picks])

In [0]:
# Fixed batch of 20 held-out images, reused as the validation batch.
xtest, ytest = get_images(x_test, y_test, size=20)

In [43]:
# Manual training loop: each step trains on a fresh random batch of 20
# training images, then scores the model on the fixed validation batch.
for i in range(10):
    print("Times:", i)

    x, y = get_images(x_train, y_train, size=20)
    result = cnn.train_on_batch(x, y)
    print("Train:", result)

    test_result = cnn.test_on_batch(xtest, ytest)
    print("Validate:", test_result)

    print("*" * 50)

Times: 0
Train: [0.80902797, 0.65]
Validate: [3.7092566, 0.7]
**************************************************
Times: 1
Train: [2.1551003, 0.55]
Validate: [0.5945722, 0.9]
**************************************************
Times: 2
Train: [1.3575257, 0.9]
Validate: [1.544062, 0.85]
**************************************************
Times: 3
Train: [1.6765306, 0.75]
Validate: [0.42872137, 0.9]
**************************************************
Times: 4
Train: [1.7117046, 0.8]
Validate: [0.09312683, 0.9]
**************************************************
Times: 5
Train: [2.6866076, 0.75]
Validate: [0.21526854, 0.9]
**************************************************
Times: 6
Train: [0.62482405, 0.85]
Validate: [0.05024139, 0.95]
**************************************************
Times: 7
Train: [0.51524687, 0.9]
Validate: [0.019498855, 1.0]
**************************************************
Times: 8
Train: [1.0688536, 0.85]
Validate: [0.19733672, 0.9]
***********************************

In [46]:
# A functional Model has no predict_classes(): call predict() and take the
# argmax over the class axis instead.
np.argmax(cnn.predict(xtest), axis=1)

array([0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0])

In [47]:
# Final check on a larger random batch of 200 held-out images.
xfinal, yfinal = get_images(x_test, y_test, size=200)
cnn.evaluate(x=xfinal, y=yfinal)



[0.7356550100510685, 0.92]