### Improving Computer Vision Accuracy using Convolutions (Human vs Horse)


Dataset download (save it as `./tmp/horse-or-human.zip`): https://storage.googleapis.com/laurencemoroney-blog.appspot.com/horse-or-human.zip

In [1]:
import os
import zipfile

# Unpack the downloaded archive next to it. The context manager guarantees
# the archive handle is closed even if extraction fails part-way through.
local_zip = './tmp/horse-or-human.zip'
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('./tmp/horse-or-human')

In [None]:
# Directory the archive was extracted into; it holds one sub-directory per
# class ('horses' and 'humans').
base_dir = './tmp/horse-or-human'

# Join the components explicitly: os.path.join with a single argument just
# returns that argument unchanged.
train_horse_dir = os.path.join(base_dir, 'horses')
train_human_dir = os.path.join(base_dir, 'humans')

In [None]:
# os.listdir returns entries in arbitrary order; sorting makes the printed
# sample and any later slicing of these lists reproducible across runs.
train_horse_names = sorted(os.listdir(train_horse_dir))
print(train_horse_names[:10])

train_human_names = sorted(os.listdir(train_human_dir))
print(train_human_names[:10])


In [None]:
import matplotlib.pyplot as plt 
import matplotlib.image as mpimg 

# Preview grid is 4 x 4, i.e. 16 images per figure.
nrows, ncols = 4, 4

# Running offset into the image name lists; the preview cell advances it.
pic_index = 0


In [None]:
# Advance the window by eight files; re-running this cell shows the next batch.
pic_index += 8
batch = slice(pic_index - 8, pic_index)

# Full paths for the eight horse and eight human images in the current window.
next_horse_pix = [os.path.join(train_horse_dir, fname) for fname in train_horse_names[batch]]
next_human_pix = [os.path.join(train_human_dir, fname) for fname in train_human_names[batch]]

# Size the current figure so each grid cell is 4 x 4 inches.
fig = plt.gcf()
fig.set_size_inches(ncols * 4, nrows * 4)

# Horses fill the first half of the grid, humans the second half.
for i, img_path in enumerate(next_horse_pix + next_human_pix):
    sp = plt.subplot(nrows, ncols, i + 1)  # subplot positions are 1-based
    sp.axis('Off')  # hide axes and ticks

    img = mpimg.imread(img_path)
    plt.imshow(img)

plt.show()


In [None]:
import tensorflow as tf

In [None]:
# Binary image classifier: five Conv2D + MaxPooling2D stages followed by a
# dense head. Inputs are 300x300 images with 3 channels; the output is a
# single sigmoid unit.
model = tf.keras.models.Sequential(
    [
        # The first layer declares the input shape. The trailing comma was
        # missing here, which made the whole list a syntax error.
        tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(300, 300, 3)),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # Flatten the feature maps and classify.
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ]
)

model.summary()


In [None]:
from tensorflow.keras.optimizers import RMSprop

# Binary cross-entropy pairs with the single sigmoid output unit.
# `learning_rate` replaces the deprecated `lr` alias, which newer Keras
# releases reject.
model.compile(
    loss='binary_crossentropy',
    optimizer=RMSprop(learning_rate=0.001),
    metrics=['accuracy'],
)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Scale every pixel value by 1/255 as images are loaded.
train_datagen = ImageDataGenerator(rescale=1 / 255)

# Stream training images from the extracted dataset directory, resized to
# 300x300, in batches of 128 with binary labels.
train_generator = train_datagen.flow_from_directory(
    directory='./tmp/horse-or-human/',
    class_mode='binary',
    batch_size=128,
    target_size=(300, 300),
)


In [None]:
# Train for 15 epochs of 8 generator batches each, with progress output on;
# the returned object is kept in `history`.
fit_options = dict(steps_per_epoch=8, epochs=15, verbose=1)
history = model.fit(train_generator, **fit_options)


### Week4 Quiz

1. Using Image Generator, how do you label images?
- [ ] It's based on the file name
- [X] It's based on the directory the image is contained in
- [ ] You have to manually do it
- [ ] TensorFlow figures it out from the contents

2. What method on the Image Generator is used to normalize the image?
- [ ] normalize_image
- [X] rescale
- [ ] normalize
- [ ] Rescale_image

3. How did we specify the training size for the images?
- [ ] The training_size parameter on the validation generator
- [X] The target_size parameter on the training generator
- [ ] The training_size parameter on the training generator
- [ ] The target_size parameter on the validation generator

4. When we specify the input_shape to be (300,300,3), what does that mean?
- [ ] There will be 300 horses and 300 humans, loaded in batches of 3
- [ ] Every Image will be 300x300 pixels, and there should be 3 Convolutional Layers
- [X] Every Image will be 300x300 pixels, with 3 bytes to define color
- [ ] There will be 300 images, each size 300, loaded in batches of 3

5. If your training data is close to 1.000 accuracy, but your validation data isn’t, what’s the risk here?
- [ ] You’re underfitting on your validation data
- [X] You’re overfitting on your training data  
- [ ] No risk, that’s a great result  
- [ ] You’re overfitting on your validation data

6. Convolutional Neural Networks are better for classifying images like horses and humans because:
- [ ] In these images, the features may be in different parts of the frame
- [ ] There’s a wide variety of horses
- [ ] There’s a wide variety of humans
- [X] All of the above

7. After reducing the size of the images, the training results were different. Why?
- [ ] There was more condensed information in the images
- [ ] The training was faster
- [X] We removed some convolutions to handle the smaller images
- [ ] There was less information in the images