##### Copyright 2018 The TensorFlow Authors.

In [0]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Image classification

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://www.tensorflow.org/tutorials/images/classification"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/images/classification.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/docs/blob/master/site/en/tutorials/images/classification.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
  <td>
    <a href="https://storage.googleapis.com/tensorflow_docs/docs/site/en/tutorials/images/classification.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a>
  </td>
</table>

This tutorial shows how to classify cats or dogs from images. It builds an image classifier using a `tf.keras.Sequential` model and loads data using `tf.keras.preprocessing.image.ImageDataGenerator`. You will get some practical experience and develop intuition for the following concepts:

* Building _data input pipelines_ using the `tf.keras.preprocessing.image.ImageDataGenerator` class to efficiently work with data on disk to use with the model.
* _Overfitting_ —How to identify and prevent it.
* _Data augmentation_ and _dropout_ —Key techniques to fight overfitting in computer vision tasks to incorporate into the data pipeline and image classifier model.

This tutorial follows a basic machine learning workflow:

1. Examine and understand data
2. Build an input pipeline
3. Build the model
4. Train the model
5. Test the model
6. Improve the model and repeat the process

## Import packages

Let's start by importing the required packages. The `os` package is used to read files and the directory structure, NumPy is used to convert Python lists to NumPy arrays and to perform the required matrix operations, and `matplotlib.pyplot` is used to plot graphs and display images from the training and validation data.

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals

Import TensorFlow and the Keras classes needed to construct our model.

In [0]:
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf

In [0]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

## Load data

Begin by downloading the dataset. This tutorial uses a filtered version of the <a href="https://www.kaggle.com/c/dogs-vs-cats/data" target="_blank">Dogs vs Cats</a> dataset from Kaggle. Download the archive version of the dataset and extract it. The original tutorial stores it in the "/tmp/" directory; this notebook instead reads the extracted copy from Google Drive (see `PATH` below).

The dataset has the following directory structure:

<pre>
<b>cats_and_dogs_filtered</b>
|__ <b>train</b>
    |______ <b>cats</b>: [cat.0.jpg, cat.1.jpg, cat.2.jpg ....]
    |______ <b>dogs</b>: [dog.0.jpg, dog.1.jpg, dog.2.jpg ...]
|__ <b>validation</b>
    |______ <b>cats</b>: [cat.2000.jpg, cat.2001.jpg, cat.2002.jpg ....]
    |______ <b>dogs</b>: [dog.2000.jpg, dog.2001.jpg, dog.2002.jpg ...]
</pre>

In [0]:
# Mount Google Drive into the Colab VM at /content/drive so the dataset
# stored there can be read like a local folder.
from google.colab import drive
drive.mount('/content/drive')

# Root folder of the extracted dataset; the train/validation/test
# sub-folder paths are derived from it in the next cell.
PATH ='/content/drive/My Drive/DOG_N_CAT/cats_and_dogs_filtered/'

After extracting its contents, assign variables with the proper file paths for the training, validation, and test sets.

In [0]:
# One sub-folder of PATH per data split, built in a single pass.
train_dir, validation_dir, test_dir = (
    os.path.join(PATH, split_name)
    for split_name in ('train', 'validation', 'test')
)

In [0]:
# Per-class picture folders inside the training split.
train_cats_dir, train_dogs_dir = (
    os.path.join(train_dir, animal) for animal in ('cats', 'dogs')
)
# Per-class picture folders inside the validation split.
validation_cats_dir, validation_dogs_dir = (
    os.path.join(validation_dir, animal) for animal in ('cats', 'dogs')
)

### Understand the data

Let's look at how many cat and dog images are in the training and validation directories:

In [0]:
# Number of directory entries in each class folder (one listing per folder,
# in the same order as the variables on the left-hand side).
num_cats_tr, num_dogs_tr, num_cats_val, num_dogs_val = (
    len(os.listdir(folder))
    for folder in (train_cats_dir, train_dogs_dir,
                   validation_cats_dir, validation_dogs_dir)
)

# Per-split totals; used later to derive steps_per_epoch / validation_steps.
total_train = sum((num_cats_tr, num_dogs_tr))
total_val = sum((num_cats_val, num_dogs_val))

In [0]:
# Per-class counts first, then the per-split totals after a separator line.
for caption, count in (
    ('total training cat images:', num_cats_tr),
    ('total training dog images:', num_dogs_tr),
    ('total validation cat images:', num_cats_val),
    ('total validation dog images:', num_dogs_val),
):
    print(caption, count)
print("--")
print("Total training images:", total_train)
print("Total validation images:", total_val)

For convenience, set up variables to use while pre-processing the dataset and training the network.

In [0]:
# Shared settings for the input pipelines and the training cells.
batch_size = 128  # images per batch for the training and validation generators
epochs = 15  # NOTE(review): the training cells below pass their own literal epoch counts instead of this value
IMG_HEIGHT = 150  # target image height in pixels
IMG_WIDTH = 150  # target image width in pixels
IMG_SIZE = (IMG_WIDTH, IMG_HEIGHT)  # (width, height) pair
IMG_CHANELS = 3  # colour channels per image (name spelling kept as-is because it is a notebook-wide variable)

# MAKE_MODEL

## Overfitting

## Data augmentation

Overfitting generally occurs when there are a small number of training examples. One way to fix this problem is to augment the dataset so that it has a sufficient number of training examples. Data augmentation takes the approach of generating more training data from existing training samples by augmenting the samples using random transformations that yield believable-looking images. The goal is the model will never see the exact same picture twice during training. This helps expose the model to more aspects of the data and generalize better.

Implement this in `tf.keras` using the `ImageDataGenerator` class. Pass  different transformations to the dataset and it will take care of applying it during the training process.

### Put it all together

Apply all the previous augmentations. Here, you apply rescaling, 45-degree rotation, width shift, height shift, horizontal flip, and zoom augmentation to the training images.

In [0]:
# Training generator: scales pixel values by 1/255 and applies random
# rotation, shifts, zoom and horizontal flips to every image it yields.
image_gen_train = ImageDataGenerator(
    rescale=1./255,
    rotation_range=45,
    width_shift_range=0.15,
    height_shift_range=0.15,
    zoom_range=0.5,
    horizontal_flip=True,
)

# Validation and test generators only rescale; no augmentation is applied.
image_gen_val, image_gen_test = (
    ImageDataGenerator(rescale=1./255) for _ in range(2)
)

In [0]:
# Training batches: shuffled, augmented, resized to the network input size,
# with one binary label per image taken from the class sub-folder.
train_data_gen = image_gen_train.flow_from_directory(batch_size=batch_size,
                                                     directory=train_dir,
                                                     shuffle=True,
                                                     target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                     class_mode='binary')
# Validation batches: rescaled only.
val_data_gen = image_gen_val.flow_from_directory(batch_size=batch_size,
                                                 directory=validation_dir,
                                                 target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                 class_mode='binary')
# Test images, one per batch and in a fixed order. They must come from the
# rescale-only test generator: the original used image_gen_train here, which
# would randomly rotate/shift/zoom/flip the test images.
# NOTE(review): flow_from_directory expects one sub-folder per class; other
# cells read files directly from test/ (e.g. test/cat.419.jpg), so confirm the
# test folder layout matches class_mode='binary'.
test_data_gen = image_gen_test.flow_from_directory(batch_size=1,
                                                   directory=test_dir,
                                                   shuffle=False,
                                                   target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                   class_mode='binary')

In [0]:
def plotImages(images_arr):
    """Show images side by side in a single row of five axis-free panels.

    Args:
        images_arr: Iterable of images accepted by ``imshow``. Images are
            paired with the five panels in order; any beyond the fifth are
            not drawn.
    """
    _, panel_grid = plt.subplots(1, 5, figsize=(20,20))
    for picture, panel in zip(images_arr, panel_grid.flatten()):
        panel.imshow(picture)
        panel.axis('off')
    plt.tight_layout()
    plt.show()

Visualize how a single image would look five different times when passing these augmentations randomly to the dataset.

In [0]:
# Ask the training generator for batch 0 five times and keep the first image
# of each returned batch, then show the five results in one row.
augmented_images = [
    train_data_gen[0][0][0]  # batch 0 -> image array -> first image
    for _ in range(5)
]
plotImages(augmented_images)

## Dropout

Another technique to reduce overfitting is to introduce *dropout* to the network. It is a form of *regularization* that forces the weights in the network to take only small values, which makes the distribution of weight values more regular, so the network can reduce overfitting on small training sets. Dropout is one of the regularization techniques used in this tutorial.

When you apply dropout to a layer, it randomly drops out (sets to zero) a number of output units from that layer during the training process. Dropout takes a fractional number as its input value, in a form such as 0.1, 0.2, 0.4, etc. This means dropping out 10%, 20% or 40% of the output units randomly from the applied layer.

When applying 0.1 dropout to a certain layer, it randomly kills 10% of the output units in each training step.

Create a network architecture with this new dropout feature and apply it to different convolutions and fully-connected layers.

## Creating a new network with Dropouts

Here, you apply dropout after the first and last max pool layers. Applying dropout will randomly set 20% of the neurons to zero during each training step. This helps to avoid overfitting on the training dataset.

In [0]:
# Three convolutional blocks (128 -> 64 -> 32 filters), each ending in max
# pooling, followed by a dense classifier with a single sigmoid output.
# Dropout(0.2) follows the first and the last pooling layer.
model_new = Sequential([
    # Block 1
    Conv2D(filters=128, kernel_size=3, padding='same', activation='relu',
           input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    Conv2D(filters=128, kernel_size=3, padding='same', activation='relu'),
    MaxPooling2D(),
    Dropout(rate=0.2),
    # Block 2
    Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'),
    Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'),
    MaxPooling2D(),
    # Block 3
    Conv2D(filters=32, kernel_size=3, padding='same', activation='relu'),
    Conv2D(filters=32, kernel_size=3, padding='same', activation='relu'),
    MaxPooling2D(),
    Dropout(rate=0.2),
    # Classifier head
    Flatten(),
    Dense(units=512, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output unit.
model_new.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model_new.summary()

### Compile the model

After introducing dropouts to the network, compile the model and view the layers summary.

### Train the model

After successfully introducing data augmentations to the training examples and adding dropouts to the network, train this new network:

In [0]:
# Train model_new on the augmented training generator and evaluate on the
# validation generator after every epoch.
# Model.fit accepts generators directly; fit_generator is deprecated in
# tf.keras and absent from recent releases. The dead, commented-out epoch
# settings were removed.
history = model_new.fit(
    train_data_gen,
    # whole batches per pass over the training / validation images
    steps_per_epoch=total_train // batch_size,
    epochs=100,
    validation_data=val_data_gen,
    validation_steps=total_val // batch_size
)

### Visualize the model

Visualize the new model after training. You can see that there is significantly less overfitting than before. The accuracy should go up after training the model for more epochs.

In [0]:
# Per-epoch metrics recorded by the most recent training run.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Take the x-axis length from the recorded history instead of a hard-coded
# 100, so the plot still works if the epoch count changes or training stops
# early.
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))
# Left panel: accuracy curves.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss curves.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [0]:
# Persist the trained model_new to Google Drive as an HDF5 (.h5) file.
model_new.save('/content/drive/My Drive/DOG_N_CAT/BAGUS_05022020_1705_val_acc_07679.h5')

In [0]:
import numpy as np

In [0]:
# Load one test image at the network's 150x150 input size.
img = tf.keras.preprocessing.image.load_img('/content/drive/My Drive/DOG_N_CAT/cats_and_dogs_filtered/test/cat.419.jpg',target_size=(150, 150))
# Scale pixels to [0, 1]: the training/validation generators use
# rescale=1./255, so raw 0-255 values would not match what the model saw.
img = np.asarray(img, dtype="float32") / 255.0
plt.imshow(img)

# Add the leading batch axis expected by predict().
img = np.expand_dims(img, axis=0)

# predict_classes() was removed from tf.keras; threshold the sigmoid output
# at 0.5 to obtain the same 0/1 class index.
result = (model_new.predict(img) > 0.5).astype("int32")
# Pass a plain string: the original wrapped the label in a list, so the axis
# label showed the list repr instead of the word.
plt.xlabel("KOCENG" if int(result[0][0]) == 0 else "ANJENG")
      






In [0]:
# Load one test image at the network's 150x150 input size.
img = tf.keras.preprocessing.image.load_img('/content/drive/My Drive/DOG_N_CAT/cats_and_dogs_filtered/test/dog.745.jpg',target_size=(150, 150))
# Scale pixels to [0, 1]: the training/validation generators use
# rescale=1./255, so raw 0-255 values would not match what the model saw.
img = np.asarray(img, dtype="float32") / 255.0
plt.imshow(img)

# Add the leading batch axis expected by predict().
img = np.expand_dims(img, axis=0)

# predict_classes() was removed from tf.keras; threshold the sigmoid output
# at 0.5 to obtain the same 0/1 class index.
result = (model_new.predict(img) > 0.5).astype("int32")
plt.xlabel("KOCENG" if int(result[0][0]) == 0 else "ANJENG")




In [0]:
# Load one test image at the network's 150x150 input size.
img = tf.keras.preprocessing.image.load_img('/content/drive/My Drive/DOG_N_CAT/cats_and_dogs_filtered/test/dog.900.jpg',target_size=(150, 150))
# Scale pixels to [0, 1]: the training/validation generators use
# rescale=1./255, so raw 0-255 values would not match what the model saw.
img = np.asarray(img, dtype="float32") / 255.0
plt.imshow(img)

# Add the leading batch axis expected by predict().
img = np.expand_dims(img, axis=0)

# predict_classes() was removed from tf.keras; threshold the sigmoid output
# at 0.5 to obtain the same 0/1 class index.
result = (model_new.predict(img) > 0.5).astype("int32")
plt.xlabel("KOCENG" if int(result[0][0]) == 0 else "ANJENG")



In [0]:
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.axes as axes



# Restore a previously saved model from Google Drive.
DATA_LOAD = tf.keras.models.load_model('/content/drive/My Drive/DOG_N_CAT/BAGUS_04022020_1706_val_acc_077.h5')


# Load one test image at the 150x150 size used elsewhere in this notebook.
img = tf.keras.preprocessing.image.load_img('/content/drive/My Drive/DOG_N_CAT/cats_and_dogs_filtered/test/cat.180.jpg',target_size=(150, 150))
# Scale pixels to [0, 1] to match the rescale=1./255 generators used for
# training in this notebook (presumably how the saved model was trained -
# confirm if the .h5 file came from elsewhere).
img = np.asarray(img, dtype="float32") / 255.0
plt.imshow(img)

# Add the leading batch axis expected by predict().
img = np.expand_dims(img, axis=0)

# predict_classes() was removed from tf.keras; threshold the sigmoid output
# at 0.5 to obtain the same 0/1 class index.
result = (DATA_LOAD.predict(img) > 0.5).astype("int32")

plt.xlabel("KOCENG" if int(result[0][0]) == 0 else "ANJENG")


In [0]:
# Mount Google Drive at /content/drive (repeats the mount done near the top
# of the notebook, e.g. for use after a runtime restart).
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Imported here because the notebook only imports glob in a later cell, so
# this cell failed with a NameError on a fresh kernel.
import glob

OMG = []  # file names of the test images
OMJ = []  # predicted label for each entry of OMG, in the same order

TAMPOL = "/content/drive/My Drive/DOG_N_CAT/cats_and_dogs_filtered/test/"

# glob("*.jpg") is relative, so work from inside the test folder.
os.chdir(TAMPOL)
for file in glob.glob("*.jpg"):
    OMG.append(file)

# Classify every test image once and remember its label. The original never
# filled OMJ, so the plotting loop below failed on OMJ[i].
for i in OMG:
  img = tf.keras.preprocessing.image.load_img(TAMPOL+str(i),target_size=(150, 150))
  # Scale to [0, 1] to match the rescale=1./255 generators used for training.
  img = np.asarray(img, dtype="float32") / 255.0
  img = np.expand_dims(img, axis=0)
  # predict_classes() was removed from tf.keras; threshold the sigmoid output.
  result = (DATA_LOAD.predict(img) > 0.5).astype("int32")
  OMJ.append("KOCENG" if int(result[0][0]) == 0 else "ANJENG")


# Show at most 18 images in a 3x6 grid, each captioned with its prediction.
# figsize reduced from (100, 100), which produces an enormous canvas.
plt.figure(figsize=(18,9))
for i in range(min(18, len(OMG))):  # do not index past the available files
    plt.subplot(3,6,i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    img = tf.keras.preprocessing.image.load_img(TAMPOL+str(OMG[i]),target_size=(150, 150))
    # imshow needs a single (height, width, channels) image; the original
    # added a batch axis first, which imshow rejects.
    img = np.asarray(img)
    plt.imshow(img)
    plt.xlabel(OMJ[i])
plt.show()



In [0]:
import glob
import os




# Restore a previously saved model from Google Drive; prediksi_semua below
# reads it through this global name.
DATA_LOAD = tf.keras.models.load_model('/content/drive/My Drive/DOG_N_CAT/BAGUS_04022020_1706_val_acc_077.h5')

def prediksi_semua(x):
  """Display the image at path ``x`` labelled with DATA_LOAD's prediction.

  Args:
    x: Path of an image file; it is loaded resized to 150x150.

  Each call draws on its own new figure. The x-axis label is "KOCENG" when
  the predicted class index is 0 and "ANJENG" otherwise. Relies on the
  notebook globals ``tf``, ``np``, ``plt`` and ``DATA_LOAD``.
  """
  img = tf.keras.preprocessing.image.load_img(x,target_size=(150, 150))
  # Scale to [0, 1] to match the rescale=1./255 generators used for training
  # in this notebook (presumably how DATA_LOAD was trained - confirm).
  img = np.asarray(img, dtype="float32") / 255.0
  # New figure per image; without it successive calls overwrite the same axes
  # and only the last image remains visible.
  plt.figure()
  plt.imshow(img)

  # Add the leading batch axis expected by predict().
  img = np.expand_dims(img, axis=0)

  # predict_classes() was removed from tf.keras; threshold the sigmoid output.
  result = (DATA_LOAD.predict(img) > 0.5).astype("int32")
  plt.xlabel("KOCENG" if int(result[0][0]) == 0 else "ANJENG")




# Switch into the test folder so the bare names returned by glob resolve,
# then run the prediction helper on every .jpg found there.
os.chdir("/content/drive/My Drive/DOG_N_CAT/cats_and_dogs_filtered/test/")
jpg_names = glob.glob("*.jpg")
for file in jpg_names:
    prediksi_semua(file)


In [0]:
ls /content/drive/

In [0]:
# Continue training the same model_new instance for 50 more epochs; `history`
# is replaced by the record of this run only.
# Model.fit accepts generators directly; fit_generator is deprecated in
# tf.keras and absent from recent releases.
history = model_new.fit(
    train_data_gen,
    steps_per_epoch=total_train // batch_size,
    epochs=50,
    validation_data=val_data_gen,
    validation_steps=total_val // batch_size
)

In [0]:
# Re-read the metrics from the latest `history`. The original reused the
# acc/val_acc/loss/val_loss lists extracted after the earlier 100-epoch run
# and plotted them against range(50), which fails with a length mismatch and
# would show stale data in any case.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))
# Left panel: accuracy curves.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss curves.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

In [0]:
# Save the model trained above. The original called `model.save`, but no
# variable named `model` is defined in this notebook; the trained network is
# `model_new`.
model_new.save('/content/drive/My Drive/DOG_N_CAT/BAGUS_04022020_0853.h5')

In [0]:
# Another 50-epoch round on the same model_new instance; `history` is
# replaced by the record of this run only.
# Model.fit accepts generators directly; fit_generator is deprecated in
# tf.keras and absent from recent releases.
history = model_new.fit(
    train_data_gen,
    steps_per_epoch=total_train // batch_size,
    epochs=50,
    validation_data=val_data_gen,
    validation_steps=total_val // batch_size
)

In [0]:
# Persist the current state of model_new to Google Drive; this is the file
# loaded again later as model_reuse.
model_new.save('/content/drive/My Drive/DOG_N_CAT/BAGUS_04022020_0910_VAL_ACC_078.h5')

In [0]:
# Deeper experiment: five convolutional blocks (64 -> 128 -> 256 -> 512 -> 512
# filters), each ending in max pooling, then the same dense head as model_new.
# Dropout(0.2) follows the first and the last pooling layer.
model_new2 = Sequential([
    # Block 1
    Conv2D(filters=64, kernel_size=3, padding='same', activation='relu',
           input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'),
    MaxPooling2D(),
    Dropout(rate=0.2),
    # Block 2
    Conv2D(filters=128, kernel_size=3, padding='same', activation='relu'),
    Conv2D(filters=128, kernel_size=3, padding='same', activation='relu'),
    MaxPooling2D(),
    # Block 3
    Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'),
    Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'),
    Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'),
    MaxPooling2D(),
    # Block 4
    Conv2D(filters=512, kernel_size=3, padding='same', activation='relu'),
    Conv2D(filters=512, kernel_size=3, padding='same', activation='relu'),
    Conv2D(filters=512, kernel_size=3, padding='same', activation='relu'),
    MaxPooling2D(),
    # Block 5
    Conv2D(filters=512, kernel_size=3, padding='same', activation='relu'),
    Conv2D(filters=512, kernel_size=3, padding='same', activation='relu'),
    Conv2D(filters=512, kernel_size=3, padding='same', activation='relu'),
    MaxPooling2D(),
    Dropout(rate=0.2),
    # Classifier head
    Flatten(),
    Dense(units=512, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output unit.
model_new2.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model_new2.summary()

# Train the deeper model_new2 for 50 epochs on the same generators; `history`
# now holds this run's metrics.
# Model.fit accepts generators directly; fit_generator is deprecated in
# tf.keras and absent from recent releases.
history = model_new2.fit(
    train_data_gen,
    steps_per_epoch=total_train // batch_size,
    epochs=50,
    validation_data=val_data_gen,
    validation_steps=total_val // batch_size
)

# **Note**

Setelah dibandingkan, model_new dengan arsitektur yang sederhana lebih cepat konvergen val_accuracy-nya (walaupun dengan beberapa kali iterasi/fit yang masing-masing 50 epochs).

Untuk model_new2 dengan arsitektur yang mendekati VGG16, konvergensi tidak tercapai; val_accuracy rendah, hanya pada kisaran 0.4 ~ 0.5, dan memakan banyak resource.

# RE-Use model
Yang telah di-train dan menghasilkan akurasi lumayan baik, 0.79.

In [0]:

# Reload the model file written by the earlier model_new.save(...) cell so it
# can be used for prediction without retraining.
model_reuse = tf.keras.models.load_model('/content/drive/My Drive/DOG_N_CAT/BAGUS_04022020_0910_VAL_ACC_078.h5')

In [0]:
import numpy as np

#test_image =tf.keras.preprocessing.image.load_img('/content/drive/My Drive/DOG_N_CAT/cats_and_dogs_filtered/validation/cats/cat.2000.jpg',target_size =(150,150))
#test_image =tf.keras.preprocessing.image.load_img('/content/drive/My Drive/DOG_N_CAT/cats_and_dogs_filtered/validation/dogs/dog.2000.jpg',target_size =(150,150))
#test_image =tf.keras.preprocessing.image.img_to_array(test_image)
#test_image =np.expand_dims(test_image, axis =0)
#result = model_reuse.predict(test_image)

#print(result)
#if result[0][0] >= 0.5:
#    prediction = 'dog'
#else:
#    prediction = 'cat'
#print(prediction)


#image_path="test_set/cat2.png"
#img = image.load_img(image_path, target_size=(IMG_SIZE, IMG_SIZE))
# Load one validation dog picture at the 150x150 input size and show it.
test_image =tf.keras.preprocessing.image.load_img('/content/drive/My Drive/DOG_N_CAT/cats_and_dogs_filtered/validation/dogs/dog.2000.jpg',target_size =(150,150))
plt.imshow(test_image)
# Scale pixels to [0, 1] (the training generators use rescale=1./255) and add
# the leading batch axis expected by predict().
test_image = np.expand_dims(np.asarray(test_image, dtype="float32") / 255.0, axis=0)
# predict_classes() was removed from tf.keras; threshold the sigmoid output
# at 0.5 to obtain the 0/1 class index.
result = (model_reuse.predict(test_image) > 0.5).astype("int32")
# get_label_name() is not defined anywhere in this notebook, so map the class
# index directly, following the commented-out logic above (>= 0.5 -> 'dog').
plt.title('dog' if result[0][0] == 1 else 'cat')
plt.show()