<a href="https://colab.research.google.com/github/RahulNaidu710/datasciencecoursera/blob/master/Kaggle_catsvsdogssimple_tf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab VM at /content/drive so the dataset archive
# stored there can be read (the `google.colab` package only exists on Colab).
from google.colab import drive
drive.mount('/content/drive')

In [2]:
# Import required libraries
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import re
import glob
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
%matplotlib inline
import random
import shutil
from shutil import copyfile
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
import pickle
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.applications.inception_v3 import InceptionV3

In [None]:
# Extract the dataset archive from the mounted Drive into /tmp
# (`unrar x` extracts with the archive's full internal paths).
!unrar x "/content/drive/My Drive/cats-vs-dogs.rar" "/tmp"

In [4]:
# Directory layout of the extracted dataset
base_dir = '/tmp/cats-vs-dogs'
train_dir, test_dir = (os.path.join(base_dir, split) for split in ('train', 'test'))
train_cats_dir, train_dogs_dir = (os.path.join(train_dir, name) for name in ('Cat', 'Dog'))
test_unknown_dir = os.path.join(test_dir, 'unknown')


def _digits_key(fname):
    """Return the integer formed by every digit in ``fname``, or -1 if it has none."""
    return int(''.join(ch for ch in fname if ch.isdigit()) or -1)


# File names of each folder, ordered by the number embedded in the name
train_cats_fnames = sorted(os.listdir(train_cats_dir), key=_digits_key)
train_dogs_fnames = sorted(os.listdir(train_dogs_dir), key=_digits_key)
test_fnames = sorted(os.listdir(test_unknown_dir), key=_digits_key)

In [None]:
# Preview the first ten file names found in each image folder
for fnames in (train_cats_fnames, train_dogs_fnames, test_fnames):
    print(fnames[:10])

In [None]:
# Report how many files each image folder currently holds
for caption, folder in (('total training cat images :', train_cats_dir),
                        ('total training dog images :', train_dogs_dir),
                        ('total test set images :', test_unknown_dir)):
    print(caption, len(os.listdir(folder)))

In [None]:
# Parameters for the preview figure: images are laid out on a 4x4 grid
nrows = 4
ncols = 4
pic_index = 0 # Offset into the file-name lists; the plotting cell advances it by 8 on every run

In [None]:
# Grab the current figure and size it so each grid cell gets about 4x4 inches
fig = plt.gcf()
fig.set_size_inches(ncols*4, nrows*4)

# Advance the window: every run of this cell shows the next 8 cats and 8 dogs
pic_index += 8
window = slice(pic_index - 8, pic_index)

next_cat_pix = [os.path.join(train_cats_dir, fname) for fname in train_cats_fnames[window]]
next_dog_pix = [os.path.join(train_dogs_dir, fname) for fname in train_dogs_fnames[window]]

# Cats are drawn first, followed by dogs
for i, img_path in enumerate(next_cat_pix + next_dog_pix):
    sp = plt.subplot(nrows, ncols, i + 1)  # subplot numbering is 1-based
    sp.axis('Off')  # hide axes and gridlines
    img = mpimg.imread(img_path)
    plt.imshow(img)

plt.show()

In [7]:
# Creates new training and validation directories.
# Each folder is created independently with exist_ok=True, so the cell is safe
# to re-run, a folder that already exists no longer prevents the remaining ones
# from being created, and genuine failures (e.g. permissions) are not swallowed.
for split_name in ('training', 'validation'):
    for class_name in ('Cat', 'Dog'):
        os.makedirs(os.path.join('/tmp/cats-vs-dogs', split_name, class_name), exist_ok=True)

In [8]:
# Function to split and copy data from source to training and validation subdirectories
def split_data(source, training, validation, split_size, seed=None):
    """Randomly split the files of ``source`` into training and validation copies.

    Zero-length files are reported and ignored; sub-directories are skipped.
    The remaining files are shuffled, the first ``int(n * split_size)`` are
    copied into ``training`` and all the others into ``validation``.

    Args:
        source: Directory holding the original files (trailing slash optional).
        training: Existing directory that receives the training share.
        validation: Existing directory that receives the validation share.
        split_size: Fraction of files, between 0 and 1, assigned to training.
        seed: Optional seed for a reproducible split. When ``None`` the global
            ``random`` module state is used, so ``random.seed`` is honoured.

    Raises:
        ValueError: If ``split_size`` is outside the range [0, 1].
    """
    if not 0 <= split_size <= 1:
        raise ValueError('split_size must be between 0 and 1, got %r' % (split_size,))

    files = []
    # Sorted listing so that a given seed always produces the same split
    for filename in sorted(os.listdir(source)):
        path = os.path.join(source, filename)
        if not os.path.isfile(path):
            continue  # copyfile cannot copy directories
        if os.path.getsize(path) > 0:
            files.append(filename)
        else:
            print(filename + ' is of 0 length, so ignoring it')

    training_length = int(len(files) * split_size)
    sampler = random if seed is None else random.Random(seed)
    shuffled_set = sampler.sample(files, len(files))

    # Slice from training_length instead of using a negative index: with zero
    # validation files, shuffled_set[-0:] would select the whole list.
    training_set = shuffled_set[:training_length]
    validation_set = shuffled_set[training_length:]

    for destination_dir, subset in ((training, training_set), (validation, validation_set)):
        for filename in subset:
            copyfile(os.path.join(source, filename), os.path.join(destination_dir, filename))
        
# Source and destination folders used for the split; every path ends with a slash
_DATASET_ROOT = "/tmp/cats-vs-dogs"

cat_source_dir = f"{_DATASET_ROOT}/train/Cat/"
dog_source_dir = f"{_DATASET_ROOT}/train/Dog/"
training_cats_dir = f"{_DATASET_ROOT}/training/Cat/"
training_dogs_dir = f"{_DATASET_ROOT}/training/Dog/"
validation_cats_dir = f"{_DATASET_ROOT}/validation/Cat/"
validation_dogs_dir = f"{_DATASET_ROOT}/validation/Dog/"

In [None]:
# Copy 90% of each class into the training folders and the rest into validation.
# The global RNG is seeded first so the split is reproducible: re-running this
# cell picks the same files (as long as the directory listing is unchanged)
# instead of copying a different random subset on top of the previous one,
# which would leak validation images into the training folders.
split_size = .9
random.seed(42)
split_data(cat_source_dir, training_cats_dir, validation_cats_dir, split_size)
split_data(dog_source_dir, training_dogs_dir, validation_dogs_dir, split_size)

In [None]:
# Count the files that ended up in each split/class folder
for folder in ('/tmp/cats-vs-dogs/training/Cat/',
               '/tmp/cats-vs-dogs/training/Dog/',
               '/tmp/cats-vs-dogs/validation/Cat/',
               '/tmp/cats-vs-dogs/validation/Dog/'):
    print(len(os.listdir(folder)))

In [None]:
# Define a small baseline CNN: three conv/max-pool stages followed by a dense head.
# NOTE: `model` is rebound to the InceptionV3-based network in a later cell,
# and that later network is the one that gets compiled and trained.
model = tf.keras.models.Sequential([
    # Inputs are 150x150 images with 3 colour channels
    layers.Conv2D(16, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    # Collapse the feature maps into a vector for the dense layers
    layers.Flatten(),
    # Hidden layer with 512 units
    layers.Dense(512, activation='relu'),
    # Single sigmoid unit: values near 0 mean one class ('cats'), near 1 the other ('dogs')
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Downloads an instance of weights for the inception model
!wget --no-check-certificate \
    https://storage.googleapis.com/mledu-datasets/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5 \
    -O /tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5

In [None]:
# Define pretrained model
local_weights_file = '/tmp/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'

# Build the InceptionV3 base for 150x150 RGB inputs without its top classifier
# and without built-in weights, then load the downloaded weights file into it.
pre_trained_model = InceptionV3(
    weights=None,
    include_top=False,
    input_shape=(150, 150, 3),
)
pre_trained_model.load_weights(local_weights_file)

# Freeze every layer of the base so that only layers added on top are trained
for layer in pre_trained_model.layers:
    layer.trainable = False

pre_trained_model.summary()

In [None]:
# Cut the pretrained network at the 'mixed7' layer; its output feeds the new head
last_layer = pre_trained_model.get_layer('mixed7')
last_output = last_layer.output
# Read the shape from the output tensor: `Layer.output_shape` is not available
# in every Keras version, whereas the tensor's `.shape` is.
print('last layer output shape: ', last_output.shape)

In [14]:
# Classifier head stacked on the frozen InceptionV3 features
flattened = layers.Flatten()(last_output)                      # feature maps -> 1-D vector
hidden = layers.Dense(1024, activation='relu')(flattened)      # fully connected, 1,024 ReLU units
regularized = layers.Dropout(0.2)(hidden)                      # dropout rate of 0.2
x = layers.Dense(1, activation='sigmoid')(regularized)         # single sigmoid output

# Full network: pretrained input through to the new sigmoid output
model = Model(pre_trained_model.input, x)

In [15]:
# Configure training: Adam optimizer, binary cross-entropy loss for the single
# sigmoid output, and accuracy as the reported metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Training images are scaled by 1/255 and randomly augmented on the fly
train_datagen = ImageDataGenerator(
    rescale=1.0/255.,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
# Validation and test images are only rescaled, never augmented
validation_datagen = ImageDataGenerator(rescale=1/255.)
test_datagen = ImageDataGenerator(rescale=1/255.)

training_dir = '/tmp/cats-vs-dogs/training'
validation_dir = '/tmp/cats-vs-dogs/validation'
test_dir = '/tmp/cats-vs-dogs/test'

# Settings shared by all three directory iterators
flow_kwargs = dict(batch_size=20, target_size=(150, 150))

train_generator = train_datagen.flow_from_directory(
    training_dir, class_mode='binary', **flow_kwargs)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir, class_mode='binary', **flow_kwargs)

# No labels for the test set (class_mode=None); shuffle=False keeps a fixed file order
test_generator = test_datagen.flow_from_directory(
    test_dir, class_mode=None, shuffle=False, **flow_kwargs)


In [27]:
# Defines callbacks
class myCallback(tf.keras.callbacks.Callback):
    """Stop training once an epoch's training accuracy exceeds 98%."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's 'accuracy' metric and request an early stop.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dictionary supplied by Keras; may be None or may lack
                'accuracy', in which case nothing happens.
        """
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy > 0.98:
            print('Reached 98% accuracy')
            # Stop the model this callback is attached to, not whatever the
            # notebook-level name `model` happens to refer to at that moment.
            self.model.stop_training = True

callback = myCallback()

In [None]:
# Fits the model.
# Step counts are derived from the generators (whole batches only, at least 1)
# rather than hard-coded, so they stay correct when the number of copied images
# differs from 22500/2500 (e.g. zero-length files are skipped by the split).
train_steps = max(1, train_generator.samples // train_generator.batch_size)
validation_steps = max(1, validation_generator.samples // validation_generator.batch_size)

history = model.fit(train_generator,
                    validation_data = validation_generator,
                    steps_per_epoch = train_steps,
                    epochs = 2,
                    validation_steps = validation_steps,
                    callbacks = [callback]
                    )

In [21]:
# Saves the current model to an HDF5 file in the working directory.
# NOTE(review): the file name says "30ep" while the fit call in this notebook
# uses epochs = 2 — confirm the intended name.
model.save('model_inceptiontf_30ep.h5')
# Optional: reload a previously saved model instead of retraining (left disabled)
#from tensorflow.keras.models import load_model
#model = load_model('/model_aug_lr001_20ep.h5')

In [None]:
# Print the layer-by-layer summary of the model currently bound to `model`
model.summary()

In [None]:
# Plots accuracy and loss
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))  # one x-position per completed epoch

# Series names must be passed as `label=`: as a bare positional argument after
# the format string, matplotlib treats the text as extra data to plot instead
# of a legend entry.
plt.plot(epochs, acc, 'r', label="Training Accuracy")
plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.legend()

# Start a second figure for the loss curves
plt.figure()

plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.legend()

plt.show()

In [None]:
# Makes predictions for the test set (test_generator was created with
# shuffle = False, so the prediction order follows the generator's file order)
preds = model.predict(test_generator)
# Show the first five predicted probabilities as a sanity check
print(preds[:5])

In [20]:
# Prepares submission file and converts it to csv.
# flow_from_directory lists files in alphanumeric order ("1.jpg", "10.jpg",
# "100.jpg", ...), so the row position of a prediction is NOT the image id.
# Each id is therefore recovered from the digits in the file name that produced
# the prediction (assumes test files are named by their numeric id — the same
# assumption the numeric sort of test_fnames makes; names without digits get -1).
image_ids = [
    int(''.join(filter(str.isdigit, os.path.splitext(os.path.basename(fname))[0])) or -1)
    for fname in test_generator.filenames
]

# preds has one sigmoid column; pair every probability with its image id
submission = pd.DataFrame({'id': image_ids, 'label': preds[:, 0]})
submission = submission.sort_values('id').reset_index(drop=True)
submission.to_csv('r.csv', index = False)

In [None]:
# Lets you upload your own image and classifies it
import numpy as np
from google.colab import files
# Use the tf.keras image utilities, like the rest of the notebook, instead of
# mixing in the standalone `keras` package.
from tensorflow.keras.preprocessing import image

uploaded = files.upload()

for fn in uploaded.keys():

    # predicting images
    path = '/content/' + fn
    img = image.load_img(path, target_size=(150, 150))
    # Same preprocessing as the generators: scale by 1/255, then add a batch axis
    x = image.img_to_array(img)
    x = x/255.
    x = np.expand_dims(x, axis=0)

    images = x
    classes = model.predict(images, batch_size=10)
    print(classes[0])
    # Compare the single sigmoid value (not the whole array) against 0.5
    if classes[0][0] > 0.5:
        print('It is a dog')
    else:
        print('It is a cat')
