<a href="https://colab.research.google.com/github/Richish/Vision-Projects/blob/main/3CatsVsDogsWithXLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cats vs dogs classification using transfer learning

### Downloading the data

In [2]:
# NOTE(review): the recorded output of this command shows a ~23 KB `text/html`
# response being saved as train.zip rather than a zip archive — presumably
# Kaggle's login page, since the download likely needs authentication
# (TODO confirm). The file obtained this way cannot be unzipped.
#!wget https://kaggle.com/c/dogs-vs-cats/redux-kernels-edition/download/train.zip

--2021-12-11 05:11:44--  https://kaggle.com/c/dogs-vs-cats/redux-kernels-edition/download/train.zip
Resolving kaggle.com (kaggle.com)... 35.244.233.98
Connecting to kaggle.com (kaggle.com)|35.244.233.98|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://www.kaggle.com/c/dogs-vs-cats/redux-kernels-edition/download/train.zip [following]
--2021-12-11 05:11:44--  https://www.kaggle.com/c/dogs-vs-cats/redux-kernels-edition/download/train.zip
Resolving www.kaggle.com (www.kaggle.com)... 35.244.233.98
Connecting to www.kaggle.com (www.kaggle.com)|35.244.233.98|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘train.zip’

train.zip               [ <=>                ]  22.97K  --.-KB/s    in 0.02s   

2021-12-11 05:11:45 (934 KB/s) - ‘train.zip’ saved [23521]



In [28]:
# Extract the downloaded archive (unzip also tries /content/train.zip).
# NOTE(review): the recorded output shows this failing with "End-of-central-
# directory signature not found", i.e. the downloaded file is not a valid zip.
!unzip /content/train

Archive:  /content/train.zip
  End-of-central-directory signature not found.  Either this file is not
  a zipfile, or it constitutes one disk of a multi-part archive.  In the
  latter case the central directory and zipfile comment will be found on
  the last disk(s) of this archive.
unzip:  cannot find zipfile directory in one of /content/train or
        /content/train.zip, and cannot find /content/train.ZIP, period.


In [None]:
# Move the extracted `train` folder to `data`, then create one sub-folder per
# class for both the training and the validation split inside it.
!mv train data
!mkdir -p data/train/cat data/train/dog data/val/cat data/val/dog
# NOTE(review): every `!` line runs in its own subshell, so this `cd` does not
# change the notebook's working directory for later lines or cells (the `%cd`
# magic would). Confirm which directory the following cells are meant to run in.
!cd data


In [None]:
!ls|grep cat|sort -R|head -250|xargs -I {} mv {} train/cat
!ls|grep dog|sort -R|head -250|xargs -I {} mv {} train/dog
!ls|grep cat|sort -R|head -250|xargs -I {} mv {} val/cat
!ls|grep dog|sort -R|head -250|xargs -I {} mv {} val/dog

## Build the pipeline

In [7]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.applications.mobilenet import MobileNet, preprocess_input
import math

In [9]:
# Where the per-class image folders of each split live.
TRAIN_DATA_DIR = "data/train"
VALIDATION_DATA_DIR = "data/val"

# Number of images in each split and number of target classes.
TRAIN_SAMPLES = 500
VALIDATION_SAMPLES = 500
NUM_CLASSES = 2

# Image resolution fed to the network and mini-batch size.
IMG_WIDTH = 224
IMG_HEIGHT = 224
BATCH_SIZE = 64

## Data Augmentation and preprocessing

In [10]:
# Training images are randomly rotated, shifted and zoomed in addition to the
# MobileNet preprocessing; validation images are only preprocessed.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
)
val_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
)

In [None]:
# Stream batches of images from the per-class folders.
# `target_size` is (height, width) in Keras, so the constants are passed in
# that order; the previous (width, height) order only worked because both
# values are currently equal.
train_generator = train_datagen.flow_from_directory(
    directory=TRAIN_DATA_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=42,
    class_mode="categorical",
)
# The validation generator is created with shuffle=False (fixed order).
val_generator = val_datagen.flow_from_directory(
    directory=VALIDATION_DATA_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    shuffle=False,
    class_mode='categorical',
)


## Model definition

In [12]:
def model_maker():
    """Build the cat/dog classifier on top of a frozen MobileNet base.

    The MobileNet base is created without its top classifier and every one of
    its layers is marked non-trainable. A small head is stacked on top of it:
    global average pooling, a 64-unit ReLU layer, dropout (rate 0.5) and a
    softmax layer with ``NUM_CLASSES`` outputs.

    Returns:
        An uncompiled Keras ``Model`` taking images of shape
        ``(IMG_HEIGHT, IMG_WIDTH, 3)`` and producing ``NUM_CLASSES`` softmax
        scores.
    """
    # Keras image tensors are laid out as (height, width, channels).
    image_shape = (IMG_HEIGHT, IMG_WIDTH, 3)

    base_model = MobileNet(input_shape=image_shape, include_top=False)
    # Freeze the whole base so that only the new head is updated by training.
    for layer in base_model.layers:
        layer.trainable = False

    # Deliberately not called `input`, which would shadow the Python builtin.
    image_input = Input(shape=image_shape)
    features = base_model(image_input)
    features = GlobalAveragePooling2D()(features)
    features = Dense(units=64, activation='relu')(features)
    features = Dropout(0.5)(features)
    predictions = Dense(NUM_CLASSES, activation="softmax")(features)
    return Model(inputs=image_input, outputs=predictions)

## Training

In [18]:
model = model_maker()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss="categorical_crossentropy",
    metrics=['acc'],
)

# Number of batches needed to see every image of a split once.
num_steps = math.ceil(float(TRAIN_SAMPLES)/BATCH_SIZE)
# Validation steps come from the validation set size, not the training size.
num_val_steps = math.ceil(float(VALIDATION_SAMPLES)/BATCH_SIZE)

# `Model.fit` accepts generators directly; `fit_generator` is deprecated.
model.fit(
    train_generator,
    steps_per_epoch=num_steps,
    epochs=10,
    validation_data=val_generator,
    validation_steps=num_val_steps,
)

In [None]:
# Write the trained model to disk so it can be loaded again later.
model.save(filepath='model.h5')

## Testing the model

In [None]:
# `tf` is only an alias bound inside this notebook, not an importable package,
# so the import has to spell out `tensorflow`.
from tensorflow.keras.models import load_model
model = load_model('model.h5')

In [24]:
from tensorflow.keras.preprocessing import image
import numpy as np

# Classify one image, applying the same resizing and preprocessing as the
# generators do.
img_path = "sample_data/dog.jpg"
img = image.load_img(img_path, target_size=(IMG_HEIGHT, IMG_WIDTH))
img_array = image.img_to_array(img)
# The model expects a batch, so add a leading batch axis of size 1.
expanded_img_array = np.expand_dims(img_array, axis=0)
preprocessed_img = preprocess_input(expanded_img_array)
prediction = model.predict(preprocessed_img)
print(prediction)
# Show which output column belongs to which class name. The generator is
# called `val_generator`; `validation_generator` was never defined.
print(val_generator.class_indices)

## Analyzing confidence levels of predictions

In [None]:
# Class index of every validation image, as reported by the generator.
# NOTE(review): presumably in file order, which would line up with the
# predictions because val_generator is created with shuffle=False — confirm.
ground_truth = val_generator.classes
ground_truth

In [None]:
# `Model.predict` accepts generators directly; `predict_generator` is deprecated.
predictions = model.predict(val_generator)


In [None]:
# For every validation image, record
# [highest probability, index of that class, ground-truth class index],
# keyed by the image's position in `predictions`.
top_class_per_image = [np.argmax(scores) for scores in predictions]
prediction_table = {
    position: [predictions[position][top_class], top_class, ground_truth[position]]
    for position, top_class in enumerate(top_class_per_image)
}
assert len(predictions) == len(ground_truth) == len(prediction_table)
prediction_table


In [25]:
def display(sorted_indices, message):
    """Plot the validation images referenced by ``sorted_indices``.

    Args:
        sorted_indices: iterable of ``(name, value)`` pairs, where ``name``
            indexes into ``fnames`` and ``value`` unpacks into
            ``[probability, predicted_index, ground_truth]``.
        message: passed through unchanged to ``plot_images``.

    The image paths and their probabilities are collected in the given order
    and handed to ``plot_images`` once, after the loop.
    """
    import os  # local import: used only to join path components safely

    similar_images_paths = []
    distances = []
    for name, value in sorted_indices:
        [probability, predicted_index, gt] = value
        # os.path.join inserts the separator that plain string concatenation
        # omitted ("data/val" + "cat/x.jpg" would give "data/valcat/x.jpg").
        similar_images_paths.append(os.path.join(VALIDATION_DATA_DIR, fnames[name]))
        distances.append(probability)
    # Plot once with the complete lists rather than on every iteration.
    plot_images(similar_images_paths, distances, message)