# Dog breed classification

## Data exploration

In [2]:
import csv
import matplotlib.pylab as plt
import os
from keras.applications.resnet50 import ResNet50
import numpy as np
import PIL

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


### Data distribution

### Load data

In [3]:
from keras.preprocessing import image 
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential
from keras.regularizers import l2
from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint
from sklearn.datasets import load_files       
from keras.utils import np_utils
from keras.applications import *
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.applications.xception import Xception, preprocess_input
import numpy as np
from glob import glob
from PIL import ImageFile  
from tqdm import tqdm
ImageFile.LOAD_TRUNCATED_IMAGES = True    
from sklearn.datasets import load_files       
from keras.utils import np_utils
import numpy as np
from glob import glob

In [4]:
def load_dataset(path, num_classes=133):
    """Load image file paths and one-hot encoded targets found under ``path``.

    Args:
        path: Directory handed to ``sklearn.datasets.load_files``.
        num_classes: Number of columns in the one-hot target matrix.
            Defaults to 133, the value that used to be hard-coded here.

    Returns:
        A ``(dog_files, dog_targets)`` tuple: an array of file names and the
        matching one-hot target array with ``num_classes`` columns.
    """
    # Only 'filenames' and 'target' are used below, so skip reading the raw
    # bytes of every image into memory (load_files does that by default).
    data = load_files(path, load_content=False)
    dog_files = np.array(data['filenames'])
    dog_targets = np_utils.to_categorical(np.array(data['target']), num_classes)
    return dog_files, dog_targets

def path_to_tensor(img_path, size = 224):
    """Read one image file and return it as a single-item batch.

    The image is loaded with ``target_size=(size, size)``, converted to an
    array, and given a new leading axis so that several results can be
    stacked along axis 0.
    """
    pil_image = image.load_img(img_path, target_size=(size, size))
    pixels = image.img_to_array(pil_image)
    # Prepend the batch axis.
    return np.expand_dims(pixels, axis=0)

def paths_to_tensor(img_paths, size=224):
    """Load every image in ``img_paths`` and stack them into one array.

    Args:
        img_paths: Iterable of image file paths.
        size: Target edge length forwarded to ``path_to_tensor``. Defaults to
            224, which is what ``path_to_tensor`` used when no size was passed.

    Returns:
        The per-image arrays from ``path_to_tensor`` stacked along axis 0.
    """
    # tqdm only wraps the iterable to show a progress bar.
    list_of_tensors = [path_to_tensor(img_path, size) for img_path in tqdm(img_paths)]
    return np.vstack(list_of_tensors)

### Global variables

In [5]:
train_files_name = "dogImages/train"
test_file_name = "dogImages/test"
# One entry per sub-directory of the training folder, in sorted path order.
# Keep only the directory name (not the full glob path with its trailing
# slash) so that each entry can be interpolated into
# "dogImages/train/{name}/*" the way show_sample does.
dog_names = [os.path.basename(os.path.normpath(item))
             for item in sorted(glob(train_files_name + "/*/"))]

In [6]:
# Read the file names and one-hot targets of both splits.
(train_files, train_targets), (test_files, test_targets) = (
    load_dataset(train_files_name),
    load_dataset(test_file_name),
)

# Decode every listed image into one stacked array per split.
train_tensors, test_tensors = (
    paths_to_tensor(train_files),
    paths_to_tensor(test_files),
)

100%|██████████| 6680/6680 [02:23<00:00, 46.56it/s] 
100%|██████████| 836/836 [00:13<00:00, 62.26it/s] 


## CNN from scratch

### Model

### Accuracy

## Transfer learning

### Extract features from ResNet50

In [7]:
from PIL import ImageFile                            
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [8]:
def extract_Resnet50(tensor):
    """Return ResNet50 (ImageNet weights, no top, average pooling) features.

    Args:
        tensor: Batch of images to run through the network.

    Returns:
        The array produced by ``model.predict`` on the preprocessed batch.
    """
    # The notebook imports `preprocess_input` from both resnet50 and xception;
    # the later xception import wins at module level, so bind the ResNet50
    # variant explicitly here.
    from keras.applications.resnet50 import preprocess_input as resnet50_preprocess_input

    # Build the network once and reuse it; constructing it reloads the weights.
    model = getattr(extract_Resnet50, "_model", None)
    if model is None:
        model = ResNet50(weights='imagenet', include_top=False, pooling="avg")
        extract_Resnet50._model = model

    # NOTE(review): some Keras versions preprocess float arrays in place, which
    # would modify the caller's tensor — confirm before reusing `tensor`.
    return model.predict(resnet50_preprocess_input(tensor))

In [14]:
def expand_dimensions(f):
    """Insert two singleton axes right after the first axis of ``f``.

    An input of shape ``(n, d)`` comes back with shape ``(n, 1, 1, d)``.
    """
    once_expanded = np.expand_dims(f, axis=1)
    return np.expand_dims(once_expanded, axis=1)

In [None]:
# extract feature
train_features_resnet50 = extract_Resnet50(train_tensors)
test_features_resnet50 = extract_Resnet50(test_tensors)

# expand dimensions
# Both splits must expand the *extracted features*; expanding test_tensors
# here would store the raw image pixels under the 'test' key.
train_features_resnet50 = expand_dimensions(train_features_resnet50)
test_features_resnet50 = expand_dimensions(test_features_resnet50)

# save as npz file
np.savez_compressed('resnet50data',test=test_features_resnet50, train = train_features_resnet50)

In [15]:
# Reload the cached ResNet50 features written by np.savez_compressed.
resnet50Data = np.load('resnet50data.npz')
resnet50Train = resnet50Data['train']
resnet50Test = resnet50Data['test']
# Only add the two singleton axes when the cached arrays are still flat
# (n, d) vectors; arrays that were expanded before saving must not be
# expanded a second time.
if resnet50Train.ndim == 2:
    resnet50Train = expand_dimensions(resnet50Train)
if resnet50Test.ndim == 2:
    resnet50Test = expand_dimensions(resnet50Test)

### Train model

In [11]:
def tranfer_learning(trainingData, trainLabels, length):
    """Build, compile and fit a small classifier head on extracted features.

    Args:
        trainingData: Training features; ``trainingData.shape[1:]`` is used as
            the input shape of the first layer.
        trainLabels: Targets passed to ``fit``.
        length: Number of units in the final softmax layer.

    Returns:
        The fitted model.
    """
    classifier = Sequential([
        GlobalAveragePooling2D(input_shape=trainingData.shape[1:]),
        Dense(1000, activation='relu'),
        Dropout(0.5),
        Dense(length, activation='softmax'),
    ])

    # Print the layer table before training starts.
    classifier.summary()
    classifier.compile(optimizer='rmsprop',
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])

    classifier.fit(trainingData, trainLabels, batch_size=20, epochs=10, verbose=1)
    return classifier

In [None]:
resnet_model = tranfer_learning(resnet50Train, train_targets, len(dog_names))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
global_average_pooling2d_2 ( (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 1000)              2049000   
_________________________________________________________________
dropout_1 (Dropout)          (None, 1000)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 133)               133133    
Total params: 2,182,133
Trainable params: 2,182,133
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10

### Test accuracy

In [None]:
def calculate_accuracy(model, testData, testLabel, m):
    """Return the top-``m`` accuracy of ``model`` on ``testData``.

    A sample counts as correct when the index of the largest entry of its
    label row is among the ``m`` highest-scoring outputs of the model.

    Args:
        model: Object exposing ``predict(batch)`` that returns one row of
            scores per sample.
        testData: Samples to score; the first axis indexes samples.
        testLabel: One label row per sample (e.g. one-hot vectors).
        m: How many of the top-scoring classes to accept.

    Returns:
        Fraction of correctly classified samples as a float; ``0.0`` when
        there are no samples.

    Raises:
        ValueError: If ``testData`` and ``testLabel`` differ in length.
    """
    features = np.asarray(testData)
    labels = np.asarray(testLabel)
    if len(features) != len(labels):
        raise ValueError(
            "testData and testLabel must have the same length, got "
            "{0} and {1}".format(len(features), len(labels)))
    if len(labels) == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    # One batched forward pass instead of one predict call per sample.
    scores = np.asarray(model.predict(features))
    top_m = np.argsort(-scores, axis=1)[:, :m]
    # Flatten each label row so argmax matches np.argmax on a single row.
    true_classes = labels.reshape(len(labels), -1).argmax(axis=1)
    hits = np.any(top_m == true_classes[:, np.newaxis], axis=1)
    return int(np.count_nonzero(hits)) / float(len(labels))

In [None]:
# test accuracy
# Top-m accuracy on the held-out split; the one-hot test targets are the
# `test_targets` returned by load_dataset.
m = 3
acc = calculate_accuracy(resnet_model, resnet50Test, test_targets, m)
acc

## Prediction

In [None]:
def make_prediction(dog_names, model, testData, m):
    """Return the ``m`` best-scoring names for every sample in ``testData``.

    Each sample is scored on its own by ``model.predict``; the indices of the
    ``m`` highest scores are printed and then mapped through ``dog_names``.

    Returns:
        A list with one entry per sample, each a list of ``m`` names ordered
        from highest to lowest score.
    """
    predictions = []

    for sample in testData:
        # predict expects a batch, so wrap the sample in a leading axis.
        scores = model.predict(np.expand_dims(sample, axis=0))[0]
        best = np.argsort(-scores)[:m]
        print(best)
        predictions.append([dog_names[k] for k in best])

    return predictions

In [None]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt 

def show_sample(name):
    """Print ``name`` without its first four characters and show one image.

    The image shown is the first match of ``dogImages/train/<name>/*``.
    """
    figure, axes = plt.subplots()
    print(name[4:])

    # Take the first file found in the named training folder.
    candidates = glob("dogImages/train/{0}/*".format(name))
    picture = mpimg.imread(candidates[0])

    axes.imshow(picture)
    plt.axis('off')
    plt.show()

In [None]:
def preprocess(path, npz_name):
    """Extract ResNet50 features for the images under ``path`` and cache them.

    The features are written with ``np.savez_compressed`` to ``npz_name``
    under the key ``'test'``. Nothing is returned.
    """
    image_paths, _ = load_dataset(path)
    pooled = extract_Resnet50(paths_to_tensor(image_paths))
    np.savez_compressed(npz_name ,test=expand_dimensions(pooled))



In [None]:
def resnet50_predictor(path, name, model, m=3):
    """Predict the top-``m`` names for every image found under ``path``.

    Args:
        path: Directory of images handed to ``preprocess``.
        name: Base name of the ``.npz`` feature cache that ``preprocess``
            writes and that is read back here.
        model: Trained classifier passed to ``make_prediction``.
        m: Number of names to return per image.

    Returns:
        The list produced by ``make_prediction``: one list of ``m`` names per
        image. (The result used to be discarded.)
    """
    preprocess(path, name)

    features = np.load(name + ".npz")['test']
    return make_prediction(dog_names, model, features, m)

In [None]:
# Run the ResNet50-based predictor on the images under the test directory;
# extracted features are cached as 'dog_feature.npz'.
dog_dir = "testImage/test"
resnet50_predictor(path=dog_dir, name='dog_feature', model=resnet_model)