In [1]:
import os
import numpy as np #linear algebra
import pandas as pd #data preprocessing
import matplotlib.pyplot as plt #data visualization
import h5py
import PIL

import utils.general_utils as util
from utils.general_utils import populate_breeds, get_imgMatrix_from_id, get_breed_value_from_id, get_filename_from_id, get_id_from_filename
#other built utilities

from data_loader import dataFrameBuilder

#using inception_v3 to classify dog breeds
import tensorflow as tf #import tensroflow

import random

In [2]:
#PARAMETERS
image_size = 500 #all images are size image_size x image_size x 3
batch_size = 10
num_classes = 120

In [3]:
%matplotlib inline

#prepare csv files
train = pd.read_csv("../data/included/labels.csv")
test = pd.read_csv("../data/included/test_id.csv")
BREED_LIST = "../data/preprocessed_data/breed_list.csv"

In [4]:
#prepare the breed list dataframe
labels = populate_breeds(BREED_LIST) #get the list of all dog breeds
labels_np = np.array(labels).reshape(120,1) #labels list reshaped to numpy array

In [5]:
#x = tf.placeholder(tf.float32, shape=[None, 500,500, 3], name='input_data')

x = tf.keras.layers.Input(shape=(500,500,3), batch_size=batch_size,name='input_data',dtype='float32')

y = tf.placeholder(tf.float32, shape=[None, 120,1], name='correct_labels')
#x=tf.placeholder(tf.float32, shape=[500,500,3],name='input_data')

y_pred = tf.placeholder(tf.float32, shape=[None,120,1], name='predicted_labels')


In [6]:
def train_input_fn(index=0, data_amnt = 1):
    input_img_data = dataFrameBuilder(data_amount=data_amnt,
                                      start_index=index)
    #df.shuffle().repeat().batch(batch_size)
    #print(df.sample(1))
    #input_img_data = df.as_matrix(columns=['Image Data'])
    #input_img_data.reshape([500,500,3])
    input_img_data = np.asarray(input_img_data)
    
    return input_img_data

def train_output_fn(index=0,data_amnt = 1):
    output_breed_data = dataFrameBuilder(data_amount=data_amnt,
                                         start_index=index,
                                         ret_input=False,
                                         ret_output=True)
    #return df.as_matrix(columns=['Breed'])
    output_breed_data = np.asarray(output_breed_data)
    return output_breed_data

'''
batch_size
    the number of samples returned

features
    either 'train' to use training data
    or 'test' to return testing data
'''
def generator(batch_size, features = train):
    # Create empty arrays to contain batch of features and labels#
    batch_features = np.zeros((batch_size, 500, 500, 3))
    batch_labels = np.zeros((batch_size,120))
    while True:
        for i in range(batch_size):     
            # choose random index in features
            index= random.choice([len(features),1])
            batch_features[i] = train_input_fn(index=index, data_amnt=1)
            batch_labels[i] = train_output_fn(index=index, data_amnt=1)
        yield batch_features, batch_labels


In [7]:
inception_v3 = tf.keras.applications.InceptionV3(include_top=False,
                                                input_tensor=x,
                                                classes=120)

#set imagedata to channels_last for best performance


In [8]:
# add a global spatial average pooling layer
output_layer = inception_v3.output
output_layer = tf.keras.layers.GlobalAveragePooling2D()(output_layer)
# let's add a fully-connected layer
output_layer = tf.keras.layers.Dense(1024, activation='relu')(output_layer)
# and a logistic layer -- let's say we have 200 classes
predictions = tf.keras.layers.Dense(120, activation='softmax')(output_layer)

In [9]:
# this is the model we will train
model = tf.keras.Model(inputs=inception_v3.input, outputs=predictions)

In [10]:
for i, layer in enumerate(model.layers):
   print(i, layer.name)

0 input_data
1 conv2d_1
2 batch_normalization_1
3 activation_1
4 conv2d_2
5 batch_normalization_2
6 activation_2
7 conv2d_3
8 batch_normalization_3
9 activation_3
10 max_pooling2d_1
11 conv2d_4
12 batch_normalization_4
13 activation_4
14 conv2d_5
15 batch_normalization_5
16 activation_5
17 max_pooling2d_2
18 conv2d_9
19 batch_normalization_9
20 activation_9
21 conv2d_7
22 conv2d_10
23 batch_normalization_7
24 batch_normalization_10
25 activation_7
26 activation_10
27 average_pooling2d_1
28 conv2d_6
29 conv2d_8
30 conv2d_11
31 conv2d_12
32 batch_normalization_6
33 batch_normalization_8
34 batch_normalization_11
35 batch_normalization_12
36 activation_6
37 activation_8
38 activation_11
39 activation_12
40 mixed0
41 conv2d_16
42 batch_normalization_16
43 activation_16
44 conv2d_14
45 conv2d_17
46 batch_normalization_14
47 batch_normalization_17
48 activation_14
49 activation_17
50 average_pooling2d_2
51 conv2d_13
52 conv2d_15
53 conv2d_18
54 conv2d_19
55 batch_normalization_13
56 batch_no

In [11]:
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allocator_type = 'BFC'
config.gpu_options.per_process_gpu_memory_fraction = 0.40
config.gpu_options.allow_growth = True

In [12]:
tf.reset_default_graph()
sess = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True))
#with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as ss:
                             
x = tf.keras.layers.Input(shape=(500,500,3), batch_size=batch_size,name='input_data',dtype='float32')

y = tf.placeholder(tf.float32, shape=[None, 120,1], name='correct_labels')
#x=tf.placeholder(tf.float32, shape=[500,500,3],name='input_data')

y_pred = tf.placeholder(tf.float32, shape=[None,120,1], name='predicted_labels')


inception_v3 = tf.keras.applications.InceptionV3(include_top=False,
                                                 weights='imagenet',
                                                input_tensor=x,
                                                classes=120)

#steps for adding a new output layer
output_layer = inception_v3.output
output_layer = tf.keras.layers.GlobalAveragePooling2D()(output_layer) #replace the current global avg pool 2d
output_layer = tf.keras.layers.Dense(1024, activation='relu')(output_layer) 
predictions = tf.keras.layers.Dense(120, activation='softmax')(output_layer) #120 classes in the new model

model = tf.keras.Model(inputs=inception_v3.input, outputs=predictions)

model.compile(loss=tf.keras.losses.categorical_crossentropy,
                    optimizer='sgd')

#img_data = train_input_fn(data_amnt=batch_size)
#breed_data = train_output_fn(data_amnt=batch_size)

#model.fit(x=img_data, y=breed_data, batch_size=batch_size)

#inception_v3.fit_generator(generator)
#model.fit_generator(generator(features, labels, batch_size), samples_per_epoch=50, nb_epoch=10)
#print(img_data[0])
model.fit_generator(generator(batch_size), steps_per_epoch=10, epochs=50)


index = 11

img_data = train_input_fn(index=index, data_amnt=batch_size)
breed_data = train_output_fn(index=index, data_amnt=batch_size)

#sample_weight=np.transpose(np.ones(120, dtype='float32'))
#model.evaluate(x=img_data,y=breed_data,batch_size=batch_size, sample_weight=sample_weight) 
model.evaluate(x=img_data, y=breed_data,batch_size=batch_size)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


7.244642734527588

In [33]:
def test_input_fn(index=0, data_amnt = 1):
    input_img_data = dataFrameBuilder(data_amount=data_amnt,
                                      start_index=index,
                                     dir="../data/preprocessed_data/Test/")

    input_img_data = np.asarray(input_img_data)
    return input_img_data


data_files = os.listdir("../data/preprocessed_data/Test/") #get a list of all filenames from Train dir
file_index = 0

'''
temp_id = get_id_from_filename(data_files[])
temp_file_data = test_input_fn(index=0, data_amnt=10)
temp_file_prediction = model.predict(x=file_data, batch_size = 10)
'''

file_ids = []
file_prediction = []

for file in data_files:   
    print(file_index, ' ', file)
    #append id to a list
    #once the 10th id is reached, append all predictions to a list
    #combine
    
    file_ids.append(get_id_from_filename(file)) #append the current id to the list of ids
    if(file_index%10 == 0): #every 10 indexes        
        file_data= test_input_fn(index=file_index, data_amnt=10)    
        predictions = model.predict(x=file_data, batch_size = 10)
        for prediction in predictions:
            file_prediction.append(prediction)

    if(file_index>= 9):
        break
    
    file_index += 1
        
   

0   000621fb3cbb32d8935728e48679680e.png
1   00102ee9d8eb90812350685311fe5890.png
2   0012a730dfa437f5f3613fb75efcd4ce.png
3   001510bc8570bbeee98c8d80c8a95ec1.png
4   001a5f3114548acdefa3d4da05474c2e.png
5   00225dcd3e4d2410dd53239f95c0352f.png
6   002c2a3117c2193b4d26400ce431eebd.png
7   002c58d413a521ae8d1a5daeb35fc803.png
8   002f80396f1e3db687c5932d7978b196.png
9   0036c6bcec6031be9e62a257b1c3c442.png


In [36]:
print(file_ids)
print("\n")
print(file_prediction[0][1])

['000621fb3cbb32d8935728e48679680e', '00102ee9d8eb90812350685311fe5890', '0012a730dfa437f5f3613fb75efcd4ce', '001510bc8570bbeee98c8d80c8a95ec1', '001a5f3114548acdefa3d4da05474c2e', '00225dcd3e4d2410dd53239f95c0352f', '002c2a3117c2193b4d26400ce431eebd', '002c58d413a521ae8d1a5daeb35fc803', '002f80396f1e3db687c5932d7978b196', '0036c6bcec6031be9e62a257b1c3c442']


0.0003945509


In [113]:
df_labels = ['id'] #set up the dataframe column labels

for label in labels_np: #append each breed as a new column
    df_labels.append(label[0])

df_labels_np = np.asarray(list(df_labels)).T

    
predictions_data = []

for id in file_ids:
    i = 0 #index in current row of data
    single_prediction = []
    single_prediction.append(id) 
    for column in file_prediction[i]: #access each individual prediction in a single id's row
        single_prediction.append(column) #add a single breed prediction to the data
    
        
    single_prediction = np.asarray(single_prediction).reshape((1,121))
    #print(single_prediction.shape)
    if(i == 0):
        predictions_data = single_prediction[0]
    
    #predictions_data = np.concatenate(predictions_data, single_prediction[0])
    #print(i, ' ', predictions_data[i])
    i += 1 #iterate to the next id


#predictions_data = np.asarray(predictions_data)
#predictions_data.shape()
output = np.vstack((predictions_data, predictions_data))
print(output.shape)
predictions_df = pd.DataFrame(output, columns=df_labels)

['id', 'affenpinscher', 'afghan_hound', 'african_hunting_dog', 'airedale', 'american_staffordshire_terrier', 'appenzeller', 'australian_terrier', 'basenji', 'basset', 'beagle', 'bedlington_terrier', 'bernese_mountain_dog', 'black-and-tan_coonhound', 'blenheim_spaniel', 'bloodhound', 'bluetick', 'border_collie', 'border_terrier', 'borzoi', 'boston_bull', 'bouvier_des_flandres', 'boxer', 'brabancon_griffon', 'briard', 'brittany_spaniel', 'bull_mastiff', 'cairn', 'cardigan', 'chesapeake_bay_retriever', 'chihuahua', 'chow', 'clumber', 'cocker_spaniel', 'collie', 'curly-coated_retriever', 'dandie_dinmont', 'dhole', 'dingo', 'doberman', 'english_foxhound', 'english_setter', 'english_springer', 'entlebucher', 'eskimo_dog', 'flat-coated_retriever', 'french_bulldog', 'german_shepherd', 'german_short-haired_pointer', 'giant_schnauzer', 'golden_retriever', 'gordon_setter', 'great_dane', 'great_pyrenees', 'greater_swiss_mountain_dog', 'groenendael', 'ibizan_hound', 'irish_setter', 'irish_terrier',

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,0036c6bcec6031be9e62a257b1c3c442,0.0007825534,0.0003945509,0.00015902561,0.0007761166,0.0007525283,0.00047848417,0.0007207573,0.0007219867,0.000770615,...,0.0002519139,0.00047761246,0.0010034071,0.00040659515,0.0004355653,0.00046328665,0.0006567028,0.00039705768,0.00082491693,0.0007428286
