In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import time
from datetime import timedelta
import math
import os
import scipy.misc
from scipy.stats import itemfreq
from random import sample
import pickle
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Image manipulation.
import PIL.Image
from IPython.display import display
#from resizeimage import resizeimage

#Panda
import pandas as pd

#Open a Zip File
from zipfile import ZipFile
from io import BytesIO

In [2]:
# Open both image archives read-only. The handles are kept open on purpose:
# later cells read the individual images straight out of them.
archive_train = ZipFile("../dogs_data/train.zip", mode='r')
archive_test = ZipFile("../dogs_data/test.zip", mode='r')

# Peek at the first five member names of the training archive.
# NOTE: a notebook only renders the LAST expression of a cell, so this value
# is computed and then discarded.
archive_train.namelist()[:5]

# Number of training images: every archive member except the first one
# (the first entry is assumed not to be an image -- presumably the folder
# entry; verify against the archive).
len(archive_train.namelist()) - 1

10222

In [None]:
# Bare expression: when this cell is run the notebook displays the repr of the
# training ZipFile handle (quick check that the archive is open).
archive_train

In [11]:
def DataBase_creator(archivezip, nwigth, nheight, save_name):
    """Resize every image of a zip archive and pickle them as one array.

    The first archive member is skipped: the archives used by this notebook
    are assumed to start with a non-image entry (presumably the folder
    itself) -- verify this before reusing the function on another archive.

    Args:
        archivezip: open ``zipfile.ZipFile`` holding the images.
        nwigth: target image width in pixels.
        nheight: target image height in pixels.
        save_name: output path without extension; the array is written to
            ``save_name + '.p'``.

    Returns:
        None. The float array of shape
        ``(n_images, nheight, nwigth, 3)`` is written to disk with pickle and
        the elapsed time is printed. Pixel values are stored unscaled
        (0..255); no normalisation is applied here.
    """
    # Start-time used for printing time-usage below.
    start_time = time.time()

    # Fetch the member list once instead of on every loop iteration, and drop
    # the first (non-image) entry. Slicing also copes with an empty archive.
    image_names = archivezip.namelist()[1:]

    # np.array(<PIL image>) is laid out (height, width, channels), so the
    # buffer rows must be nheight and the columns nwigth. (The two only
    # coincide for square targets.)
    allImage = np.zeros((len(image_names), nheight, nwigth, 3))

    for row, name in enumerate(image_names):
        image = PIL.Image.open(BytesIO(archivezip.read(name)))
        # Force three channels so grayscale / RGBA / palette images fit the
        # (..., 3) buffer; PIL's resize takes (width, height).
        image = image.convert("RGB").resize((nwigth, nheight))
        allImage[row] = np.array(image)

    # Save the newly created data base; the context manager guarantees the
    # file is flushed and closed even if pickling fails.
    with open(save_name + '.p', "wb") as pickle_file:
        pickle.dump(allImage, pickle_file)

    # Print the time-usage (difference between end and start times).
    time_dif = time.time() - start_time
    print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))

In [12]:
# Side length, in pixels, used for both the width and the height of every
# resized image.
image_resize = 60

# Build and pickle the resized training set, then the resized test set.
DataBase_creator(archive_train, image_resize, image_resize, "train")
DataBase_creator(archive_test, image_resize, image_resize, "test")

Time usage: 0:00:32
Time usage: 0:00:33


In [13]:
# Reload the resized training images from disk.
# NOTE: pickle.load can execute arbitrary code from the file -- only load
# pickles that this notebook produced itself.
with open("train.p", "rb") as train_file:  # closes the handle once loaded
    train = pickle.load(train_file)
train.shape

(10222, 60, 60, 3)

In [14]:
# Reload the resized test images from disk.
# NOTE: pickle.load can execute arbitrary code from the file -- only load
# pickles that this notebook produced itself.
with open("test.p", "rb") as test_file:  # closes the handle once loaded
    test = pickle.load(test_file)

# Keep only as many test images as there are training images (10222 in the
# recorded run) instead of hard-coding that count.
# NOTE(review): this throws test images away; it looks like a workaround so
# that `test` can be passed next to `train` further down -- confirm it is
# really wanted.
test = test[:len(train)]

# Last expression of the cell so the resulting shape is actually displayed.
test.shape

In [20]:
def create_network(features, labels, mode):
    """Model function for ``tf.estimator.Estimator``: a three-stage CNN classifier.

    Architecture: three [5x5 convolution + ReLU -> 2x2 max-pooling] stages
    with 32, 64 and 128 filters, two 1024-unit ReLU dense layers, dropout
    (rate 0.2, active only in TRAIN mode) and a final 10-unit logits layer.

    Args:
        features: dict whose ``'x'`` entry holds the image batch; it is
            reshaped to ``[batch_size, 60, 60, 3]``.
        labels: class ids handed to ``tf.losses.sparse_softmax_cross_entropy``,
            i.e. one integer id per image (rank one less than the logits).
            Not used in PREDICT mode.
        mode: one of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec`` carrying the predicted class ids
        (PREDICT), the loss and train op (TRAIN) or the loss and accuracy
        metric (EVAL). Falls through and returns ``None`` for any other mode.
    """
    # `images` rather than `input`, so the builtin input() is not shadowed.
    # returns [batch_size, 60, 60, 3]
    images = tf.reshape(features['x'], [-1, 60, 60, 3])

    # receives [batch_size, 60, 60, 3]
    # returns [batch_size, 60, 60, 32]  ('same' padding keeps height/width)
    convolution1 = tf.layers.conv2d(inputs = images, filters = 32, kernel_size = [5,5],
                                    activation = tf.nn.relu, padding = 'same')

    # receives [batch_size, 60, 60, 32]
    # returns [batch_size, 30, 30, 32]
    pooling1 = tf.layers.max_pooling2d(inputs = convolution1, pool_size = [2,2], strides = 2)

    # receives [batch_size, 30, 30, 32]
    # returns [batch_size, 30, 30, 64]
    convolution2 = tf.layers.conv2d(inputs = pooling1, filters = 64, kernel_size = [5,5],
                                    activation = tf.nn.relu, padding = 'same')

    # receives [batch_size, 30, 30, 64]
    # returns [batch_size, 15, 15, 64]
    pooling2 = tf.layers.max_pooling2d(inputs = convolution2, pool_size = [2,2], strides = 2)

    # receives [batch_size, 15, 15, 64]
    # returns [batch_size, 15, 15, 128]
    convolution3 = tf.layers.conv2d(inputs = pooling2, filters = 128, kernel_size = [5,5],
                                    activation = tf.nn.relu, padding = 'same')

    # receives [batch_size, 15, 15, 128]
    # returns [batch_size, 7, 7, 128]  (15 is odd: the last row/column is dropped)
    pooling3 = tf.layers.max_pooling2d(inputs = convolution3, pool_size = [2,2], strides = 2)

    # Shape check, printed once each time the graph is built.
    print(pooling3.shape)

    # receives [batch_size, 7, 7, 128]
    # returns [batch_size, 6272]
    flattening = tf.reshape(pooling3, [-1, 7 * 7 * 128])

    # 6272 inputs -> 1024 -> 1024 neurons on the hidden layers -> 10 outputs
    # receives [batch_size, 6272]
    # returns [batch_size, 1024]
    dense = tf.layers.dense(inputs = flattening, units = 1024, activation = tf.nn.relu)

    # receives [batch_size, 1024]
    # returns [batch_size, 1024]
    dense2 = tf.layers.dense(inputs = dense, units = 1024, activation = tf.nn.relu)

    # Dropout is only applied while training; it is a pass-through otherwise.
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    dropout = tf.layers.dropout(inputs = dense2, rate = 0.2, training = is_training)

    # receives [batch_size, 1024]
    # returns [batch_size, 10] (unnormalised logits)
    # NOTE(review): 10 output units are hard-coded -- confirm this matches the
    # number of classes in the label set.
    output = tf.layers.dense(inputs = dropout, units = 10)

    # Index of the largest logit = predicted class id, shape [batch_size].
    predictions = tf.argmax(output, axis = 1)

    if(mode == tf.estimator.ModeKeys.PREDICT):
        return tf.estimator.EstimatorSpec(mode = mode, predictions = predictions)

    # Needs labels of rank 1 (one class id per image) against rank-2 logits.
    losses = tf.losses.sparse_softmax_cross_entropy(labels = labels, logits = output)

    if(mode == tf.estimator.ModeKeys.TRAIN):
        optimizer = tf.train.AdamOptimizer(learning_rate = 0.001)
        # `train_op` rather than `train`, which is the image array at notebook level.
        train_op = optimizer.minimize(losses, global_step = tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode = mode, loss = losses, train_op = train_op)

    if(mode == tf.estimator.ModeKeys.EVAL):
        eval_metrics_ops = {'accuracy': tf.metrics.accuracy(labels = labels, predictions = predictions)}
        return tf.estimator.EstimatorSpec(mode = mode, loss = losses, eval_metric_ops = eval_metrics_ops)

# Wrap the model function in an Estimator. No model_dir is passed, so the
# checkpoints land in a temporary directory (see '_model_dir' in the log below).
classifier = tf.estimator.Estimator(model_fn = create_network)

# Input pipeline: shuffled batches drawn from the in-memory arrays, repeated
# indefinitely (num_epochs = None), so training length is set by `steps` alone.
# FIXME(review): `y = test` feeds the test *images* (a 4-D array) as labels.
# The model's sparse softmax loss needs one class id per image, which is
# exactly what raises "Rank mismatch: Rank of labels (received 4) ..." below.
# Replace `test` with the label array belonging to `train` (that array is not
# visible in this part of the notebook).
# NOTE(review): batch_size = 10000 is almost the whole 10222-image training
# set -- confirm this is intended.
train_function = tf.estimator.inputs.numpy_input_fn(x = {'x': train}, y = test, 
                                                        batch_size= 10000, num_epochs= None, shuffle= True)
# Run 2000 optimisation steps.
classifier.train(input_fn = train_function, steps = 2000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp_uxmbzj3', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff0f26597f0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
(10000, 7, 7, 128)
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Us

ValueError: Rank mismatch: Rank of labels (received 4) should equal rank of logits minus 1 (received 2).