In [None]:
# Source: https://github.com/machrisaa/tensorflow-vgg
# To use the VGG network, the npy file [VGG16 NPY](https://mega.nz/#!YU1FWJrA!O1ywiCS2IiOlUCtCpI6HTJOMrneN-Qdv3ywQP5poecM) has to be downloaded first.

In [1]:
import tensorflow as tf
import numpy as np
import os
import os.path
import vgg16 
import utils
import transfer_learning

  return f(*args, **kwds)


In [2]:
import warnings
# Install a global "ignore" filter so Python warnings raised from here on
# are not shown in the notebook output.
warnings.filterwarnings('ignore')

In [3]:
def make_final_layers(feature_tensor, num_classes, learning_rate=0.1):
    """Build a trainable softmax classifier head on top of a feature tensor.

    A single dense layer maps the features to ``num_classes`` logits and is
    trained with sparse softmax cross-entropy and plain gradient descent.

    Args:
        feature_tensor: 2-D tensor of features; its second dimension fixes the
            feature size. It is used as the default value of the returned
            ``feature_input`` placeholder, so pre-computed features can be fed
            in its place.
        num_classes: Number of output classes (units of the dense layer).
        learning_rate: Step size for the gradient-descent optimizer. Defaults
            to 0.1, the value that was previously hard-coded.

    Returns:
        Tuple ``(feature_input, label_input, logits, train_step, pred_prob,
        loss_summary)``:
            feature_input: placeholder-with-default of shape
                ``[None, feature_size]``.
            label_input: int64 placeholder of shape ``[None]`` holding class
                indices.
            logits: output of the dense layer.
            train_step: op that performs one gradient-descent update.
            pred_prob: softmax of ``logits``.
            loss_summary: scalar summary of the loss, tagged 'cross_entropy'.
    """
    feature_tensor_size = int(feature_tensor.shape[1])

    with tf.variable_scope('input'):
        # The batch dimension is left open (None) so that any number of
        # pre-computed feature rows can be fed, regardless of the batch size
        # of `feature_tensor` itself.
        feature_input = tf.placeholder_with_default(
            feature_tensor,
            [None, feature_tensor_size],
            'feature_input')

        label_input = tf.placeholder(tf.int64, [None], name='label_input')

    logits = tf.layers.dense(feature_input, num_classes)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=label_input, logits=logits)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_step = optimizer.minimize(loss)
    pred_prob = tf.nn.softmax(logits)
    loss_summary = tf.summary.scalar('cross_entropy', loss)

    return feature_input, label_input, logits, train_step, pred_prob, loss_summary

In [4]:
def compute_accuracy(labels, logits):
    """Create ops measuring how often the arg-max of ``logits`` equals ``labels``.

    Args:
        labels: tensor of true class indices, compared element-wise with the
            predicted class indices.
        logits: tensor of per-class scores; the prediction is the arg-max
            along axis 1.

    Returns:
        Tuple ``(accuracy, accuracy_summary)``: the mean of the element-wise
        matches cast to float32, and a scalar summary of it tagged 'accuracy'.
    """
    predicted_class = tf.argmax(logits, 1, name='pred_class')
    is_correct = tf.equal(predicted_class, labels)
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)
    return accuracy, accuracy_summary

In [5]:
def execute_train_step(session: tf.Session, current_step: int, summary_writer: tf.summary.FileWriter):
    """Run one training update on the whole training set and log its summary.

    Relies on notebook-level globals: ``train_step``, ``accuracy``,
    ``summary_op``, ``feature_input``, ``label_input`` and
    ``training_data_set`` (a mapping with 'features' and 'labels').

    Args:
        session: session in which the graph ops are run.
        current_step: index of this step; used as the summary's global step
            and to decide when to print progress.
        summary_writer: writer that receives the merged summary.
    """
    train_feed = {
        feature_input: training_data_set['features'],
        label_input: training_data_set['labels'],
    }
    fetched = session.run((train_step, accuracy, summary_op), feed_dict=train_feed)
    step_accuracy, step_summary = fetched[1], fetched[2]

    summary_writer.add_summary(step_summary, current_step)

    # Report progress only on every tenth step.
    if current_step % 10 != 0:
        return
    print('Accuracy at step {0} is {1}'.format(current_step, step_accuracy))

        

In [6]:
def evaluate_images(session: tf.Session):
    """Return the accuracy of the current model on the validation set.

    Relies on notebook-level globals: ``accuracy``, ``feature_input``,
    ``label_input`` and ``validate_data_set`` (a mapping with 'features' and
    'labels').

    Args:
        session: session in which the accuracy op is evaluated.

    Returns:
        ``np.mean`` of the value fetched for the ``accuracy`` op.
    """
    validation_feed = {
        feature_input: validate_data_set['features'],
        label_input: validate_data_set['labels'],
    }
    validation_accuracy = session.run(accuracy, feed_dict=validation_feed)
    return np.mean(validation_accuracy)

In [7]:
def get_prob(session: tf.Session, test):
    """Return the predicted class probabilities for pre-computed features.

    Relies on notebook-level globals ``pred_prob`` and ``feature_input``.

    Args:
        session: session in which ``pred_prob`` is evaluated.
        test: mapping whose 'features' entry holds the feature rows to
            classify. A 'labels' entry may be present but is not used.

    Returns:
        The value fetched for ``pred_prob`` (softmax of the logits).
    """
    # Only the features are fed: `pred_prob` is computed from `feature_input`
    # alone, so labels are not needed for inference and unlabeled data (or
    # labels of a non-integer dtype) no longer make the call fail.
    return session.run(pred_prob, feed_dict={feature_input: test['features']})

In [8]:
def get_feature(session, image_list, vgg, batch_size):
    """Run images through the VGG network in batches and collect fc8 outputs.

    Relies on notebook-level globals ``utils`` (for ``load_image``) and
    ``images`` (the input placeholder the network was built on).

    Every image in ``image_list`` is processed. When the number of images is
    not a multiple of ``batch_size``, the final batch is padded with all-zero
    images up to ``batch_size`` and the outputs for the padding rows are
    discarded, so a fixed-size input placeholder can still be fed.
    The batch index is printed after each batch as a progress indicator.

    Args:
        session: object whose ``run(fetch, feed_dict=...)`` evaluates the graph.
        image_list: sequence of ``(label, image_path)`` pairs.
        vgg: network object exposing the ``fc8`` output tensor.
        batch_size: number of images fed per ``session.run`` call; must be >= 1.

    Returns:
        dict with
            'labels': the labels stacked with ``np.stack``, in input order
                (an empty array when ``image_list`` is empty);
            'features': array with one 1000-wide row per image, in input order.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer, got {0}'.format(batch_size))

    # Seeding with an empty float32 (0, 1000) array keeps the result
    # well-formed for empty input and enforces the 1000-column layout when the
    # chunks are joined. Chunks are joined once at the end instead of
    # re-allocating the accumulated array on every batch.
    feature_chunks = [np.empty((0, 1000), dtype=np.float32)]
    labels = []

    for batch_index, start in enumerate(range(0, len(image_list), batch_size)):
        chunk = image_list[start:start + batch_size]
        labels.extend(label for label, _ in chunk)

        batch = np.concatenate(
            [utils.load_image(path).reshape((1, 224, 224, 3)) for _, path in chunk],
            axis=0)

        # Pad a short final batch so the feed always has `batch_size` rows.
        real_count = batch.shape[0]
        if real_count < batch_size:
            padding = np.zeros((batch_size - real_count, 224, 224, 3), dtype=batch.dtype)
            batch = np.concatenate((batch, padding), axis=0)

        feature_output = session.run(vgg.fc8, feed_dict={images: batch})
        # Keep only the rows that belong to real images.
        feature_chunks.append(feature_output[:real_count])

        print(batch_index)

    return {
        'labels': np.stack(labels) if labels else np.empty((0,)),
        'features': np.concatenate(feature_chunks, axis=0),
    }



In [9]:
# Base directory for the summary/event files written during training.
VGG_LOG_DIR = './tmp/vgg16_log'
# exist_ok=True creates the directory (and parents) only when missing, without
# the race between a separate existence check and the creation call.
os.makedirs(VGG_LOG_DIR, exist_ok=True)

In [10]:
from datetime import datetime

# The timer starts before the graph is built, so the reported "training" time
# also covers building VGG16 and loading the cached training features.
start1 = datetime.now()
with tf.Session() as session:
    vgg = vgg16.Vgg16()
    # Fixed-size input: batches of 20 images of 224x224x3.
    images = tf.placeholder("float", [20, 224, 224, 3])
    with tf.name_scope("content_vgg"):
        vgg.build(images)

    # Placeholder standing in for the 1000-wide VGG output; the classifier head
    # is trained on pre-computed features fed through `feature_input`.
    feature_t = tf.placeholder("float", [20, 1000])
    feature_input, label_input, logits, train_step, pred_prob, loss_summary = make_final_layers(feature_t, 2)
    accuracy, accuracy_summary = compute_accuracy(label_input, logits)
    summary_op = tf.summary.merge([loss_summary, accuracy_summary])

    print('------------- Starting training ----------------')
    session.run(tf.global_variables_initializer())

    # Cached features: a pickled dict ({'labels', 'features'}) saved with np.save
    # (originally produced by get_feature on the training images).
    # allow_pickle=True is required to load object arrays on NumPy >= 1.16.3;
    # only load files you created yourself, since unpickling can execute code.
    training_data_set = np.load('train_feature.npy', allow_pickle=True).item()

    # Pass the Graph itself; handing a GraphDef to FileWriter is deprecated.
    summary_writer = tf.summary.FileWriter(os.path.join(VGG_LOG_DIR, 'retrained'), session.graph)
    try:
        for i in range(120):
            execute_train_step(session, i, summary_writer)
    finally:
        # Flush pending events to disk and release the file handle.
        summary_writer.close()

    end1 = datetime.now()

    training = end1 - start1
    print(training)

    print('------------- Training done! -------------------')
    print('----------- Evaluating on testing --------------')
    end2 = datetime.now()

    # Cached validation features, stored the same way as the training features.
    validate_data_set = np.load('validate_feature.npy', allow_pickle=True).item()

    eval_accuracy = evaluate_images(session)

    print('Evaluation accuracy was: {0}'.format(eval_accuracy))
    end3 = datetime.now()
    test_time = end3 - end2
    print(test_time)

    print('----------- Doing testing --------------')
    testing, _, _ = transfer_learning.create_image_lists('../data/test', testing_percentage=0, max_number_images=13000)

    # Cached test features, stored the same way as the training features.
    testing_feature = np.load('test_feature.npy', allow_pickle=True).item()
    test_result = get_prob(session, testing_feature)

    # NOTE(review): presumably test_feature.npy covers only the first 12480 test
    # images, so the remaining ones are featurized here - confirm against the
    # cached file before relying on the concatenated result.
    test_left = testing[12480:]
    testing_feature_last = get_feature(session, test_left, vgg, batch_size=20)
    test_result_last = get_prob(session, testing_feature_last)

    test_result_full = np.concatenate((test_result, test_result_last), axis=0)
    
    


/Users/chenyunwu/Desktop/proj5/vgg/vgg16.npy
npy file loaded
build model started
build model finished: 5s
------------- Starting training ----------------
Accuracy at step 0 is 0.42476850748062134
Accuracy at step 10 is 0.9742699265480042
Accuracy at step 20 is 0.9799679517745972
Accuracy at step 30 is 0.98170405626297
Accuracy at step 40 is 0.9837072491645813
Accuracy at step 50 is 0.9850872755050659
Accuracy at step 60 is 0.98615562915802
Accuracy at step 70 is 0.9866898059844971
Accuracy at step 80 is 0.9874910712242126
Accuracy at step 90 is 0.9879807829856873
Accuracy at step 100 is 0.9877136945724487
Accuracy at step 110 is 0.9881142973899841
0:00:27.327219
------------- Training done! -------------------
----------- Evaluating on testing --------------
Evaluation accuracy was: 0.989182710647583
0:00:00.089604
----------- Doing testing --------------
INFO:tensorflow:Looking for images in 'small'
0


In [11]:
import re
import csv
import pandas as pd

# Use the first run of digits in each image's *file name* as its id. Matching
# on the basename (not the whole path) keeps digits in directory names from
# being picked up as the id.
names = [re.search(r'[0-9]+', os.path.basename(i[1])).group() for i in testing]

# One row per test image: id followed by the predicted class probabilities.
# Stacking with the string ids turns the probabilities into strings as well.
vgg_result = np.column_stack((names, test_result_full))
# NOTE(review): assumes class index 0 is 'cat' and 1 is 'dog' - confirm against
# the label encoding used for training.
colnames = ['id', 'cat', 'dog']
vgg_result_pd = pd.DataFrame(vgg_result, index=names, columns=colnames)
# The index is not written; the id is already the first column.
vgg_result_pd.to_csv('vgg_full.csv', index=False, header=True)