# Pretrained VGGNet

In [1]:
from urllib.request import urlretrieve
from os.path import isfile, isdir
from tqdm import tqdm

vgg_dir = 'tensorflow_vgg/'
# Make sure vgg exists
if not isdir(vgg_dir):
    raise Exception("VGG directory doesn't exist!")

class DLProgress(tqdm):
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        self.total = total_size
        self.update((block_num - self.last_block) * block_size)
        self.last_block = block_num

if not isfile(vgg_dir + "vgg16.npy"):
    with DLProgress(unit='B', unit_scale=True, miniters=1, desc='VGG16 Parameters') as pbar:
        urlretrieve(
            'https://s3.amazonaws.com/content.udacity-data.com/nd101/vgg16.npy',
            vgg_dir + 'vgg16.npy',
            pbar.hook)
else:
    print("Parameter file already exists!")

Parameter file already exists!


In [3]:
import os

import numpy as np
import tensorflow as tf

from tensorflow_vgg import vgg16
from tensorflow_vgg import utils

In [4]:
data_dir = 'train/'
contents = os.listdir(data_dir)
classes = [each for each in contents if os.path.isdir(data_dir + each)]

In [5]:
print(classes)

['real', 'fake']


In [6]:
# Set the batch size higher if you can fit in in your GPU memory
batch_size = 10
codes_list = []
labels = []
batch = []

codes = None

with tf.Session() as sess:
    vgg = vgg16.Vgg16()
    input_ = tf.placeholder(tf.float32, [None, 224, 224, 3])
    with tf.name_scope("content_vgg"):
        vgg.build(input_)

    for each in classes:
        print("Starting {} images".format(each))
        class_path = data_dir + each
        files = os.listdir(class_path)
        for ii, file in enumerate(files, 1):
            # Add images to the current batch
            # utils.load_image crops the input images for us, from the center
            img = utils.load_image(os.path.join(class_path, file))
            batch.append(img.reshape((1, 224, 224, 3)))
            labels.append(each)
            
            # Running the batch through the network to get the codes
            if ii % batch_size == 0 or ii == len(files):
                images = np.concatenate(batch)

                feed_dict = {input_: images}
                codes_batch = sess.run(vgg.relu6, feed_dict=feed_dict)
                
                # Here I'm building an array of the codes
                if codes is None:
                    codes = codes_batch
                else:
                    codes = np.concatenate((codes, codes_batch))
                
                # Reset to start building the next batch
                batch = []
                print('{} images processed'.format(ii))

/Users/cathylin/Desktop/EECS 349/Final_Project/tensorflow_vgg/vgg16.npy
npy file loaded
build model started
build model finished: 4s
Starting real images


  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


10 images processed
20 images processed
30 images processed
40 images processed
50 images processed
60 images processed
70 images processed
80 images processed
90 images processed
100 images processed
110 images processed
120 images processed
130 images processed
140 images processed
150 images processed
160 images processed
170 images processed
180 images processed
190 images processed
200 images processed
210 images processed
220 images processed
230 images processed
240 images processed
250 images processed
260 images processed
270 images processed
280 images processed
290 images processed
300 images processed
310 images processed
320 images processed
330 images processed
340 images processed
350 images processed
360 images processed
370 images processed
380 images processed
390 images processed
400 images processed
410 images processed
420 images processed
430 images processed
440 images processed
450 images processed
460 images processed
470 images processed
480 images processed
4

In [7]:
# write codes to file
with open('codes', 'w') as f:
    codes.tofile(f)
    
# write labels to file
import csv
with open('labels', 'w') as f:
    writer = csv.writer(f, delimiter='\n')
    writer.writerow(labels)

## Building the Classifier

In [8]:
print(labels)

['real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real', 'real',

In [9]:
# read codes and labels from file
import csv

with open('labels') as f:
    reader = csv.reader(f, delimiter='\n')
    labels = np.array([each for each in reader if len(each) > 0]).squeeze()
with open('codes') as f:
    codes = np.fromfile(f, dtype=np.float32)
    codes = codes.reshape((len(labels), -1))

In [10]:
print(codes.shape)

(1024, 4096)


In [11]:
print(codes[0])

[0.        0.        1.5262582 ... 2.821091  0.        2.3106177]


In [12]:
print(codes[0].shape)

(4096,)


In [13]:
print(labels[0])

real


In [14]:
print(sum(abs(codes[0]-codes[1])))

6011.737014427781


In [15]:
print(sum(abs(codes[0]-codes[0])))

0.0


In [16]:
print(sum(abs(codes[0]-codes[150])))

6886.401059038937


In [17]:
from numpy.linalg import norm

In [18]:
print(norm(codes[0]-codes[1]))

161.04782


In [19]:
print(norm(codes[0]-codes[150]))

181.32484


### First, convert all the images in the test dataset to vectors.

In [20]:
data_dir = 'test/'
contents = os.listdir(data_dir)
classes2 = [each for each in contents if os.path.isdir(data_dir + each)]

In [21]:
print(classes2)

['real', 'fake']


In [22]:
# Set the batch size higher if you can fit in in your GPU memory
batch_size = 10
codes_list = []
labels = []
batch = []

codes = None

with tf.Session() as sess:
    vgg = vgg16.Vgg16()
    input_ = tf.placeholder(tf.float32, [None, 224, 224, 3])
    with tf.name_scope("content_vgg"):
        vgg.build(input_)

    for each in classes:
        print("Starting {} images".format(each))
        class_path = data_dir + each
        files = os.listdir(class_path)
        for ii, file in enumerate(files, 1):
            # Add images to the current batch
            # utils.load_image crops the input images for us, from the center
            img = utils.load_image(os.path.join(class_path, file))
            batch.append(img.reshape((1, 224, 224, 3)))
            labels.append(each)
            
            # Running the batch through the network to get the codes
            if ii % batch_size == 0 or ii == len(files):
                images = np.concatenate(batch)

                feed_dict = {input_: images}
                codes_batch = sess.run(vgg.relu6, feed_dict=feed_dict)
                
                # Here I'm building an array of the codes
                if codes is None:
                    codes = codes_batch
                else:
                    codes = np.concatenate((codes, codes_batch))
                
                # Reset to start building the next batch
                batch = []
                print('{} images processed'.format(ii))

/Users/cathylin/Desktop/EECS 349/Final_Project/tensorflow_vgg/vgg16.npy
npy file loaded
build model started
build model finished: 3s
Starting real images
10 images processed
20 images processed
30 images processed
40 images processed
50 images processed
60 images processed
70 images processed
80 images processed
90 images processed
100 images processed
110 images processed
120 images processed
130 images processed
140 images processed
150 images processed
160 images processed
170 images processed
180 images processed
190 images processed
200 images processed
210 images processed
220 images processed
230 images processed
240 images processed
250 images processed
256 images processed
Starting fake images
10 images processed
20 images processed
30 images processed
40 images processed
50 images processed
60 images processed
70 images processed
80 images processed
90 images processed
100 images processed
110 images processed
120 images processed
130 images processed
140 images processed
150

In [23]:
# write codes to file
with open('codes2', 'w') as f:
    codes.tofile(f)
    
# write labels to file
import csv
with open('labels2', 'w') as f:
    writer = csv.writer(f, delimiter='\n')
    writer.writerow(labels)

In [24]:
print(codes.shape)

(512, 4096)


## Try 1-Nearest Neighbor

In [25]:
# read codes and labels from file
import csv

with open('labels') as f:
    reader = csv.reader(f, delimiter='\n')
    labels = np.array([each for each in reader if len(each) > 0]).squeeze()
with open('codes') as f:
    codes = np.fromfile(f, dtype=np.float32)
    codes = codes.reshape((len(labels), -1))

In [26]:
# read codes and labels from file
import csv

with open('labels2') as f:
    reader = csv.reader(f, delimiter='\n')
    labels2 = np.array([each for each in reader if len(each) > 0]).squeeze()
with open('codes2') as f:
    codes2 = np.fromfile(f, dtype=np.float32)
    codes2 = codes2.reshape((len(labels2), -1))

In [27]:
print(codes.shape)

(1024, 4096)


In [28]:
print(codes2.shape)

(512, 4096)


In [29]:
print(labels2.shape)

(512,)


In [30]:
print(len(codes))

1024


In [31]:
print(len(codes2))

512


### Using Manhattan distances (L1 distances)
### Euclidean distances (L2 distances)

In [32]:
#重置图（这个是为了用于多次运行）
tf.reset_default_graph()

#使用训练集数目为160条
#使用验证集（测试集）数目为80

xtr = tf.placeholder('float', [None, 4096])
xte = tf.placeholder('float', [4096])

#计算各个对应位置的距离（减法使用广播形式）
#底下俩作用相同
distance = tf.reduce_sum(tf.abs(tf.subtract(xtr, xte)), reduction_indices=1)

#寻找距离最近（即最相似的行所在位置）
pred = tf.arg_min(distance, 0)

accuracy = 0.

#初始化
init = tf.global_variables_initializer()


with tf.Session() as sess:
    sess.run(init)

    for i in range(len(codes2)):
        #计算最相近的所在行位置
        nn_index = sess.run(pred, feed_dict={xtr:codes, xte: codes2[i, :]})

        #取出测试集上最相近行对应的label与真是label对比
        print('Test', i, 'Prediction: ', labels[nn_index], \
               'True Class: ', labels2[i])
        if labels[nn_index] == labels2[i]:
            accuracy += 1./len(codes2)
    print('Done!')
    print('Accuracy: ', accuracy)

Instructions for updating:
Use `argmin` instead
Test 0 Prediction:  real True Class:  real
Test 1 Prediction:  real True Class:  real
Test 2 Prediction:  real True Class:  real
Test 3 Prediction:  real True Class:  real
Test 4 Prediction:  fake True Class:  real
Test 5 Prediction:  real True Class:  real
Test 6 Prediction:  real True Class:  real
Test 7 Prediction:  real True Class:  real
Test 8 Prediction:  real True Class:  real
Test 9 Prediction:  real True Class:  real
Test 10 Prediction:  real True Class:  real
Test 11 Prediction:  fake True Class:  real
Test 12 Prediction:  real True Class:  real
Test 13 Prediction:  real True Class:  real
Test 14 Prediction:  real True Class:  real
Test 15 Prediction:  real True Class:  real
Test 16 Prediction:  real True Class:  real
Test 17 Prediction:  real True Class:  real
Test 18 Prediction:  fake True Class:  real
Test 19 Prediction:  real True Class:  real
Test 20 Prediction:  real True Class:  real
Test 21 Prediction:  real True Class: 

Test 187 Prediction:  real True Class:  real
Test 188 Prediction:  real True Class:  real
Test 189 Prediction:  real True Class:  real
Test 190 Prediction:  real True Class:  real
Test 191 Prediction:  real True Class:  real
Test 192 Prediction:  real True Class:  real
Test 193 Prediction:  real True Class:  real
Test 194 Prediction:  real True Class:  real
Test 195 Prediction:  real True Class:  real
Test 196 Prediction:  real True Class:  real
Test 197 Prediction:  real True Class:  real
Test 198 Prediction:  real True Class:  real
Test 199 Prediction:  real True Class:  real
Test 200 Prediction:  real True Class:  real
Test 201 Prediction:  real True Class:  real
Test 202 Prediction:  real True Class:  real
Test 203 Prediction:  real True Class:  real
Test 204 Prediction:  real True Class:  real
Test 205 Prediction:  real True Class:  real
Test 206 Prediction:  real True Class:  real
Test 207 Prediction:  real True Class:  real
Test 208 Prediction:  real True Class:  real
Test 209 P

Test 386 Prediction:  fake True Class:  fake
Test 387 Prediction:  fake True Class:  fake
Test 388 Prediction:  real True Class:  fake
Test 389 Prediction:  fake True Class:  fake
Test 390 Prediction:  fake True Class:  fake
Test 391 Prediction:  real True Class:  fake
Test 392 Prediction:  fake True Class:  fake
Test 393 Prediction:  fake True Class:  fake
Test 394 Prediction:  fake True Class:  fake
Test 395 Prediction:  fake True Class:  fake
Test 396 Prediction:  fake True Class:  fake
Test 397 Prediction:  fake True Class:  fake
Test 398 Prediction:  real True Class:  fake
Test 399 Prediction:  fake True Class:  fake
Test 400 Prediction:  fake True Class:  fake
Test 401 Prediction:  fake True Class:  fake
Test 402 Prediction:  fake True Class:  fake
Test 403 Prediction:  fake True Class:  fake
Test 404 Prediction:  fake True Class:  fake
Test 405 Prediction:  real True Class:  fake
Test 406 Prediction:  fake True Class:  fake
Test 407 Prediction:  real True Class:  fake
Test 408 P

### Euclidean distances (L2 distances)
#### the results are same!!!

In [33]:
#重置图（这个是为了用于多次运行）
tf.reset_default_graph()

#使用训练集数目为160条
#使用验证集（测试集）数目为80

xtr = tf.placeholder('float', [None, 4096])
xte = tf.placeholder('float', [4096])

#计算各个对应位置的距离（减法使用广播形式）
# L2 normalization
#distance = tf.norm(tf.abs(tf.subtract(xtr, xte)), axis=1, ord=2)
distance = tf.sqrt(tf.reduce_sum(tf.pow(tf.add(xtr,tf.negative(xte)),2),reduction_indices=1))

#寻找距离最近（即最相似的行所在位置）
pred = tf.arg_min(distance, 0)

accuracy = 0.

#初始化
init = tf.global_variables_initializer()


with tf.Session() as sess:
    sess.run(init)

    for i in range(len(codes2)):
        #计算最相近的所在行位置
        nn_index = sess.run(pred, feed_dict={xtr:codes, xte: codes2[i, :]})

        #取出测试集上最相近行对应的label与真是label对比
        print('Test', i, 'Prediction: ', labels[nn_index], \
               'True Class: ', labels2[i])
        if labels[nn_index] == labels2[i]:
            accuracy += 1./len(codes2)
    print('Done!')
    print('Accuracy: ', accuracy)

Instructions for updating:
Use `argmin` instead
Test 0 Prediction:  real True Class:  real
Test 1 Prediction:  real True Class:  real
Test 2 Prediction:  real True Class:  real
Test 3 Prediction:  real True Class:  real
Test 4 Prediction:  fake True Class:  real
Test 5 Prediction:  real True Class:  real
Test 6 Prediction:  real True Class:  real
Test 7 Prediction:  real True Class:  real
Test 8 Prediction:  real True Class:  real
Test 9 Prediction:  real True Class:  real
Test 10 Prediction:  real True Class:  real
Test 11 Prediction:  fake True Class:  real
Test 12 Prediction:  real True Class:  real
Test 13 Prediction:  real True Class:  real
Test 14 Prediction:  real True Class:  real
Test 15 Prediction:  real True Class:  real
Test 16 Prediction:  real True Class:  real
Test 17 Prediction:  real True Class:  real
Test 18 Prediction:  fake True Class:  real
Test 19 Prediction:  real True Class:  real
Test 20 Prediction:  real True Class:  real
Test 21 Prediction:  real True Class: 

Test 180 Prediction:  real True Class:  real
Test 181 Prediction:  real True Class:  real
Test 182 Prediction:  real True Class:  real
Test 183 Prediction:  real True Class:  real
Test 184 Prediction:  real True Class:  real
Test 185 Prediction:  real True Class:  real
Test 186 Prediction:  real True Class:  real
Test 187 Prediction:  real True Class:  real
Test 188 Prediction:  real True Class:  real
Test 189 Prediction:  fake True Class:  real
Test 190 Prediction:  real True Class:  real
Test 191 Prediction:  real True Class:  real
Test 192 Prediction:  real True Class:  real
Test 193 Prediction:  real True Class:  real
Test 194 Prediction:  real True Class:  real
Test 195 Prediction:  real True Class:  real
Test 196 Prediction:  real True Class:  real
Test 197 Prediction:  real True Class:  real
Test 198 Prediction:  real True Class:  real
Test 199 Prediction:  real True Class:  real
Test 200 Prediction:  real True Class:  real
Test 201 Prediction:  real True Class:  real
Test 202 P

Test 371 Prediction:  fake True Class:  fake
Test 372 Prediction:  fake True Class:  fake
Test 373 Prediction:  fake True Class:  fake
Test 374 Prediction:  fake True Class:  fake
Test 375 Prediction:  fake True Class:  fake
Test 376 Prediction:  real True Class:  fake
Test 377 Prediction:  fake True Class:  fake
Test 378 Prediction:  fake True Class:  fake
Test 379 Prediction:  fake True Class:  fake
Test 380 Prediction:  real True Class:  fake
Test 381 Prediction:  real True Class:  fake
Test 382 Prediction:  fake True Class:  fake
Test 383 Prediction:  fake True Class:  fake
Test 384 Prediction:  fake True Class:  fake
Test 385 Prediction:  fake True Class:  fake
Test 386 Prediction:  fake True Class:  fake
Test 387 Prediction:  fake True Class:  fake
Test 388 Prediction:  real True Class:  fake
Test 389 Prediction:  fake True Class:  fake
Test 390 Prediction:  fake True Class:  fake
Test 391 Prediction:  real True Class:  fake
Test 392 Prediction:  fake True Class:  fake
Test 393 P

## Try use different K, use K=3, and select majority of the Class

In [34]:
def get_variables(xtr, xte):
    
    # calculate the distance
    distance = tf.reduce_sum(tf.abs(tf.subtract(xtr, xte)), reduction_indices=1)
    
    # change it to neg, because tf.nn.top returns the max value.
    neg_one = tf.constant(-1.0)
    neg_distances = tf.multiply(distance, neg_one)
    
    # get the indices
    vals, indx = tf.nn.top_k(neg_distances, k=3)
    return {'vals':vals,'indx':indx}

In [35]:
def get_majorirty_vote(idxlst):
    dic = {}
    dic['real'] = 0
    dic['fake'] = 0
    
    for i in idxlst:
        ss = labels[i]  
        dic[ss] += 1
    
    if dic['real'] >= dic['fake']:
        return 'real'
    else:
        return 'fake'

In [36]:
#重置图（这个是为了用于多次运行）
tf.reset_default_graph()

xtr = tf.placeholder('float', [None, 4096])
xte = tf.placeholder('float', [4096])

out = get_variables(xtr, xte)

accuracy = 0.

#初始化
init = tf.global_variables_initializer()


with tf.Session() as sess:
    sess.run(init)

    for i in range(len(codes2)):
        #计算最相近的所在行位置
        idx = sess.run(out['indx'], feed_dict={xtr:codes, xte: codes2[i, :]})
        
        #print(idx)
        #print('idx ?????-> ', idx[0])
        
        # 取 training set 中最近的 k 个，然后返回它们的 majority vote
        majority_vote_label = get_majorirty_vote(idx)
        print('Test', i, 'Prediction: ', majority_vote_label, \
             'True Class: ', labels2[i])
        
        if majority_vote_label == labels2[i]:
            accuracy += 1./len(codes2)
    
    print('Done!')
    print('Accuracy: ', accuracy)

Test 0 Prediction:  real True Class:  real
Test 1 Prediction:  real True Class:  real
Test 2 Prediction:  real True Class:  real
Test 3 Prediction:  real True Class:  real
Test 4 Prediction:  fake True Class:  real
Test 5 Prediction:  real True Class:  real
Test 6 Prediction:  real True Class:  real
Test 7 Prediction:  real True Class:  real
Test 8 Prediction:  real True Class:  real
Test 9 Prediction:  real True Class:  real
Test 10 Prediction:  real True Class:  real
Test 11 Prediction:  real True Class:  real
Test 12 Prediction:  real True Class:  real
Test 13 Prediction:  real True Class:  real
Test 14 Prediction:  real True Class:  real
Test 15 Prediction:  real True Class:  real
Test 16 Prediction:  real True Class:  real
Test 17 Prediction:  real True Class:  real
Test 18 Prediction:  real True Class:  real
Test 19 Prediction:  real True Class:  real
Test 20 Prediction:  real True Class:  real
Test 21 Prediction:  real True Class:  real
Test 22 Prediction:  real True Class:  rea

Test 185 Prediction:  real True Class:  real
Test 186 Prediction:  real True Class:  real
Test 187 Prediction:  real True Class:  real
Test 188 Prediction:  real True Class:  real
Test 189 Prediction:  fake True Class:  real
Test 190 Prediction:  real True Class:  real
Test 191 Prediction:  real True Class:  real
Test 192 Prediction:  real True Class:  real
Test 193 Prediction:  real True Class:  real
Test 194 Prediction:  real True Class:  real
Test 195 Prediction:  real True Class:  real
Test 196 Prediction:  real True Class:  real
Test 197 Prediction:  real True Class:  real
Test 198 Prediction:  real True Class:  real
Test 199 Prediction:  real True Class:  real
Test 200 Prediction:  real True Class:  real
Test 201 Prediction:  real True Class:  real
Test 202 Prediction:  real True Class:  real
Test 203 Prediction:  real True Class:  real
Test 204 Prediction:  real True Class:  real
Test 205 Prediction:  real True Class:  real
Test 206 Prediction:  real True Class:  real
Test 207 P

Test 372 Prediction:  real True Class:  fake
Test 373 Prediction:  fake True Class:  fake
Test 374 Prediction:  fake True Class:  fake
Test 375 Prediction:  fake True Class:  fake
Test 376 Prediction:  real True Class:  fake
Test 377 Prediction:  fake True Class:  fake
Test 378 Prediction:  fake True Class:  fake
Test 379 Prediction:  fake True Class:  fake
Test 380 Prediction:  real True Class:  fake
Test 381 Prediction:  fake True Class:  fake
Test 382 Prediction:  fake True Class:  fake
Test 383 Prediction:  fake True Class:  fake
Test 384 Prediction:  fake True Class:  fake
Test 385 Prediction:  fake True Class:  fake
Test 386 Prediction:  fake True Class:  fake
Test 387 Prediction:  fake True Class:  fake
Test 388 Prediction:  real True Class:  fake
Test 389 Prediction:  fake True Class:  fake
Test 390 Prediction:  fake True Class:  fake
Test 391 Prediction:  real True Class:  fake
Test 392 Prediction:  real True Class:  fake
Test 393 Prediction:  fake True Class:  fake
Test 394 P