## TO DO

- Delete empty images and add them to clean_dataset folder [DONE]
- Figure out how to shuffle by user, rather than by photo, make comparisons within users [DONE]
- Figure out how to do proper testing (code to pair up images randomly, predict their score using neural network, and then compare whether the ordinal match was right using our score) [DONE]

- Change training code to pull only images from new training set

### TO DO LATER
- Figure out how to incoroporate regression output
- Rewrite of code

## Create Neural Network

In [70]:
from keras import applications
from keras.layers import Input, Conv2D, MaxPooling2D, Dense, Dropout, Flatten
from keras.utils import np_utils

# If you want to specify input tensor
input_tensor = Input(shape=(160, 160, 3))
vgg_model = applications.VGG16(weights='imagenet',
                               include_top=False,
                               input_tensor=input_tensor)

# To see the models' architecture and layer names, run the following
vgg_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 160, 160, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 160, 160, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 160, 160, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 80, 80, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 80, 80, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 80, 80, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 40, 40, 128)       0         
__________

In [71]:
# Creating dictionary that maps layer names to the layers
layer_dict = dict([(layer.name, layer) for layer in vgg_model.layers])

# Getting output tensor of the last VGG layer that we want to include
x = layer_dict['block4_pool'].output

# Stacking a new simple convolutional network on top of it    
x = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(10, activation='softmax')(x)

# Creating new model. Please note that this is NOT a Sequential() model.
from keras.models import Model
custom_model = Model(inputs=vgg_model.input, outputs=x)

# Make sure that the pre-trained bottom layers are not trainable
for layer in custom_model.layers[:15]:
    layer.trainable = False

# Do not forget to compile it
custom_model.compile(loss='categorical_crossentropy',
                     optimizer='rmsprop',
                     metrics=['accuracy'])

## Train Neural Network

In [72]:
import cv2
import numpy as np
import csv, sqlite3
import math
import os
import random

In [73]:
"""
Create dictionary with:
Key = filename root
Value = score
"""

con = sqlite3.connect("imagion.db")
cur = con.cursor()

table_cols = [i[0] for i in cur.execute("SELECT * FROM imagion").description]

def get_filenames(): 
    files_dict = {}
    
    cur.execute("SELECT filename, scale_qsc FROM imagion")
    
    count = 0
    
    for file_, int_score in cur.fetchall():
        if count == 0:
            count += 1
            continue
            
        count += 1 
        
        int_score = int_score - 1
        
        files_dict[file_] = int_score
        
    return files_dict

files_dict = get_filenames()

In [74]:
"""
Create dictionary with:
Key = alias
Value = list of filenames for user
"""

DATASET_DIR = 'data_images'

def create_user_dict(dataset_dir):
    user_dict = {}
    
    for filename in os.listdir(dataset_dir):
        filename = filename.rsplit('.', 1)[0]
        alias = filename.rsplit('_', 1)[0]
        
        if alias not in user_dict:
            user_dict[alias] = [filename]
        else:
            user_dict[alias].append(filename)
            
    return user_dict

user_dict = create_user_dict(DATASET_DIR)        

In [75]:
"""Split training and test images"""

PERCENT_TRAINING = 0.75 

random.seed(10)
keys = user_dict.keys()
split = int(len(user_dict.keys()) * PERCENT_TRAINING)

random.shuffle(keys) # revisit this shuffle function

train_keys = keys[:split]
test_keys = keys[split:]

print len(train_keys)
print train_keys[1]

671
oliviapierson


In [76]:
##IMPORTANT
train_imgs = []

for i in range(len(train_keys)):
    for j in range(len(user_dict[train_keys[i]])):
    #print [user_dict[key]]
        train_imgs.append(user_dict[train_keys[i]][j])

print len(train_imgs)
print train_imgs[1:30]

#print test_imgs[1]
#print len(train_imgs)
#print len(test_imgs

10630
['youngmeerim_1', 'youngmeerim_15', 'youngmeerim_3', 'youngmeerim_9', 'youngmeerim_8', 'youngmeerim_5', 'youngmeerim_16', 'youngmeerim_6', 'youngmeerim_0', 'youngmeerim_4', 'youngmeerim_11', 'youngmeerim_14', 'youngmeerim_7', 'youngmeerim_13', 'youngmeerim_12', 'youngmeerim_2', 'oliviapierson_6', 'oliviapierson_9', 'oliviapierson_12', 'oliviapierson_15', 'oliviapierson_13', 'oliviapierson_10', 'oliviapierson_16', 'oliviapierson_11', 'oliviapierson_14', 'oliviapierson_4', 'oliviapierson_0', 'oliviapierson_2', 'oliviapierson_8']


In [77]:
def chunks(l, n):
    """Yield successive n-sized chunks from l"""
    for i in xrange(0, len(l), n):
        yield l[i:i+n]

def get_train_data(chunk, img_row, img_col):
    X_train = []
    Y_train = []
    
    for imgname in chunk:
        try:
            filename = 'data_images'+'/'+imgname+'.png'
            img = cv2.imread(filename)
            print filename
        #if (img != None):
            img = cv2.resize(img,(img_row,img_col))
            X_train.append(img)
            Y_train.append(files_dict[imgname])
            print Y_train
        except: 
            continue
    X_train = np.asarray(X_train)
    Y_train = np.asarray(Y_train)

    return X_train,Y_train
    
    
    
    
def get_test_data(chunk, img_row, img_col):
    X_test = []
    Y_test = []
    
    for imgname in chunk:
        try:
            filename = './data_images'+'/'+imgname+'.png'
            img = cv2.imread(filename)
            img = cv2.resize(img,(img_row,img_col))
            X_test.append(img)
            Y_test.append(files_dict[imgname])
        except:
            continue
    X_test = np.asarray(X_test)
    Y_test = np.asarray(Y_test)

    return X_test,Y_test

def getTrainData(chunk,nb_classes,img_rows,img_cols):
    X_train,Y_train = get_train_data(chunk,img_rows,img_cols)
    #if (X_train!=None and Y_train!=None):
    X_train/=255
    Y_train=np_utils.to_categorical(Y_train, num_classes = 10)
    return (X_train,Y_train)

def getTestData(chunk,nb_classes,img_rows,img_cols):
    X_test,Y_test = get_test_data(chunk,img_rows,img_cols)
    if (X_test!=None and Y_test!=None):
        X_test/=255
    Y_test=np_utils.to_categorical(Y_test, num_classes = 10)
    return (X_test,Y_test)

def test(model, nb_epoch, spatial_test_data, nb_classes, img_rows, img_cols):
    X_test,Y_test = getTestData(test_keys,nb_classes,img_rows,img_cols)
    return (X_test, Y_test)


In [78]:
## chunk_size = 10
num_epochs = 1
nb_epoch = 50
batch_size = 2
nb_classes = 10
chunk_size = 32
img_rows = 160
img_cols = 160

In [82]:
for e in range(nb_epoch):
    print('-'*40)
    print('Epoch', e)
    print('-'*40)
    print("Training...")
    instance_count=0


    for chunk in chunks(train_imgs, chunk_size):
        X_chunk,Y_chunk = getTrainData(chunk,nb_classes,img_rows,img_cols)
        print X_chunk
        print Y_chunk
        #if (X_chunk!=None and Y_chunk!=None):
            #print "NO"
            #for X_batch, Y_batch in datagen.flow(X_chunk, Y_chunk, batch_size=chunk_size):
        loss = custom_model.fit(X_chunk, Y_chunk, verbose=1, batch_size=batch_size, epochs=num_epochs)
        instance_count+=chunk_size
        print instance_count
        if instance_count%100==0:
            custom_model.save_weights('basic_model.h5',overwrite=True)
            
                    
            

----------------------------------------
('Epoch', 0)
----------------------------------------
Training...
data_images/youngmeerim_10.png
[6]
data_images/youngmeerim_1.png
[6, 0]
data_images/youngmeerim_15.png
[6, 0, 2]
data_images/youngmeerim_3.png
[6, 0, 2, 8]
data_images/youngmeerim_9.png
[6, 0, 2, 8, 5]
data_images/youngmeerim_8.png
[6, 0, 2, 8, 5, 5]
data_images/youngmeerim_5.png
[6, 0, 2, 8, 5, 5, 4]
data_images/youngmeerim_16.png
[6, 0, 2, 8, 5, 5, 4, 3]
data_images/youngmeerim_6.png
[6, 0, 2, 8, 5, 5, 4, 3, 7]
data_images/youngmeerim_0.png
[6, 0, 2, 8, 5, 5, 4, 3, 7, 0]
data_images/youngmeerim_4.png
[6, 0, 2, 8, 5, 5, 4, 3, 7, 0, 9]
data_images/youngmeerim_11.png
[6, 0, 2, 8, 5, 5, 4, 3, 7, 0, 9, 1]
data_images/youngmeerim_14.png
[6, 0, 2, 8, 5, 5, 4, 3, 7, 0, 9, 1, 6]
data_images/youngmeerim_7.png
[6, 0, 2, 8, 5, 5, 4, 3, 7, 0, 9, 1, 6, 9]
data_images/youngmeerim_13.png
[6, 0, 2, 8, 5, 5, 4, 3, 7, 0, 9, 1, 6, 9, 8]
data_images/youngmeerim_12.png
[6, 0, 2, 8, 5, 5, 4, 3, 7, 0, 

KeyboardInterrupt: 

## Load Weights

In [80]:
weights_path = 'basic_model.h5'

if weights_path:
    custom_model.load_weights(weights_path)

In [81]:
""" SUDO CODE FOR CUSTOM ACCURACY FUNCTION"""

DATASET_DIR = 'data_images'

def test_accuracy():
    count = 0
    accuracy = 0
    
    for key in test_keys:
        user_imgs = user_dict[key]
        #print user_imgs
        
        if len(user_imgs) > 1:
           # get two elements from shuffled dictionary
            for j in range(len(user_imgs)/2):
                element1 = user_imgs.pop(0)
                element2 = user_imgs.pop(0)
                print element1
                print element2


                # read images
                img1 = cv2.imread(os.path.join(DATASET_DIR, element1+'.png'))
                #print img1
                img2 = cv2.imread(os.path.join(DATASET_DIR, element2+'.png'))

                #resize images
                img1 = cv2.resize(img1,(img_rows,img_cols))
                img2 = cv2.resize(img2,(img_rows,img_cols))

                # expand dimension
                img1 = np.expand_dims(img1, axis=0)
                img2 = np.expand_dims(img2, axis=0)

                # predict score for each image
                predict1 = np.argmax(custom_model.predict(img1))
                predict2 = np.argmax(custom_model.predict(img2))
                print predict1
                print predict2

                # compare whichever predicted score is higher

                # comparison dict
                if predict1 > predict2:
                    max_predict = 'a'
                elif predict1 < predict2:
                    max_predict = 'b'
                else:
                    max_predict = 'equal'

                # compare whichever "score" is higher
                if files_dict[element1] > files_dict[element2]:
                    max_actual = 'a'
                elif files_dict[element1] < files_dict[element2]:
                    max_actual = 'b'
                else:
                    max_actual = 'equal'

                # check if comparisons match 
                if max_predict == max_actual:
                    accuracy +=1

                count += 1

                print max_predict
                print max_actual
                print max_predict == max_actual
                print '\n'

        else:
            continue
    
    accuracy_per = accuracy / (count*1.0)
    
    print count, "comparisons made"
    print "Accuracy score is:", accuracy_per
    
    return

test_accuracy()

vacationwolf_0
vacationwolf_12
9
9
equal
a
False


vacationwolf_13
vacationwolf_4
9
9
equal
b
False


vacationwolf_5
vacationwolf_15
9
9
equal
a
False


vacationwolf_6
vacationwolf_2
8
9
b
b
True


vacationwolf_7
vacationwolf_8
9
9
equal
a
False


vacationwolf_9
vacationwolf_16
9
9
equal
b
False


vacationwolf_1
vacationwolf_3
9
9
equal
b
False


vacationwolf_14
vacationwolf_11
9
9
equal
b
False


cedarwright_16
cedarwright_12
9
9
equal
equal
True


cedarwright_14
cedarwright_15
0
9
b
a
False


cedarwright_3
cedarwright_13
9
0
a
a
True


cedarwright_10
cedarwright_1
0
9
b
equal
False


cedarwright_9
cedarwright_7
9
9
equal
b
False


cedarwright_8
cedarwright_11
0
9
b
a
False


cedarwright_5
cedarwright_6
9
9
equal
a
False


cedarwright_4
cedarwright_2
9
9
equal
a
False


travisburkephotography_9
travisburkephotography_2
9
9
equal
a
False


travisburkephotography_6
travisburkephotography_3
9
9
equal
b
False


travisburkephotography_15
travisburkephotography_8
9
9
equal
b
False


travisb