## TO DO

- Resize all images to standard size
- Add code to convert labels to proper format
- Figure out how to do proper testing (code to pair up images randomly, predict their score using neural network, and then compare whether the ordinal match was right using our score)
- Figure out how to incorporate regression output
- Rewrite of code

## Create Neural Network

In [23]:
from keras import applications
from keras.layers import Input, Conv2D, MaxPooling2D, Dense, Dropout, Flatten
from keras.utils import np_utils

# Explicit input placeholder for 160x160 images with 3 channels; passing it
# to VGG16 below fixes the input size of the whole network.
input_tensor = Input(shape=(160, 160, 3))
# VGG16 loaded with ImageNet weights. include_top=False requests the model
# without its top (fully connected) layers, so a custom head can be stacked
# on the convolutional layers.
vgg_model = applications.VGG16(weights='imagenet',
                               include_top=False,
                               input_tensor=input_tensor)

# To see the model's architecture and layer names, run the following
vgg_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 160, 160, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 160, 160, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 160, 160, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 80, 80, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 80, 80, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 80, 80, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 40, 40, 128)       0         
__________

In [145]:
from keras.models import Model

# Index the VGG16 layers by name so the cut-off layer can be looked up directly
layer_dict = {layer.name: layer for layer in vgg_model.layers}

# Layers of the new classification head, listed in the order they are applied:
# a small conv/pool stage, then a dense classifier with a 10-way softmax.
head_layers = [
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
]

# Start from the output of block4_pool (the last VGG16 layer that is kept)
# and thread it through the head, one layer at a time.
x = layer_dict['block4_pool'].output
for head_layer in head_layers:
    x = head_layer(x)

# Functional-API model (NOT a Sequential() model) from the VGG16 input to the
# new softmax output.
custom_model = Model(inputs=vgg_model.input, outputs=x)

# Freeze the first 15 layers so only the new head is trained.
# NOTE(review): 15 presumably covers the input layer through block4_pool;
# confirm with custom_model.summary() if the cut-off layer ever changes.
for layer in custom_model.layers[:15]:
    layer.trainable = False

# The model must be compiled after the trainable flags are set
custom_model.compile(optimizer='rmsprop',
                     loss='categorical_crossentropy',
                     metrics=['accuracy'])

## Train Neural Network

In [63]:
import cv2
import numpy as np
import csv, sqlite3
import math
import os
import random

In [207]:
"""
Create dictionary with:
Key = filename root
Value = score
"""

# Open the SQLite database file and create the module-level cursor that
# get_filenames() uses for its query.
# NOTE(review): the connection is never closed in the visible code.
con = sqlite3.connect("imagion.db")
cur = con.cursor()

# Column names of the `imagion` table: the first field of each entry in the
# cursor's `description` after running SELECT * on it.
table_cols = [i[0] for i in cur.execute("SELECT * FROM imagion").description]

def get_filenames():
    """Return a dict mapping each `filename` in `imagion` to `scale_qsc - 1`.

    Reads through the module-level cursor `cur`. The first fetched row is
    skipped (presumably a header row that was imported along with the data --
    TODO confirm). If a filename occurs more than once, the last row wins.
    """
    cur.execute("SELECT filename, scale_qsc FROM imagion")
    rows = cur.fetchall()

    # rows[1:] drops the first row; scores are shifted down by one
    return {name: raw_score - 1 for name, raw_score in rows[1:]}

# Build the filename -> (score - 1) lookup once; the image loaders and the
# train/test split read it as a module-level global.
files_dict = get_filenames()

In [208]:
"""Split training and test images"""

# Seed the RNG so the shuffle (and therefore the split) is repeatable
random.seed(10)

# random.shuffle needs a mutable sequence. On Python 3 dict.keys() is a view
# and shuffling it raises TypeError, so materialise the keys as a list first
# (on Python 2 this yields the same list as files_dict.keys()).
keys = list(files_dict)
split = int(len(keys) * 0.75)

random.shuffle(keys)

# The first 75% of the shuffled keys are used for training, the rest for testing
train_keys = keys[:split]
test_keys = keys[split:]

In [213]:
def chunks(l, n):
    """Yield successive n-sized chunks from l.

    Args:
        l: a sliceable sequence (list, tuple, string, ...).
        n: chunk length; the last chunk may be shorter.

    Yields:
        Slices ``l[i:i+n]`` for i = 0, n, 2n, ...

    Raises:
        ValueError: if n is 0 (raised by ``range``).
    """
    # range() instead of the Python 2-only xrange() so this runs on both
    # Python 2 and Python 3.
    for i in range(0, len(l), n):
        yield l[i:i+n]

# def get_train_data(chunk, img_row, img_col):
#     X_train = []
#     Y_train = []
    
#     try:
#         for imgname in chunk:
#             Y_train.append(files_dict[imgname])
#             filename = 'data_images'+'/'+imgname+'.png'
#             img = cv2.imread(filename)
#             img = cv2.resize(img,(img_row,img_col))
#             X_train.append(img)

#         X_train = np.asarray(X_train)
#         Y_train = np.asarray(Y_train)
        
#         return X_train,Y_train

#     except:
#         X_train=None
#         Y_train=None
#         return X_train,Y_train

def get_train_data(chunk, img_row, img_col):
    """Load and resize the training images named in `chunk`.

    Args:
        chunk: iterable of filename roots (keys of the global `files_dict`).
        img_row: target image height in pixels.
        img_col: target image width in pixels.

    Returns:
        (X_train, Y_train): numpy arrays of the resized images and their
        labels from `files_dict`, in matching order. Images that cannot be
        loaded are skipped, so the arrays may be shorter than `chunk` (or
        empty).
    """
    X_train = []
    Y_train = []

    for imgname in chunk:
        try:
            # Look the label up before touching the lists so a failure can
            # never leave X_train and Y_train with different lengths.
            label = files_dict[imgname]
            filename = 'data_images'+'/'+imgname+'.png'
            img = cv2.imread(filename)
            if img is None:
                # cv2.imread signals a missing/unreadable file with None
                continue
            # cv2.resize takes dsize as (width, height), i.e. (cols, rows)
            img = cv2.resize(img, (img_col, img_row))
        except (KeyError, TypeError, cv2.error):
            # Best-effort loading: skip entries with no label, a non-string
            # name, or an image OpenCV cannot process. Unlike a bare except,
            # this still lets KeyboardInterrupt stop a long load.
            continue
        X_train.append(img)
        Y_train.append(label)

    X_train = np.asarray(X_train)
    Y_train = np.asarray(Y_train)

    return X_train,Y_train
    
    
    
    
def get_test_data(chunk, img_row, img_col):
    """Load and resize the test images named in `chunk`.

    Args:
        chunk: iterable of filename roots (keys of the global `files_dict`).
        img_row: target image height in pixels.
        img_col: target image width in pixels.

    Returns:
        (X_test, Y_test): numpy arrays of the resized images and their labels
        from `files_dict`, in matching order. Images that cannot be loaded
        are skipped, so the arrays may be shorter than `chunk` (or empty).
    """
    X_test = []
    Y_test = []

    for imgname in chunk:
        try:
            # Look the label up before touching the lists so a failure can
            # never leave X_test and Y_test with different lengths.
            label = files_dict[imgname]
            filename = './data_images'+'/'+imgname+'.png'
            img = cv2.imread(filename)
            if img is None:
                # cv2.imread signals a missing/unreadable file with None
                continue
            # cv2.resize takes dsize as (width, height), i.e. (cols, rows)
            img = cv2.resize(img, (img_col, img_row))
        except (KeyError, TypeError, cv2.error):
            # Best-effort loading: skip entries with no label, a non-string
            # name, or an image OpenCV cannot process. Unlike a bare except,
            # this still lets KeyboardInterrupt stop a long load.
            continue
        X_test.append(img)
        Y_test.append(label)

    X_test = np.asarray(X_test)
    Y_test = np.asarray(Y_test)

    return X_test,Y_test

def getTrainData(chunk,nb_classes,img_rows,img_cols):
    """Load a chunk of training images, scaled for the network.

    Args:
        chunk: iterable of filename roots to load.
        nb_classes: number of label classes for the one-hot encoding.
        img_rows: target image height in pixels.
        img_cols: target image width in pixels.

    Returns:
        (X_train, Y_train): float32 image array with pixel values divided by
        255, and the labels one-hot encoded into `nb_classes` columns.
    """
    X_train,Y_train = get_train_data(chunk,img_rows,img_cols)
    # `is not None` instead of `!= None`: comparing an ndarray with `!=` is
    # elementwise, so combining the results with `and` raises ValueError.
    if X_train is not None and Y_train is not None:
        # Convert to float before scaling. In-place `/= 255` on the integer
        # pixel array (cv2.imread normally yields uint8) either raises
        # (Python 3) or floor-divides every pixel to 0 or 1 (Python 2).
        X_train = np.asarray(X_train, dtype='float32') / 255
    # Honour the nb_classes argument rather than a hard-coded 10
    Y_train=np_utils.to_categorical(Y_train, num_classes = nb_classes)
    return (X_train,Y_train)

def getTestData(chunk,nb_classes,img_rows,img_cols):
    """Load a chunk of test images, scaled for the network.

    Args:
        chunk: iterable of filename roots to load.
        nb_classes: number of label classes for the one-hot encoding.
        img_rows: target image height in pixels.
        img_cols: target image width in pixels.

    Returns:
        (X_test, Y_test): float32 image array with pixel values divided by
        255, and the labels one-hot encoded into `nb_classes` columns.
    """
    X_test,Y_test = get_test_data(chunk,img_rows,img_cols)
    # `is not None` instead of `!= None`: comparing an ndarray with `!=` is
    # elementwise, so combining the results with `and` raises ValueError.
    if X_test is not None and Y_test is not None:
        # Convert to float before scaling. In-place `/= 255` on the integer
        # pixel array (cv2.imread normally yields uint8) either raises
        # (Python 3) or floor-divides every pixel to 0 or 1 (Python 2).
        X_test = np.asarray(X_test, dtype='float32') / 255
    # Honour the nb_classes argument rather than a hard-coded 10
    Y_test=np_utils.to_categorical(Y_test, num_classes = nb_classes)
    return (X_test,Y_test)

def test(model, nb_epoch, spatial_test_data, nb_classes, img_rows, img_cols):
    """Return the prepared (X_test, Y_test) arrays for the global `test_keys`.

    `model`, `nb_epoch` and `spatial_test_data` are accepted but not used;
    no prediction or evaluation is performed here.
    """
    images, labels = getTestData(test_keys, nb_classes, img_rows, img_cols)
    return (images, labels)


In [None]:
nb_epoch = 50
batch_size = 2
nb_classes = 10
# Number of images loaded from disk per fit() call. (An earlier assignment
# of 10 was immediately overwritten by this value and has been removed.)
chunk_size = 32
img_rows = 160
img_cols = 160
# Checkpoint the weights after roughly this many newly trained images
save_every = 100


for e in range(nb_epoch):
    print('-'*40)
    print('Epoch %d' % e)
    print('-'*40)
    print("Training...")
    instance_count = 0
    last_saved = 0

    for chunk in chunks(train_keys, chunk_size):
        X_chunk, Y_chunk = getTrainData(chunk, nb_classes, img_rows, img_cols)

        # `!= None` on an ndarray is an elementwise comparison, so test
        # identity instead, and skip chunks in which no image could be loaded.
        if X_chunk is None or Y_chunk is None or len(X_chunk) == 0:
            continue

        custom_model.fit(X_chunk, Y_chunk, verbose=1,
                         batch_size=batch_size, epochs=1)

        # Count the images actually trained on, not the nominal chunk size
        instance_count += len(X_chunk)
        print(instance_count)

        # The old `instance_count % 100 == 0` test only fired every 800
        # images when counting in steps of 32; checkpoint on elapsed count.
        if instance_count - last_saved >= save_every:
            custom_model.save_weights('basic_model.h5', overwrite=True)
            last_saved = instance_count

    # Always persist the weights at the end of an epoch so the final chunks
    # are not lost.
    custom_model.save_weights('basic_model.h5', overwrite=True)
            
                    
            

----------------------------------------
('Epoch', 0)
----------------------------------------
Training...




Epoch 1/1
32
Epoch 1/1
64
Epoch 1/1
96
Epoch 1/1
128
Epoch 1/1
160
Epoch 1/1
192
Epoch 1/1
224
Epoch 1/1
256
Epoch 1/1
288
Epoch 1/1
320
Epoch 1/1
352
Epoch 1/1
384
Epoch 1/1
416
Epoch 1/1
448
Epoch 1/1
480
Epoch 1/1
512
Epoch 1/1
544
Epoch 1/1
576
Epoch 1/1
608
Epoch 1/1
640
Epoch 1/1
672
Epoch 1/1
704
Epoch 1/1
736
Epoch 1/1
768
Epoch 1/1
800
Epoch 1/1
832
Epoch 1/1
864
Epoch 1/1
896
Epoch 1/1
928
Epoch 1/1
960
Epoch 1/1
992
Epoch 1/1
1024
Epoch 1/1
1056
Epoch 1/1
1088
Epoch 1/1
1120
Epoch 1/1
1152
Epoch 1/1
1184
Epoch 1/1
1216
Epoch 1/1

In [None]:
""" SUDO CODE FOR CUSTOM ACCURACY FUNCTION"""

def test_accuracy():
    """Outline of a pairwise ordinal-accuracy check (not implemented yet).

    Planned procedure:
      1. Select two images at random.
      2. Predict a score for each image.
      3. Determine which predicted score is higher.
      4. Determine which recorded "score" is higher.
      5. If the two comparisons agree, add 1 to `accuracy`; otherwise do
         nothing.
      6. Add 1 to `count`.
      7. Finally divide accuracy / count to get the accuracy percentage.

    For now the function only initialises its two counters and returns None.
    """
    accuracy = 0  # number of pairs whose ordering was predicted correctly
    count = 0     # number of pairs examined
    