In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt
import tensorflow as tf
import keras.backend as K
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
from importlib import reload

# custom module for capstone 2
import cap2tools as c2t
reload(c2t)

Using TensorFlow backend.


<module 'cap2tools' from 'C:\\Users\\Nils\\Documents\\GitHub\\Springboard-Capstone-2-local-yelp\\cap2tools.py'>

In [2]:
# configure GPU memory usage by tensorflow
# Uses the TF 1.x session API reached through the Keras backend module (K.tf),
# so this cell relies on Keras running on a TensorFlow 1.x backend.
config = K.tf.ConfigProto()
# allocate GPU memory on demand instead of reserving it all at start-up
config.gpu_options.allow_growth = True
# upper bound on the fraction of GPU memory this process may claim
config.gpu_options.per_process_gpu_memory_fraction = 0.80
# make Keras use a session created with these options
K.tensorflow_backend.set_session(K.tf.Session(config=config))

In [3]:
# define path to the validation image directory
# (passed to c2t.eval_models by the evaluation cells further down; the final
# evaluation cell rebinds valid_path to the full validation set)
valid_path = 'downsampled/val'

Found 5480 images belonging to 5 classes.
Found 525 images belonging to 5 classes.


In [4]:
# load image information
# The cells below read two columns of this table: `label` (the image class)
# and `set` (rows with set == 'train' are used for the class weights).
data = pd.read_csv('photo_labels_all.csv')
# preview the first rows only
data.head()

Unnamed: 0,photo_id,label,set
0,gVvL0bqKrOuBhuuaXdF47Q,food,test
1,DwBK2MSbxvGizFJ16d_sqQ,food,test
2,y1itUxrDBEEywoB49XEvFw,food,test
3,HZpQjwcMQJOBmYpr8Ds-rA,food,test
4,SLw-nStWXfaYqlSfzIwyIQ,inside,test


In [5]:
# make class imbalance table
# Count images per class over the whole label file (all sets), and present
# the counts as a one-column table indexed by class name.
table = (
    data['label']
    .value_counts()
    .to_frame(name='Image Count')
    .rename_axis('Class')
)
table.to_csv('figures/class_imba_table.csv')
table

Unnamed: 0_level_0,Image Count
Class,Unnamed: 1_level_1
food,184456
inside,61620
outside,23214
drink,10350
menu,1352


In [6]:
# make class weights table
# Only the training split contributes to the weights.
train_df = data.loc[data['set'] == 'train']
counts = train_df['label'].value_counts()

# Weight of a class = (number of 'food' images) / (number of images in the class),
# so 'food' gets 1.0 and classes with fewer images get proportionally larger weights.
weights = counts['food'] / counts

# Counts and rounded weights side by side, indexed by class name.
weights_df = pd.DataFrame({'Image Count': counts, 'Weight': weights.round(2)})
weights_df.index.name = 'Class'
weights_df.to_csv('figures/class_weights_table.csv')
weights_df

Unnamed: 0_level_0,Image Count,Weight
Class,Unnamed: 1_level_1,Unnamed: 2_level_1
food,151588,1.0
inside,50684,2.99
outside,19138,7.92
drink,8486,17.86
menu,1096,138.31


In [7]:
# evaluate width conditions
# Each width condition has `replicates` saved models, numbered from 1.
widths = [400, 600, 800, 1000, 1200, 1400, 1600]
replicates = 3

# Map a label of the form "width_<w> - <replicate>" to the saved model file,
# one entry per width/replicate pair, in width-major order.
model_paths = {
    '{} - {}'.format(condition, rep): 'models/vgg16_{}_{}.h5'.format(condition, rep)
    for condition in ('width_{}'.format(width) for width in widths)
    for rep in range(1, replicates + 1)
}
        
# Load and evaluate every model listed in model_paths on the images under valid_path.
# valid_path is defined in the path-definition cell near the top of the notebook,
# so that cell must have been run in the current kernel before this one.
model_metrics = c2t.eval_models(model_paths, valid_path)

Building image generator...
Found 525 images belonging to 5 classes.
Loading models/vgg16_width_400_1.h5
Evaluating models/vgg16_width_400_1.h5
Loading models/vgg16_width_400_2.h5
Evaluating models/vgg16_width_400_2.h5
Loading models/vgg16_width_400_3.h5
Evaluating models/vgg16_width_400_3.h5
Loading models/vgg16_width_600_1.h5
Evaluating models/vgg16_width_600_1.h5
Loading models/vgg16_width_600_2.h5
Evaluating models/vgg16_width_600_2.h5
Loading models/vgg16_width_600_3.h5
Evaluating models/vgg16_width_600_3.h5
Loading models/vgg16_width_800_1.h5
Evaluating models/vgg16_width_800_1.h5
Loading models/vgg16_width_800_2.h5
Evaluating models/vgg16_width_800_2.h5
Loading models/vgg16_width_800_3.h5
Evaluating models/vgg16_width_800_3.h5
Loading models/vgg16_width_1000_1.h5
Evaluating models/vgg16_width_1000_1.h5
Loading models/vgg16_width_1000_2.h5
Evaluating models/vgg16_width_1000_2.h5
Loading models/vgg16_width_1000_3.h5
Evaluating models/vgg16_width_1000_3.h5
Loading models/vgg16_widt

In [11]:
# save width comparison table
# Build the comparison table from the metrics of the width run (passing 'Width'
# as the index name), then write it out as CSV.
width_table = c2t.eval_table(model_metrics, index_name='Width')
width_table.to_csv('figures/width_comparison_table.csv')

In [4]:
# evaluate learning rate conditions
# Each learning rate has `replicates` saved models, numbered from 1.
rates = [0.0005, 0.0002, 0.0001, 0.00005, 0.00002]
replicates = 3
dir_path = 'H:/springboard/other_data/yelp/models'

# The rate is embedded in the file name with '.' swapped for '-'.
# NOTE(review): str() renders 0.00005 and 0.00002 in scientific notation
# ('5e-05', '2e-05'), so those conditions are named e.g. learning_rate_5e-05;
# presumably the saved files were named the same way -- confirm.
model_paths = {
    '{} - {}'.format(condition, rep): '{}/vgg16_{}_{}.h5'.format(dir_path, condition, rep)
    for condition in (
        'learning_rate_{}'.format(str(rate).replace('.', '-')) for rate in rates
    )
    for rep in range(1, replicates + 1)
}

# Load and evaluate every learning-rate model on the images under valid_path.
# valid_path is not set in this cell: it comes from the path-definition cell
# near the top, which must be re-run after a kernel restart.
model_metrics = c2t.eval_models(model_paths, valid_path)

Building image generator...
Found 525 images belonging to 5 classes.
Loading H:/springboard/other_data/yelp/models/vgg16_learning_rate_0-0005_1.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_learning_rate_0-0005_1.h5
Loading H:/springboard/other_data/yelp/models/vgg16_learning_rate_0-0005_2.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_learning_rate_0-0005_2.h5
Loading H:/springboard/other_data/yelp/models/vgg16_learning_rate_0-0005_3.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_learning_rate_0-0005_3.h5
Loading H:/springboard/other_data/yelp/models/vgg16_learning_rate_0-0002_1.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_learning_rate_0-0002_1.h5
Loading H:/springboard/other_data/yelp/models/vgg16_learning_rate_0-0002_2.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_learning_rate_0-0002_2.h5
Loading H:/springboard/other_data/yelp/models/vgg16_learning_rate_0-0002_3.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_lea

In [22]:
# save learn rate comparison table
# Build the comparison table from the metrics of the learning-rate run (passing
# 'Learning Rate' as the index name), then write it out as CSV.
lr_table = c2t.eval_table(model_metrics, index_name='Learning Rate')
lr_table.to_csv('figures/lr_comparison_table.csv')

In [5]:
# evaluate dropout1 conditions
# A numpy array is used so that every rate is a float: 0 is rendered as '0.0'
# and therefore appears as '0-0' in the condition name.
d1s = np.array([0, 0.1, 0.2, 0.3, 0.4])
replicates = 3
dir_path = 'H:/springboard/other_data/yelp/models'

# Map "dropout1_<rate> - <replicate>" to the saved model file; the rate is
# written with '.' swapped for '-'.
model_paths = {
    '{} - {}'.format(condition, rep): '{}/vgg16_{}_{}.h5'.format(dir_path, condition, rep)
    for condition in (
        'dropout1_{}'.format(str(rate).replace('.', '-')) for rate in d1s
    )
    for rep in range(1, replicates + 1)
}
        
# Load and evaluate every dropout1 model on the images under valid_path.
# valid_path is not set in this cell: it comes from the path-definition cell
# near the top, which must be re-run after a kernel restart.
model_metrics = c2t.eval_models(model_paths, valid_path)

Building image generator...
Found 525 images belonging to 5 classes.
Loading H:/springboard/other_data/yelp/models/vgg16_dropout1_0-0_1.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_dropout1_0-0_1.h5
Loading H:/springboard/other_data/yelp/models/vgg16_dropout1_0-0_2.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_dropout1_0-0_2.h5
Loading H:/springboard/other_data/yelp/models/vgg16_dropout1_0-0_3.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_dropout1_0-0_3.h5
Loading H:/springboard/other_data/yelp/models/vgg16_dropout1_0-1_1.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_dropout1_0-1_1.h5
Loading H:/springboard/other_data/yelp/models/vgg16_dropout1_0-1_2.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_dropout1_0-1_2.h5
Loading H:/springboard/other_data/yelp/models/vgg16_dropout1_0-1_3.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_dropout1_0-1_3.h5
Loading H:/springboard/other_data/yelp/models/vgg16_dropout1_0-2_1.h5
Eva

In [6]:
# save dropout1 comparison table
# Build the comparison table from the metrics of the dropout1 run (passing
# 'Dropout 1' as the index name), then write it out as CSV.
d1_table = c2t.eval_table(model_metrics, index_name='Dropout 1')
d1_table.to_csv('figures/d1_comparison_table.csv')

In [4]:
# evaluate dropout2 conditions
# A numpy array is used so that every rate is a float: 0 is rendered as '0.0'
# and therefore appears as '0-0' in the condition name.
d2s = np.array([0, 0.1, 0.2, 0.3, 0.4])
replicates = 3
dir_path = 'H:/springboard/other_data/yelp/models'

# Map "dropout2_<rate> - <replicate>" to the saved model file; the rate is
# written with '.' swapped for '-'.
model_paths = {
    '{} - {}'.format(condition, rep): '{}/vgg16_{}_{}.h5'.format(dir_path, condition, rep)
    for condition in (
        'dropout2_{}'.format(str(rate).replace('.', '-')) for rate in d2s
    )
    for rep in range(1, replicates + 1)
}
        
# Load and evaluate every dropout2 model on the images under valid_path.
# valid_path is not set in this cell: it comes from the path-definition cell
# near the top, which must be re-run after a kernel restart.
model_metrics = c2t.eval_models(model_paths, valid_path)

Building image generator...
Found 525 images belonging to 5 classes.
Loading H:/springboard/other_data/yelp/models/vgg16_dropout2_0-0_1.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_dropout2_0-0_1.h5
Loading H:/springboard/other_data/yelp/models/vgg16_dropout2_0-0_2.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_dropout2_0-0_2.h5
Loading H:/springboard/other_data/yelp/models/vgg16_dropout2_0-0_3.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_dropout2_0-0_3.h5
Loading H:/springboard/other_data/yelp/models/vgg16_dropout2_0-1_1.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_dropout2_0-1_1.h5
Loading H:/springboard/other_data/yelp/models/vgg16_dropout2_0-1_2.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_dropout2_0-1_2.h5
Loading H:/springboard/other_data/yelp/models/vgg16_dropout2_0-1_3.h5
Evaluating H:/springboard/other_data/yelp/models/vgg16_dropout2_0-1_3.h5
Loading H:/springboard/other_data/yelp/models/vgg16_dropout2_0-2_1.h5
Eva

In [5]:
# save dropout2 comparison table
# Build the comparison table from the metrics of the dropout2 run (passing
# 'Dropout 2' as the index name), then write it out as CSV.
d2_table = c2t.eval_table(model_metrics, index_name='Dropout 2')
d2_table.to_csv('figures/d2_comparison_table.csv')

In [6]:
# evaluate full image set model on full validation set
# NOTE: valid_path is rebound here from the downsampled validation directory
# to the full one; re-running an earlier evaluation cell after this point
# would pick up the full set unless the path-definition cell is re-run first.
photos_path = 'H:/springboard/other_data/yelp/Photos/final_photos/'
valid_path = '{}val'.format(photos_path)  # photos_path already ends with '/'

# a single model, stored under the label 'model'
model_paths = dict(model='models/full_set_model.h5')
# Load and evaluate the full-set model on the full validation directory set just above.
model_metrics = c2t.eval_models(model_paths, valid_path)

Building image generator...
Found 20000 images belonging to 5 classes.
Loading models/full_set_model.h5
Evaluating models/full_set_model.h5
Evaluation complete.



In [7]:
# Print the evaluation summary for the full-set model; 'model' is the label it
# was given in model_paths.
# NOTE(review): in the printed output, each `pcr` entry matches diagonal / row sum
# of the confusion matrix (e.g. 355 / 751 = 0.4727), i.e. it looks like a
# per-class rate -- confirm the exact definition in cap2tools.
c2t.print_eval(model_metrics['model'])

accuracy:  93.1%
loss:  0.1931
pcr:  [0.4727 0.9859 0.9024 0.2095 0.8182]
mean pcr:  67.77%
confusion matrix: 


[[355, 168, 199, 0, 29],
 [40, 13055, 110, 0, 37],
 [19, 83, 3847, 0, 314],
 [3, 4, 39, 22, 37],
 [1, 6, 291, 0, 1341]]