In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras.backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.optimizers import Adam
from keras.models import load_model
import importlib as imp
import gc
from datetime import datetime

# custom module for capstone 2
import cap2tools as c2t
imp.reload(c2t)

Using TensorFlow backend.


<module 'cap2tools' from 'C:\\Users\\Nils\\Documents\\GitHub\\Springboard-Capstone-2-local-yelp\\cap2tools.py'>

In [2]:
# Configure how the TensorFlow session used by Keras claims GPU memory:
# grow allocations on demand, capped at 90% of the device memory.
gpu_options = tf.GPUOptions(allow_growth=True,
                            per_process_gpu_memory_fraction=0.90)
config = tf.ConfigProto(gpu_options=gpu_options)

# Register a session built from this config as the Keras backend session.
K.set_session(tf.Session(config=config))

In [3]:
# Root directory of the photo set; the 'train' and 'val' sub-directories
# are addressed by appending their names to this root.
photos_path = 'H:/springboard/other_data/yelp/Photos/final_photos/'
train_path, valid_path = (photos_path + split for split in ('train', 'val'))

# Build the training and validation batch generators via the project helper
# (called with augment=True).
datagens = c2t.build_datagens(train_path, valid_path, augment=True)
train_batches, valid_batches = datagens

Found 230992 images belonging to 5 classes.
Found 20000 images belonging to 5 classes.


In [4]:
# Set balanced class weights.
#
# Each class is weighted by (count of 'food') / (count of that class), so
# 'food' gets weight 1.0 and every other class is scaled relative to it.
# The resulting dict is keyed by the integer class index used by the training
# generator, which is the key format Keras expects for `class_weight`.
images_df = pd.read_csv('photo_labels_all.csv')
train_df = images_df[images_df['set'] == 'train']
counts = train_df['label'].value_counts()
weights = counts['food'] / counts

# Look up the label -> class-index mapping from the generator instead of
# hard-coding it: a hand-written index list is only correct for one specific
# ordering of value_counts() and silently mis-assigns weights if the class
# counts (or the set of classes) ever change.
# NOTE(review): assumes train_batches is a Keras directory iterator exposing
# `class_indices`, and that the image sub-directory names equal the values in
# the CSV 'label' column -- confirm against c2t.build_datagens.
class_indices = train_batches.class_indices
unknown_labels = sorted(set(counts.index) - set(class_indices))
if unknown_labels:
    raise KeyError('labels in photo_labels_all.csv without a matching '
                   'image class directory: {}'.format(unknown_labels))

# Summary table: one row per class, indexed by class index 'i'.
weights_df = counts.to_frame('count')
weights_df['weight'] = weights
weights_df['i'] = [class_indices[label] for label in weights_df.index]
weights_df = weights_df.set_index('i')
weights_dict = weights_df['weight'].to_dict()

In [5]:
# Baseline experiment: train on the full set WITHOUT class weight balancing.
n_epochs = 5
filepath = 'models/full_set_model.h5'

# Build the VGG16-based network through the project helper and compile it.
model = c2t.build_VGG16(
    widths=(1000, 1250),
    new_weights=False,
    trainable=True,
    dropout1=0,
    dropout2=0.2,
)
model.compile(
    optimizer=Adam(lr=0.0001, decay=0.1),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Callbacks: write a checkpoint whenever val_loss improves, and stop early
# once val_loss has gone 2 epochs without improving.
saver = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True)
stopper = EarlyStopping(monitor='val_loss', patience=2, verbose=1)

# Log a start timestamp so the run's duration can be read off the output.
datetime_now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print('{} - Started training {}'.format(datetime_now, filepath))

# Train; no class_weight is passed in this run.
fit_options = dict(
    validation_data=valid_batches,
    epochs=n_epochs,
    verbose=1,
    callbacks=[saver, stopper],
)
history = model.fit_generator(train_batches, **fit_options)

# Release the backend session and the model object before the next experiment.
K.clear_session()
del model
gc.collect()

2018-09-25 00:58:12 - Started training models/full_set_model.h5
Epoch 1/5

Epoch 00001: val_loss improved from inf to 0.21304, saving model to models/full_set_model.h5
Epoch 2/5

Epoch 00002: val_loss improved from 0.21304 to 0.20411, saving model to models/full_set_model.h5
Epoch 3/5

Epoch 00003: val_loss improved from 0.20411 to 0.19836, saving model to models/full_set_model.h5
Epoch 4/5

Epoch 00004: val_loss improved from 0.19836 to 0.19702, saving model to models/full_set_model.h5
Epoch 5/5

Epoch 00005: val_loss improved from 0.19702 to 0.19311, saving model to models/full_set_model.h5


21582

In [5]:
# Evaluate the saved baseline checkpoint against the validation directory
# and print the metrics reported for it.
model_key = 'model'
model_paths = {model_key: 'models/full_set_model.h5'}
model_metrics = c2t.eval_models(model_paths, valid_path)
c2t.print_eval(model_metrics[model_key])

Building image generator...
Found 20000 images belonging to 5 classes.
Loading models/full_set_model.h5
Evaluating models/full_set_model.h5
Evaluation complete.

accuracy:  93.1%
loss:  0.1931
pcr:  [0.4727 0.9859 0.9024 0.2095 0.8182]
mean pcr:  67.77%
confusion matrix: 


[[355, 168, 199, 0, 29],
 [40, 13055, 110, 0, 37],
 [19, 83, 3847, 0, 314],
 [3, 4, 39, 22, 37],
 [1, 6, 291, 0, 1341]]

In [5]:
# Second experiment: same architecture and optimizer settings as the baseline,
# but trained WITH class weight balancing.
n_epochs = 5
filepath = 'models/full_set_model_balanced.h5'

# Build the VGG16-based network through the project helper and compile it.
model = c2t.build_VGG16(
    widths=(1000, 1250),
    new_weights=False,
    trainable=True,
    dropout1=0,
    dropout2=0.2,
)
model.compile(
    optimizer=Adam(lr=0.0001, decay=0.1),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Callbacks: write a checkpoint whenever val_loss improves, and stop early
# once val_loss has gone 2 epochs without improving.
saver = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True)
stopper = EarlyStopping(monitor='val_loss', patience=2, verbose=1)

# Log a start timestamp so the run's duration can be read off the output.
datetime_now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print('{} - Started training {}'.format(datetime_now, filepath))

# Train, passing the per-class weights computed earlier in the notebook.
fit_options = dict(
    validation_data=valid_batches,
    epochs=n_epochs,
    class_weight=weights_dict,
    verbose=1,
    callbacks=[saver, stopper],
)
history_2 = model.fit_generator(train_batches, **fit_options)

# Release the backend session and the model object once training is done.
K.clear_session()
del model
gc.collect()

2018-09-26 02:01:51 - Started training models/full_set_model_balanced.h5
Epoch 1/5

Epoch 00001: val_loss improved from inf to 0.41823, saving model to models/full_set_model_balanced.h5
Epoch 2/5

Epoch 00002: val_loss improved from 0.41823 to 0.40988, saving model to models/full_set_model_balanced.h5
Epoch 3/5

Epoch 00003: val_loss improved from 0.40988 to 0.38910, saving model to models/full_set_model_balanced.h5
Epoch 4/5

Epoch 00004: val_loss did not improve from 0.38910
Epoch 5/5

Epoch 00005: val_loss improved from 0.38910 to 0.38255, saving model to models/full_set_model_balanced.h5


21582

In [8]:
# Evaluate the saved class-weighted checkpoint against the validation
# directory and print the metrics reported for it.
model_key = 'model'
model_paths = {model_key: 'models/full_set_model_balanced.h5'}
model_metrics = c2t.eval_models(model_paths, valid_path)
c2t.print_eval(model_metrics[model_key])

Building image generator...
Found 20000 images belonging to 5 classes.
Loading models/full_set_model_balanced.h5
Evaluating models/full_set_model_balanced.h5
Evaluation complete.

accuracy:  85.98%
loss:  0.3826
pcr:  [0.7909 0.8954 0.7983 0.9619 0.7566]
mean pcr:  84.06%
confusion matrix: 


[[594, 26, 69, 24, 38],
 [1065, 11857, 174, 66, 80],
 [121, 43, 3403, 58, 638],
 [0, 0, 1, 101, 3],
 [10, 3, 354, 32, 1240]]