# Training model

Note: I tried several models, from a handmade 8-layer NN to the ResNet18 used by my colleagues; however, all of them failed to perform, for different reasons (VMs that died, overfitting, and other unknown problems). I acknowledge using various sources on the web as well as the help of my colleague Alexandr Marinsky and other people — thanks to them.

Finally, I settled on this InceptionV3-based model, with some changes, because it showed not very high but stable results. The base model comes from the standard Keras applications and was pre-trained on the well-known ImageNet dataset.
The algorithm is rather simple: run the "Inception part" of the network on our dataset, then save the results as NumPy arrays, train the upper (fully connected) layers on these NumPy arrays, and then compile and fine-tune the whole model with the inner layers blocked (frozen).

In [1]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import ModelCheckpoint
from keras.optimizers import SGD

from keras import backend as K
# 'th' selects Theano-style, channels-first tensors (channels, height, width);
# this matches the input_shape=(3, 150, 150) given to InceptionV3 in this notebook.
K.set_image_dim_ordering('th')

import numpy as np
import pandas as pd
import h5py

Using TensorFlow backend.


In [3]:
# Convolutional base of InceptionV3 with ImageNet weights and without the
# original classifier head; inputs are channels-first 150x150 images.
inc_model = InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(3, 150, 150),
)

In [126]:
import zipfile

# Unpack the dataset archive into the home directory.
local_zip = '//jet/prs/workspace/Dataset_iphone_new.zip'
# The context manager closes the archive even if extraction fails part-way.
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/home/talyanskaya_marina/Dataset_iphone_new')

In [4]:
# Generators used only to push images through the Inception base:
# pixel rescaling, no augmentation, no labels (class_mode=None) and a fixed
# file order (shuffle=False) so that rows can be matched to labels later.
bottleneck_datagen = ImageDataGenerator(rescale=1./255)

train_generator = bottleneck_datagen.flow_from_directory(
    '/home/talyanskaya_marina/Dataset_iphone_new/Dataset_iphone_new/train',
    shuffle=False,
    class_mode=None,
    batch_size=32,
    target_size=(150, 150))

validation_generator = bottleneck_datagen.flow_from_directory(
    '/home/talyanskaya_marina/Dataset_iphone_new/Dataset_iphone_new/validation',
    shuffle=False,
    class_mode=None,
    batch_size=32,
    target_size=(150, 150))

Found 24000 images belonging to 2 classes.
Found 24000 images belonging to 2 classes.


In [5]:
import os
import shutil

In [6]:
# Start from an empty output directory. Only wipe it when it really exists,
# so that an unrelated mkdir failure (permissions, missing parent) is reported
# as such instead of triggering rmtree.
bottleneck_dir = '/home/talyanskaya_marina/bottleneck_features'
if os.path.isdir(bottleneck_dir):
    shutil.rmtree(bottleneck_dir)
os.mkdir(bottleneck_dir)

# Run every image once through the Inception base and store the activations.
# `steps` must be a whole number of batches; len(generator) is the number of
# batches per pass (ceil(samples / batch_size)), i.e. 750 for 24000 images.
bottleneck_features_train = inc_model.predict_generator(train_generator, steps=len(train_generator))
# Passing the path lets NumPy open and close the file itself (no leaked handle).
np.save('/home/talyanskaya_marina/bottleneck_features/bn_features_train.npy', bottleneck_features_train)
bottleneck_features_validation = inc_model.predict_generator(validation_generator, steps=len(validation_generator))
np.save('/home/talyanskaya_marina/bottleneck_features/bn_features_validation.npy', bottleneck_features_validation)

In [7]:
# Load the saved Inception activations; passing the path lets NumPy open and
# close the file itself instead of leaking an open handle.
train_data = np.load('/home/talyanskaya_marina/bottleneck_features/bn_features_train.npy')
# Labels are hard-coded: the features were produced with shuffle=False, so rows
# follow the directory order. NOTE(review): assumes exactly 12000 images in
# each of the two class folders, class 0 first — confirm against the dataset.
train_labels = np.array([0] * 12000 + [1] * 12000)

validation_data = np.load('/home/talyanskaya_marina/bottleneck_features/bn_features_validation.npy')
validation_labels = np.array([0] * 12000 + [1] * 12000)

# Fail early if the hard-coded label counts do not match the stored features.
assert len(train_labels) == len(train_data), 'train labels/features length mismatch'
assert len(validation_labels) == len(validation_data), 'validation labels/features length mismatch'

In [8]:
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten

In [9]:
# Small fully connected classifier trained on the saved bottleneck features.
# The layer names are reused later when the weights are loaded by name.
fc_model = Sequential([
    Flatten(input_shape=train_data.shape[1:]),
    Dense(64, activation='relu', name='dense_one'),
    Dropout(0.5, name='dropout_one'),
    Dense(64, activation='relu', name='dense_two'),
    Dropout(0.5, name='dropout_two'),
    Dense(1, activation='sigmoid', name='output'),
])

# Binary classification: single sigmoid output with cross-entropy loss.
fc_model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [10]:
# Recreate the directory for the classifier weights from scratch.
# Check for the directory explicitly instead of catching every OSError from
# mkdir: a permission or missing-parent error must not trigger rmtree.
fc_weights_dir = '/home/talyanskaya_marina/bottleneck_features_and_weights/'
if os.path.isdir(fc_weights_dir):
    shutil.rmtree(fc_weights_dir)
os.mkdir(fc_weights_dir)

In [11]:
# Train the classifier head on the pre-computed bottleneck features.
# `epochs` is the Keras 2 name of the deprecated `nb_epoch` argument.
fc_model.fit(train_data, train_labels,
             epochs=50, batch_size=32,
             validation_data=(validation_data, validation_labels))

  This is separate from the ipykernel package so we can avoid doing imports until


Train on 24000 samples, validate on 24000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f4de0cb9e10>

In [12]:
fc_model.save_weights('/home/talyanskaya_marina/bottleneck_features_and_weights/fc_inception_iphone_250.hdf5') # save the weights

In [13]:
# Returns [loss, accuracy], matching the loss and metric passed to compile().
fc_model.evaluate(x=validation_data, y=validation_labels)



[0.71865298407276468, 0.75579166666666664]

In [59]:
weights_filename='/home/talyanskaya_marina/bottleneck_features_and_weights/fc_inception_iphone_250.hdf5'

# Rebuild the classifier head on top of the Inception base. The layer names
# are the same as in fc_model so the trained weights can be loaded by name.
x = Flatten()(inc_model.output)
x = Dense(64, activation='relu', name='dense_one')(x)
x = Dropout(0.5, name='dropout_one')(x)
x = Dense(64, activation='relu', name='dense_two')(x)
x = Dropout(0.5, name='dropout_two')(x)
top_model=Dense(1, activation='sigmoid', name='output')(x)
# `inputs`/`outputs` are the Keras 2 names of the deprecated `input`/`output`.
finalModel = Model(inputs=inc_model.input, outputs=top_model)

  if __name__ == '__main__':


In [60]:
# Copy the trained classifier weights into the full model. With by_name=True
# only layers whose names match those stored in the file are loaded.
weights_filename = '/home/talyanskaya_marina/bottleneck_features_and_weights/fc_inception_iphone_250.hdf5'
finalModel.load_weights(filepath=weights_filename, by_name=True)

In [61]:
# Freeze the first 205 layers of the Inception base; the remaining layers
# stay trainable for fine-tuning.
frozen_layers = inc_model.layers[:205]
for layer in frozen_layers:
    layer.trainable = False

In [62]:
import tensorflow as tf
from sklearn.metrics import average_precision_score

def AP(y_true, y_pred):
    """Average-precision metric usable in ``model.compile(metrics=[...])``.

    Wraps scikit-learn's ``average_precision_score`` in ``tf.py_func`` so it
    can be evaluated on tensors; the result is declared as ``tf.double``.
    NOTE(review): Keras presumably evaluates this per batch, so the reported
    value would be an average of per-batch scores — confirm.
    """
    ap_tensor = tf.py_func(
        func=average_precision_score,
        inp=(y_true, y_pred),
        Tout=tf.double,
    )
    return ap_tensor

In [63]:
# Fine-tune with plain SGD at a small learning rate; track accuracy and the
# custom average-precision metric.
finalModel.compile(
    optimizer=SGD(lr=1e-4, momentum=0.9),
    metrics=['accuracy', AP],
    loss='binary_crossentropy',
)

In [64]:
# Recreate the checkpoint directory from scratch.
# Check for the directory explicitly instead of catching every OSError from
# mkdir: a permission or missing-parent error must not trigger rmtree.
checkpoint_dir = '/home/talyanskaya_marina/new_model_weights/'
if os.path.isdir(checkpoint_dir):
    shutil.rmtree(checkpoint_dir)
os.mkdir(checkpoint_dir)

In [65]:
# Save a checkpoint whenever validation accuracy reaches a new maximum; the
# epoch number and val_acc are formatted into the file name.
filepath = "/home/talyanskaya_marina/new_model_weights/weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    mode='max',
    monitor='val_acc',
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]

In [66]:
# Training images are augmented (rotation, shifts, brightness, shear, zoom,
# flips); validation/prediction images are only rescaled.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    vertical_flip=True,
    zoom_range=0.2,
    shear_range=0.4,
    brightness_range=[0.3, 1.5],
    height_shift_range=0.25,
    width_shift_range=0.25,
    rotation_range=90,
)

test_datagen = ImageDataGenerator(rescale=1./255)

# Labelled generators (class_mode='binary') for fine-tuning the full model.
train_generator = train_datagen.flow_from_directory(
    '/home/talyanskaya_marina/Dataset_iphone_new/Dataset_iphone_new/train',
    class_mode='binary',
    batch_size=32,
    target_size=(150, 150))

validation_generator = test_datagen.flow_from_directory(
    '/home/talyanskaya_marina/Dataset_iphone_new/Dataset_iphone_new/validation',
    class_mode='binary',
    batch_size=32,
    target_size=(150, 150))

# Same validation folder again, read in larger batches of 100.
pred_generator = test_datagen.flow_from_directory(
    '/home/talyanskaya_marina/Dataset_iphone_new/Dataset_iphone_new/validation',
    class_mode='binary',
    batch_size=100,
    target_size=(150, 150))

Found 24000 images belonging to 2 classes.
Found 24000 images belonging to 2 classes.
Found 24000 images belonging to 2 classes.


In [67]:
# Fine-tune the full model. The Keras 1 arguments (samples_per_epoch,
# nb_epoch, nb_val_samples) are deprecated; the Keras 2 arguments count
# *batches*, and len(generator) is one full pass over the data.
# NOTE(review): under the Keras 2 compatibility shim `nb_val_samples=24000`
# appears to be taken as 24000 validation batches rather than samples — confirm.
finalModel.fit_generator(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=100,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    callbacks=callbacks_list)

# Recreate the output directory; only wipe it when it really exists so that an
# unrelated mkdir failure is not followed by rmtree.
last_model_dir = '/home/talyanskaya_marina/lastModel'
if os.path.isdir(last_model_dir):
    shutil.rmtree(last_model_dir)
os.mkdir(last_model_dir)
finalModel.save('/home/talyanskaya_marina/lastModel/model.hdf5')

  import sys
  import sys


Epoch 1/100

Epoch 00001: val_acc improved from -inf to 0.77942, saving model to /home/talyanskaya_marina/new_model_weights/weights-improvement-01-0.78.hdf5
Epoch 2/100

Epoch 00002: val_acc did not improve from 0.77942
Epoch 3/100

Epoch 00003: val_acc improved from 0.77942 to 0.78104, saving model to /home/talyanskaya_marina/new_model_weights/weights-improvement-03-0.78.hdf5
Epoch 4/100

Epoch 00004: val_acc did not improve from 0.78104
Epoch 5/100

Epoch 00005: val_acc did not improve from 0.78104
Epoch 6/100

Epoch 00006: val_acc improved from 0.78104 to 0.78204, saving model to /home/talyanskaya_marina/new_model_weights/weights-improvement-06-0.78.hdf5
Epoch 7/100

Epoch 00007: val_acc did not improve from 0.78204
Epoch 8/100

Epoch 00008: val_acc improved from 0.78204 to 0.78896, saving model to /home/talyanskaya_marina/new_model_weights/weights-improvement-08-0.79.hdf5
Epoch 9/100

Epoch 00009: val_acc did not improve from 0.78896
Epoch 10/100

Epoch 00010: val_acc did not impro


Epoch 00070: val_acc did not improve from 0.81125
Epoch 71/100

Epoch 00071: val_acc did not improve from 0.81125
Epoch 72/100

Epoch 00072: val_acc did not improve from 0.81125
Epoch 73/100

Epoch 00073: val_acc did not improve from 0.81125
Epoch 74/100

Epoch 00074: val_acc did not improve from 0.81125
Epoch 75/100

Epoch 00075: val_acc did not improve from 0.81125
Epoch 76/100

Epoch 00076: val_acc did not improve from 0.81125
Epoch 77/100

Epoch 00077: val_acc did not improve from 0.81125
Epoch 78/100

Epoch 00078: val_acc improved from 0.81125 to 0.81275, saving model to /home/talyanskaya_marina/new_model_weights/weights-improvement-78-0.81.hdf5
Epoch 79/100

Epoch 00079: val_acc improved from 0.81275 to 0.81363, saving model to /home/talyanskaya_marina/new_model_weights/weights-improvement-79-0.81.hdf5
Epoch 80/100

Epoch 00080: val_acc did not improve from 0.81363
Epoch 81/100

Epoch 00081: val_acc did not improve from 0.81363
Epoch 82/100

Epoch 00082: val_acc did not improve 

# Input-output function

In [57]:
import argparse
import keras
import os
from keras.preprocessing.image import ImageDataGenerator
import pandas as pd
    
# Base directory for the default input, model and output paths.
# PWD may be unset (for example outside a POSIX shell); fall back to the
# process working directory so the defaults below never concatenate None.
localPath = os.getenv("PWD") or os.getcwd()

def makeYourPredictions(inPath, modelPath, outPath):
    """Score every image under ``inPath`` and write the results to a CSV file.

    Parameters
    ----------
    inPath : str
        Directory handed to ``flow_from_directory``.
    modelPath : str
        Path of the saved Keras model (HDF5) to load.
    outPath : str
        Directory in which ``yourPredictions.csv`` is written; a trailing
        separator is optional.

    The CSV has two columns: ``image_name`` (file names as reported by the
    generator) and ``iphone_probability`` (the flattened model output).
    """
    batch_size = 32

    # Inference only: skip re-compiling, so a model saved with a custom metric
    # (such as the AP metric used during training) loads without that function.
    model = keras.models.load_model(modelPath, compile=False)

    finalTest_datagen = ImageDataGenerator(rescale=1./255)

    # shuffle=False keeps predictions in the same order as `filenames`.
    finalTest_generator = finalTest_datagen.flow_from_directory(
        inPath,
        target_size=(150, 150),
        batch_size=batch_size,
        class_mode='binary',
        shuffle=False)

    inFiles = finalTest_generator.filenames

    # Whole number of batches covering every file (ceiling division); the
    # original passed a float such as 750 / 32 = 23.4375.
    steps = (len(inFiles) + batch_size - 1) // batch_size
    finalPredictions = model.predict_generator(finalTest_generator, steps=steps).flatten()

    out_df = pd.DataFrame(
        {'image_name': inFiles, 'iphone_probability': finalPredictions},
        columns=['image_name', 'iphone_probability'])
    # os.path.join works whether or not outPath ends with a separator.
    out_df.to_csv(os.path.join(outPath, 'yourPredictions.csv'), index=False)

if __name__ == '__main__':

    # Command-line entry point: all options default to paths under localPath.
    parser = argparse.ArgumentParser(description='Iphone detector')
    parser.add_argument('-i', type=str, default=localPath + '/in_folder',
                        help='input directory with the images to score')
    parser.add_argument('-m', type=str, default=localPath + '/model.hdf5',
                        help='path of the saved Keras model (HDF5)')
    parser.add_argument('-o', type=str, default=localPath + '/',
                        help='directory in which yourPredictions.csv is written')
    # NOTE(review): '-f' is parsed but never used; presumably it is accepted so
    # that parse_args() does not fail inside a Jupyter kernel, which is started
    # with "-f <connection file>" — confirm.
    parser.add_argument('-f', type=str, default='nthg',
                        help='ignored')
    args = parser.parse_args()

    makeYourPredictions(args.i, args.m, args.o)
        

Found 750 images belonging to 1 classes.


In [58]:
# Turn probabilities into 0/1 decisions by rounding and count the images
# classified as iPhone.
df = pd.read_csv('/home/talyanskaya_marina/finalFolder/yourPredictions.csv')
df['decision'] = df['iphone_probability'].round()
df['decision'].sum()

145.0