# Organize the Data

In [3]:
import numpy as np

In [4]:
# Base paths reused throughout the notebook. Everything is resolved relative
# to the directory the kernel is currently running in.
import os, sys
LESSON_HOME_DIR = os.getcwd()
current_dir = LESSON_HOME_DIR
# Note the trailing slash: later cells append names such as 'valid/' directly.
DATA_HOME_DIR = LESSON_HOME_DIR + '/data/'

In [5]:
# Render matplotlib figures inline in the notebook output.
# Run this cell before any cell that plots.
%matplotlib inline

## Action Plan
1. Create Validation and Sample sets
2. Rearrange image files into their respective directories 
3. Fine-tune and train the model
4. Generate predictions
5. Validate predictions
6. Submit predictions to Kaggle

## Create validation set and sample

In [6]:
#Create directories
# Uses os instead of the %mkdir alias so the cell behaves the same on Windows
# and Linux and can be re-run without "already exists" errors.
# Paths are relative to the current working directory.
for new_dir in ('valid', 'results'):
    if not os.path.isdir(new_dir):
        os.mkdir(new_dir)

In [7]:
#creating respecting class dirs in valid
%cd valid/
%ls
%mkdir Type_1
%mkdir Type_2
%mkdir Type_3

C:\Users\Flo\Source\Repos\Cervical_Cancer_Comp\valid
 Volume in Laufwerk C: hat keine Bezeichnung.
 Volumeseriennummer: 661E-8808

 Verzeichnis von C:\Users\Flo\Source\Repos\Cervical_Cancer_Comp\valid

17.05.2017  10:56    <DIR>          .
17.05.2017  10:56    <DIR>          ..
               0 Datei(en),              0 Bytes
               2 Verzeichnis(se), 76.071.641.088 Bytes frei


In [8]:
# Show the current working directory (changed by the %cd in the previous cell).
%pwd

u'C:\\Users\\Flo\\Source\\Repos\\Cervical_Cancer_Comp\\valid'

In [9]:
%cd $DATA_HOME_DIR/train

[Error 2] Das System kann die angegebene Datei nicht finden: u'C:\\Users\\Flo\\Source\\Repos\\Cervical_Cancer_Comp/data//train'
C:\Users\Flo\Source\Repos\Cervical_Cancer_Comp\valid


In [9]:
from glob import glob

In [10]:
%ls
g = glob('**/*.jpg')
shuf = np.random.permutation(g)
validation_dir = DATA_HOME_DIR+'valid/' 

for i in range(444): os.rename(shuf[i], validation_dir + shuf[i])

[0m[01;34mresults[0m/  [01;34mType_1[0m/  [01;34mType_2[0m/  [01;34mType_3[0m/  [01;34mvalid[0m/


In [16]:
# Sanity check: list the data directory, then count the entries in valid/Type_1.
# `ls -1 | wc -l` is a shell pipeline (one name per line, counted by wc), so this
# cell assumes a Unix-like shell.
%cd $DATA_HOME_DIR
%ls
%cd valid/Type_1
%ls -1 | wc -l

/home/paperspace/NBs/Cervical_Cancer_Comp/data
[0m[01;34mbak[0m/  [01;32mdownload.sh[0m*  [01;34mresults[0m/  [01;34mtest[0m/  [01;31mtest.7z[0m  [01;34mtrain[0m/  [01;31mtrain.7z[0m  [01;34mvalid[0m/
/home/paperspace/NBs/Cervical_Cancer_Comp/data/valid/Type_1
76


In [17]:
# Count the entries in valid/Type_2.
# Depends on the working directory left by the previous cell: step up to
# valid/ and then into the sibling class directory.
%cd ..
%cd Type_2
% ls -1 | wc -l

/home/paperspace/NBs/Cervical_Cancer_Comp/data/valid
/home/paperspace/NBs/Cervical_Cancer_Comp/data/valid/Type_2
229


In [18]:
# Count the entries in valid/Type_3.
# Depends on the working directory left by the previous cell: step up to
# valid/ and then into the sibling class directory.
%cd ..
%cd Type_3
% ls -1 | wc -l

/home/paperspace/NBs/Cervical_Cancer_Comp/data/valid
/home/paperspace/NBs/Cervical_Cancer_Comp/data/valid/Type_3
139


In [21]:
# Create a single 'unknown' class for the test set: every .jpg directly inside
# data/test is moved into data/test/unknown/.
# NOTE(review): presumably required because the Keras directory iterator expects
# one sub-directory per class - confirm where the test generator is built.
# NOTE(review): %mv with a shell wildcard assumes a Unix-like shell.
%cd $DATA_HOME_DIR
%cd test
%mkdir unknown
%mv *.jpg unknown/

/home/paperspace/NBs/Cervical_Cancer_Comp/data
/home/paperspace/NBs/Cervical_Cancer_Comp/data/test


# Creating the Model

In [28]:
# imports
from keras.layers import *
from keras.optimizers import *
from keras.applications import *
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.applications import imagenet_utils
from keras.applications.inception_v3 import preprocess_input
from keras.callbacks import ModelCheckpoint
from keras import backend as k

In [22]:
# Dataset locations. os.path.join avoids the doubled separator ('data//train')
# that plain concatenation produces, because DATA_HOME_DIR already ends in '/'.
train_data_dir = os.path.join(DATA_HOME_DIR, 'train')
test_data_dir = os.path.join(DATA_HOME_DIR, 'test')
validation_data_dir = os.path.join(DATA_HOME_DIR, 'valid')


# Image counts after the validation split; they correspond to the
# "Found N images" totals printed when the generators are created.
nb_train_samples = 1037
nb_validation_samples = 444

If we are using Inception or Xception, we need to set the input shape to 299×299 pixels and then switch the pre-processing step to the separate, model-specific pre-processing function, which performs a different type of scaling.

In [24]:
# Training configuration.
# NOTE(review): learn_rate, momentum and transformation_ratio are not referenced
# by the cells shown in this part of the notebook.
nb_classes = 3                 # one output per class directory (Type_1, Type_2, Type_3)
img_width = img_height = 299   # input resolution; adjust to the shape/structure of your images
batch_size = 64                # powers of two (4 ... 256); bounded by CPU/GPU memory
nb_epoch = 3                   # number of passes over the training data (use 1 for a smoke test)
learn_rate = 0.045             # SGD learning rate
momentum = 0.9                 # SGD momentum
transformation_ratio = .05     # strength of the data augmentation/transformation

### Loading the pre-trained InceptionV3 model with ImageNet weights

In [29]:
# Build InceptionV3 initialised with ImageNet weights and without its
# classification head (include_top=False); the input size comes from the
# img_width / img_height hyperparameters, with 3 colour channels.
base_model = InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(img_width, img_height, 3))
print('Model Loaded!')

Model Loaded!


## Defining our own top model block

In [30]:
# Top Model Block
# Collapse the convolutional feature maps to one vector per image, then map
# that vector to one score per class.
x = base_model.output
x = GlobalAveragePooling2D()(x)
# softmax, not relu: the model is compiled with categorical_crossentropy, which
# expects the outputs to form a probability distribution over the classes.
# relu outputs are unbounded and can all be zero, which breaks the loss.
predictions = Dense(nb_classes, activation='softmax')(x)

In [31]:
# Join the pre-trained base and the new classification head into a single model.
model = Model(inputs=base_model.input, outputs=predictions)

In [32]:
# Stage one: mark every layer of the pre-trained base as non-trainable, so that
# only the newly added (randomly initialised) top layers are updated.
for pretrained_layer in base_model.layers:
    pretrained_layer.trainable = False

In [33]:
# Read Data and Augment it: Make sure to select augmentations that are appropriate to your images.
# To save augmentations un-comment save lines and add to your flow parameters.
train_datagen = ImageDataGenerator(rescale=1. / 299)

In [34]:
validation_datagen = ImageDataGenerator(rescale=1. / 299)

In [36]:
# Stream training batches from the class sub-directories of train_data_dir;
# 'categorical' yields one-hot labels.
# NOTE(review): Keras documents target_size as (height, width); the
# (img_width, img_height) order is harmless only while both are equal.
train_generator = train_datagen.flow_from_directory(
    directory=train_data_dir,
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(img_width, img_height))

Found 1037 images belonging to 3 classes.


In [37]:
# Stream validation batches from the class sub-directories of
# validation_data_dir, with the same image size, batch size and label
# encoding as the training generator.
validation_generator = validation_datagen.flow_from_directory(
    directory=validation_data_dir,
    class_mode='categorical',
    batch_size=batch_size,
    target_size=(img_width, img_height))

Found 444 images belonging to 3 classes.


In [38]:
# Configure training: multi-class cross-entropy loss, accuracy as the reported metric.
# The optimizer is selected by name ('nadam'), so the learn_rate / momentum
# hyperparameters defined earlier are not applied here.
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer='nadam')

In [39]:
# Checkpointing: monitor validation accuracy ('val_acc', higher is better) and
# save only when it improves. The file name embeds the epoch number and the
# validation accuracy.
filepath = "weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    mode='max',
    monitor='val_acc',
    save_best_only=True,
    verbose=1)
callbacks_list = [checkpoint]

In [None]:
# Switch on PIL's LOAD_TRUNCATED_IMAGES flag before training starts, so that
# image files with truncated data can still be loaded by the generators.
# See: https://github.com/fchollet/keras/issues/5475
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [None]:
print("\nStarting to Fine Tune Model\n")
# Keras 2 measures generator progress in batches ("steps"), not in samples.
# The Keras 1 argument nb_val_samples is mapped straight onto validation_steps,
# so passing 444 would run 444 validation batches per epoch instead of 444 images.
# Ceiling division keeps the last, partially filled batch.
steps_per_epoch = (nb_train_samples + batch_size - 1) // batch_size
validation_steps = (nb_validation_samples + batch_size - 1) // batch_size
model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=nb_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    callbacks=callbacks_list)
print('Finished finetuning')


Starting to Fine Tune Model





Epoch 1/3

In [None]:
# Persist the trained network in two parts: architecture as JSON, weights as HDF5.
# NOTE(review): the files are named "xception" although the base model in this
# notebook is InceptionV3 - confirm nothing loads these paths before renaming.
model_json = model.to_json()
with open("model_xception.json", "w") as architecture_file:
    architecture_file.write(model_json)
model.save_weights("model_xception.h5")
print("Saved model to disk")