# Assignment 1 - part 2
### split the data to train, validation and test

In [1]:
import numpy as np
import os.path
import pandas as pd
from sklearn.model_selection import StratifiedKFold
models_name = 'models'

*first step: load the data and split it to train and test*

In [2]:
data_X = np.load(os.path.join('data2/train_X2.npy'))
data_Y = np.load(os.path.join('data2/train_Y2.npy'))

let's see the shapes:

In [3]:
print(data_X.shape)
print(data_Y.shape)

(320, 640, 340, 3)
(320,)


we reshape data_Y to be with 1 column:

In [4]:
data_Y = data_Y.reshape(320,1)

In [5]:
y_categorial = pd.get_dummies(data=pd.DataFrame(data=data_Y))
#y_categorial

*split the data to train and test:*  
we will use 70% of the data as train and 30% of the data as test.

In [6]:
from sklearn.model_selection import train_test_split

train_X, test_X, train_Y, test_Y = train_test_split(
    data_X, data_Y, stratify=data_Y, test_size=0.3)

save the test data:

In [7]:
# Save to disk
np.save('data2/test/test_X.npy', test_X)
np.save('data2/test/test_Y.npy', test_Y)

*split the train data to train and validation using stratifiedkfold:*  
we do this by using our own function, that create directory for each model, each directory contain the train&validation data, and will also contain the model architecture and weights.

In [8]:
def split_by_stratifiedKfold(k=5,data_route = 'data2/data.csv'):
    data = pd.read_csv(data_route)
    skf = StratifiedKFold(n_splits=k, shuffle=False)
    if  not os.path.isdir(models_name):
        os.mkdir(models_name)
    for index, (train_indices, val_indices) in enumerate(skf.split(train_X, train_Y)):
        xtrain, xval = train_X[train_indices], train_X[val_indices]
        ytrain, yval = y_categorial.as_matrix()[train_indices], y_categorial.as_matrix()[val_indices]
        if not os.path.isdir(models_name+ '/model_' + str(index)):
            os.mkdir(models_name + '/model_' + str(index))
        np.save(models_name + '/model_' + str(index) + '/xtrain.npy', xtrain)
        np.save(models_name + '/model_' + str(index) + '/ytrain.npy', ytrain)
        np.save(models_name + '/model_' + str(index) + '/xval.npy', xval)
        np.save(models_name + '/model_' + str(index) + '/yval.npy', yval)

we will use k=5 stratified split:

In [9]:
split_by_stratifiedKfold()



### Build our first neural network

import relevant packages:

In [2]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPool2D
from keras.utils import np_utils
import pickle

import tensorflow as tf
import keras.backend.tensorflow_backend as ktf

Using TensorFlow backend.


In [3]:
tf.set_random_seed(42)


def get_session(gpu_fraction=0.8):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_fraction,
                           allow_growth=True)
    return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))


ktf.set_session(get_session())

form model:

In [12]:
num_classes = 10
nrow,ncol,channels = (640,340,3)
def create_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3),
                            input_shape=(nrow,ncol,channels)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(MaxPool2D(pool_size=(2, 2)))
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Flatten())
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
    model.summary()
    return model
#create_model().summary()

def create_light_model():
    
    model = Sequential()
    model.add(Conv2D(16,(3,3),activation='relu',input_shape=(nrow,ncol,channels)))
    model.add(Conv2D(16,(3,3),activation='relu'))
    model.add(MaxPool2D())
    model.add(Conv2D(8,(3,3),activation='relu'))
    model.add(Conv2D(8,(3,3),activation='relu'))
    model.add(Dropout(0.2))
    model.add(MaxPool2D())

    model.add(Flatten())
    model.add(Dense(10,activation='softmax'))
    model.summary()
    return model

#create_light_model().summary()

define callbacks:

In [13]:
from keras.callbacks import *

def set_callbacks(description='normal', path =models_name + '/', patience=10):
    cp = ModelCheckpoint(path + 'best_model_weights_'+ description + '.hdf5',save_best_only=True)
    es = EarlyStopping(patience=patience,monitor='acc')   
    log = CSVLogger(path + 'train_log.csv')
    tb = TensorBoard(log_dir=path + 'logs/')
    reduce_lr_loss = ReduceLROnPlateau(monitor='val_acc',factor=0.1, patience=5, verbose=1, epsilon=1e-3,
                                      mpde='min')
    cb = [es,cp,log,tb,reduce_lr_loss]
    return cb
set_callbacks()



[<keras.callbacks.EarlyStopping at 0x246925bd160>,
 <keras.callbacks.ModelCheckpoint at 0x246925bd0b8>,
 <keras.callbacks.CSVLogger at 0x246925bd198>,
 <keras.callbacks.TensorBoard at 0x246925bd1d0>,
 <keras.callbacks.ReduceLROnPlateau at 0x246925bd208>]

define functions for load and save model:

In [4]:
from keras.models import model_from_json

def save_model(path,model,filename):
    # this is a helper function used to save a keras NN model architecture and weights
    json_string = model.to_json()
    open(os.path.join(path, filename+'_architecture.json'), 'w').write(json_string)
    model.save_weights(os.path.join(path, filename+'_model_weights_final.h5'), overwrite=True)
    
def read_model(path, filename):
    # this is a helper function used to restore a keras NN model architecture and weights
    model = model_from_json(open(os.path.join(path, filename+'_architecture.json')).read())
    model.load_weights(os.path.join(path, filename+'_model_weights_final.h5'))
    model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
    return model

In [15]:
def read_model_data_train(path):
    xtrain = np.load(os.path.join(path + 'xtrain.npy'))
    ytrain = np.load(os.path.join(path + 'ytrain.npy'))
    xval = np.load(os.path.join(path + 'xval.npy'))
    yval = np.load(os.path.join(path + 'yval.npy'))
    return xtrain,ytrain,xval,yval 
    
def train_model(datagenerator=None,num_of_epochs=20):
    modelsHistory=[]
    for i in range(5):
        path = models_name + '/model_' + str(i)
        aug=''
        model = None
        model = create_light_model()
        model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
        xtrain,ytrain,xval,yval = read_model_data_train(path + '/')
        print('start fit model_' + str(i))
        if not datagenerator == None:
            aug = '_aug'
            datagenerator.fit(xtrain)
            history = model.fit_generator(datagenerator.flow(xtrain, ytrain,batch_size=10), 
                                                     callbacks = set_callbacks(path = path + '/',
                                                     description = aug),
                                                     validation_data=datagen.flow(xval, yval,batch_size=4),
                                                     epochs=num_of_epochs,
                                                     steps_per_epoch = len(xtrain)//8,
                                                     validation_steps = 100)
            modelsHistory.append(history)
        else:
            print('without data augmantation')
            history = model.fit(xtrain,ytrain,validation_data=[xval,yval],epochs=num_of_epochs,batch_size=4,
                                           callbacks = set_callbacks(path = path + '/'))
            modelsHistory.append(history)
        save_model(path,model,'model_' + str(i) + aug)
        f = open(path + '/history' ,"wb")
        pickle.dump(history.history,f)
        f.close()
    return modelsHistory

let's train:

In [16]:

history = train_model()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 638, 338, 16)      448       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 636, 336, 16)      2320      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 318, 168, 16)      0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 316, 166, 8)       1160      
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 314, 164, 8)       584       
_________________________________________________________________
dropout_1 (Dropout)          (None, 314, 164, 8)       0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 157, 82, 8)        0         
__________

Epoch 18/20
Epoch 19/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 638, 338, 16)      448       
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 636, 336, 16)      2320      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 318, 168, 16)      0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 316, 166, 8)       1160      
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 314, 164, 8)       584       
_________________________________________________________________
dropout_3 (Dropout)          (None, 314, 164, 8)       0         
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 157, 82, 8)     

Epoch 13/20
Epoch 14/20
Epoch 15/20

Epoch 00015: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 16/20
Epoch 17/20
Epoch 18/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_17 (Conv2D)           (None, 638, 338, 16)      448       
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 636, 336, 16)      2320      
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 318, 168, 16)      0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 316, 166, 8)       1160      
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 314, 164, 8)       584       
_________________________________________________________________
dropout_5 (Dropout)          (None, 314, 164, 8)      

let's try to train with augmentation:

In [19]:
#del history
#del model
from keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(
    rotation_range=10,
    horizontal_flip=True,
    vertical_flip=True
    )
history = train_model(datagenerator = datagen)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_21 (Conv2D)           (None, 638, 338, 16)      448       
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 636, 336, 16)      2320      
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 318, 168, 16)      0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 316, 166, 8)       1160      
_________________________________________________________________
conv2d_24 (Conv2D)           (None, 314, 164, 8)       584       
_________________________________________________________________
dropout_6 (Dropout)          (None, 314, 164, 8)       0         
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 157, 82, 8)        0         
__________

Epoch 20/20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_29 (Conv2D)           (None, 638, 338, 16)      448       
_________________________________________________________________
conv2d_30 (Conv2D)           (None, 636, 336, 16)      2320      
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 318, 168, 16)      0         
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 316, 166, 8)       1160      
_________________________________________________________________
conv2d_32 (Conv2D)           (None, 314, 164, 8)       584       
_________________________________________________________________
dropout_8 (Dropout)          (None, 314, 164, 8)       0         
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 157, 82, 8)        0        

Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Epoch 00020: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_37 (Conv2D)           (None, 638, 338, 16)      448       
_________________________________________________________________
conv2d_38 (Conv2D)           (None, 636, 336, 16)      2320      
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 318, 168, 16)      0         
_________________________________________________________________
conv2d_39 (Conv2D)           (None, 316, 166, 8)       1160      
_________________________________________________________________
conv2d_40 (Conv2D)           (None, 314, 164, 8)       584       
_________________________________________________________________
dropout_10 (Dropout)         (None, 314, 164, 8)       0         
____________

read history.... blablablalba

In [None]:
with open(models_name + '/model_0/history.pkl', 'rb') as handle:
    b = pickle.load(handle)


In [5]:

def predict_and_save(model,save_path,load_path,aug=''):
    x_test = np.load(load_path+'test_X.npy')
    y_test = np.load(load_path+'test_Y.npy')
    y_pred= model.predict(x_test)
    np.save(save_path+'model_'+aug+'_predictions.npy', y_pred)
    #print(model.evaluate(x_test,y_test))
    logloss = log_loss(y_test,y_pred)
    del model
    print('logLoss '+aug+': {}'.format(logloss))
    
    

In [6]:
from sklearn.metrics import log_loss
for i in range(5):
    print('model {} metrics:'.format(i))
    model = read_model('models/model_{}/'.format(i),'model_{}_aug'.format(i))
    predict_and_save(model,'models/model_{}/'.format(i),'data2/test/','aug')
    
    model = read_model('models/model_{}/'.format(i),'model_{}'.format(i))
    predict_and_save(model,'models/model_{}/'.format(i),'data2/test/')

model 0 metrics:


ResourceExhaustedError: OOM when allocating tensor with shape[32,16,636,336] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node conv2d_22/convolution}} = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](conv2d_21/Relu, conv2d_22/kernel/read)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[{{node dense_6/Softmax/_81}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_96_dense_6/Softmax", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


In [None]:
pred0 = np.load("models/model_0/model__predictions.npy")
pred1=np.load("models/model_1/model__predictions.npy")
pred2=np.load("models/model_2/model__predictions.npy")
pred3=np.load("models/model_3/model__predictions.npy")
pred4=np.load("models/model_4/model__predictions.npy")
pred_avg = (pred0 + pred1+pred2+pred3+pred4)/5

y_test = np.load('data/test/test_Y.npy')
logloss = log_loss(y_test,pred_avg)
logloss