In [1]:
import pandas as pd
import numpy as np
import os
import gc
from classification_models.keras import Classifiers
import efficientnet
from keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, Dense, Dropout, BatchNormalization, concatenate
from keras.models import Model
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
import keras
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint

Using TensorFlow backend.


In [2]:
HEIGHT = 100
WIDTH = 100
CHANNELS = 1

In [3]:
def create_model(BACKBONE, HEIGHT, WIDTH, CHANNELS, WEIGHTS):
    
    arch, _ = Classifiers.get(BACKBONE)
    model = arch((HEIGHT, WIDTH, CHANNELS), weights=WEIGHTS, include_top=False)
    x_Avg = GlobalAveragePooling2D()(model.output)
    x_Max = GlobalMaxPooling2D()(model.output)
    concat = concatenate([x_Avg, x_Max])
    concat = BatchNormalization()(concat)
    concat = Dropout(0.5)(concat)
    #ROOT
    x = Dense(512)(concat)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    root = Dense(168, activation='sigmoid', name='root')(x)

    #VOWEL
    x = Dense(512)(concat)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    vowel = Dense(11, activation='sigmoid', name='vowel')(x)

    #CONSONENT
    x = Dense(512)(concat)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    consonent = Dense(7, activation='sigmoid', name='consonent')(x)

    model = Model(inputs = [model.input], outputs=[root, vowel, consonent])
    return model

In [4]:
FOLDER_TRAIN = "train_images_100_100/"
files = os.listdir(FOLDER_TRAIN)

In [5]:
data = pd.DataFrame()
data["file_names"] = files
data["file_names"] = FOLDER_TRAIN + data["file_names"]
data.head()

Unnamed: 0,file_names
0,train_images_100_100/141642_92_3_0.npy
1,train_images_100_100/165563_78_1_0.npy
2,train_images_100_100/27893_94_2_0.npy
3,train_images_100_100/189347_53_7_0.npy
4,train_images_100_100/6958_153_1_5.npy


In [6]:
x = data["file_names"].str.split(".", n = 1, expand = True)
x = x[x.columns[0]]
x = x.str.split("_", n = 7, expand = True)
data["root"] = x[x.columns[4]]
data["vowel"] = x[x.columns[5]]
data["consonent"] = x[x.columns[6]]
data.head()

Unnamed: 0,file_names,root,vowel,consonent
0,train_images_100_100/141642_92_3_0.npy,92,3,0
1,train_images_100_100/165563_78_1_0.npy,78,1,0
2,train_images_100_100/27893_94_2_0.npy,94,2,0
3,train_images_100_100/189347_53_7_0.npy,53,7,0
4,train_images_100_100/6958_153_1_5.npy,153,1,5


In [7]:
X = data["file_names"].values
y = data[["root", "vowel", "consonent"]].values

In [8]:
mskf = MultilabelStratifiedKFold(n_splits=5, random_state=0)
train_indexes = []
test_indexes = []

for train_index, test_index in mskf.split(X, y):
    train_indexes.append(train_index)
    test_indexes.append(test_index)

In [9]:
del data, x, files
gc.collect()

106

In [10]:
class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs, labels, batch_size=32, dim=(100, 100), n_channels=1, shuffle=True):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.list_IDs_IND = range(0, len(list_IDs))
        
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs_IND[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))

        y1 = np.empty((self.batch_size), dtype=int)
        y2 = np.empty((self.batch_size), dtype=int)
        y3 = np.empty((self.batch_size), dtype=int)

        # Generate data
        
        for i, ID in enumerate(list_IDs_temp):
            # Store sample
            #print (ID)
            X[i, :, :, 0] = np.load(self.list_IDs[ID]) / 255.0

            # Store class
            temp_y = self.labels[ID]
            #print(temp_y)
            y1[i] = temp_y[0]
            y2[i] = temp_y[1]
            y3[i] = temp_y[2]
        y1 = keras.utils.to_categorical(y1, num_classes=168)
        y2 = keras.utils.to_categorical(y2, num_classes=11)
        y3 = keras.utils.to_categorical(y3, num_classes=7)
        
        return X, [y1, y2, y3]

In [11]:
#FIRST FOLD
FOLD_NUM = 0
X_train, X_test = X[train_indexes[FOLD_NUM]], X[test_indexes[FOLD_NUM]]
y_train, y_test = y[train_indexes[FOLD_NUM]], y[test_indexes[FOLD_NUM]]
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((160672,), (40168,), (160672, 3), (40168, 3))

In [12]:
train_generator = DataGenerator(list_IDs=X_train, labels=y_train, batch_size=32)
valid_generator = DataGenerator(list_IDs=X_test, labels=y_test, batch_size=32)

In [13]:
model = create_model(BACKBONE="resnet34", HEIGHT=HEIGHT, WIDTH=WIDTH, CHANNELS=CHANNELS, WEIGHTS=None)
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
data (InputLayer)               (None, 100, 100, 1)  0                                            
__________________________________________________________________________________________________
bn_data (BatchNormalization)    (None, 100, 100, 1)  3           data[0][0]                       
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, 106, 106, 1)  0           bn_data[0][0]                    
__________________________________________________________________________________________________
conv0 (Conv2D)       

In [14]:
checkpointer = ModelCheckpoint(filepath = 'model_best_root_loss_fold_' + str(FOLD_NUM) + '.h5',
                               verbose=1,
                               save_best_only=True, save_weights_only = True)

reduce_lr = ReduceLROnPlateau(monitor='val_root_loss', factor=0.8,
                              patience=10, min_lr=0.000001, verbose=1, cooldown=1)

In [16]:
model.compile(optimizer=keras.optimizers.Adam(), 
              loss={
                  'root' :'binary_crossentropy',
                   'vowel' :'binary_crossentropy',
                   'consonent' :'binary_crossentropy',
                  }, 
                loss_weights = {
                    'root' : 0.5,
                   'vowel' : 0.35,
                   'consonent' : 0.15,
                },
              metrics=['accuracy'])

In [None]:
model.fit_generator(generator=train_generator, epochs=80, validation_data=valid_generator)

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Epoch 1/80
 232/5021 [>.............................] - ETA: 23:06 - loss: 0.4383 - root_loss: 0.3723 - vowel_loss: 0.5056 - consonent_loss: 0.5013 - root_acc: 0.8754 - vowel_acc: 0.8176 - consonent_acc: 0.8083