In [None]:
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn import metrics
import keras
from keras.models import Model
from keras.optimizers import Adam
from keras.applications.inception_v3 import InceptionV3
from keras.layers import Dense, Input, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.layers import BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from google.colab import drive
drive.mount('/content/gdrive')

MessageError: Error: credential propagation was unsuccessful

In [None]:
# Build the training table: labels plus the full path of every training image.
path = "/content/gdrive/My Drive/train/"
train_set = pd.read_csv('/content/gdrive/My Drive/train_labels.csv')
train_label = np.array(train_set['invasive'])

# The first CSV column holds the numeric image id. Build all paths in one
# vectorised step and select the column positionally with .iloc[:, 0];
# row-by-row `iloc[i][0]` relies on positional Series[int] access, which
# pandas has deprecated.
train_files = (path + train_set.iloc[:, 0].astype(int).astype(str) + '.jpg').tolist()

# Overwrite the id column with the full path, so the frame's columns are
# (path, label) in that order.
train_set['name'] = train_files
print(train_set.shape)
train_set.head()

(2295, 2)


Unnamed: 0,name,invasive
0,/content/gdrive/My Drive/train/1.jpg,0
1,/content/gdrive/My Drive/train/2.jpg,0
2,/content/gdrive/My Drive/train/3.jpg,1
3,/content/gdrive/My Drive/train/4.jpg,0
4,/content/gdrive/My Drive/train/5.jpg,1


In [None]:
# Load the sample submission and derive the path of every test image from it.
path = "/content/gdrive/My Drive/test/"
test_set = pd.read_csv('/content/gdrive/My Drive/sample_submission.csv')

# The first CSV column holds the numeric image id. Select it positionally with
# .iloc[:, 0]; row-by-row `iloc[i][0]` relies on positional Series[int] access,
# which pandas has deprecated. test_set itself is left untouched here.
test_files = (path + test_set.iloc[:, 0].astype(int).astype(str) + '.jpg').tolist()
print(test_set.shape)
test_set.head()

(1531, 2)


Unnamed: 0,name,invasive
0,1,0.5
1,2,0.5
2,3,0.5
3,4,0.5
4,5,0.5


In [None]:
# Binary classifier: ImageNet-pretrained InceptionV3 backbone (no top) with a
# small head producing a single sigmoid probability.
img_dim = (800, 800, 3)

# Named `inputs` rather than `input` so the builtin input() is not shadowed
# for the rest of the notebook.
inputs = Input(shape=img_dim)
base_model = InceptionV3(include_top=False, weights='imagenet', input_shape=img_dim)

# Batch-normalise the raw pixels before they reach the backbone.
bn = BatchNormalization()(inputs)
x = base_model(bn)
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=inputs, outputs=output)
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 800, 800, 3)]     0         
                                                                 
 batch_normalization_94 (Ba  (None, 800, 800, 3)       12        
 tchNormalization)                                               
                                                                 
 inception_v3 (Functional)   (None, 23, 23, 2048)      21802784  
                                                                 
 global_average_pooling2d (  (None, 2048)              0         
 GlobalAveragePooling2D)                                         
                                                                 
 dropout (Dropout)           (None, 2048

In [None]:
def _augment(src, choice):
    """Return ``src`` transformed by one of six fixed flips/rotations.

    ``choice`` values 0-5 select the transform; any other value returns
    ``src`` unchanged. Rotations act on the first two axes, so the quarter
    turns (choices 0 and 4) swap height and width of a non-square image.
    """
    if choice == 0:
        # Quarter turn counter-clockwise (np.rot90's positive direction).
        return np.rot90(src, 1)
    if choice == 1:
        # Flip vertically.
        return np.flipud(src)
    if choice == 2:
        # Rotate 180 degrees.
        return np.rot90(src, 2)
    if choice == 3:
        # Flip horizontally.
        return np.fliplr(src)
    if choice == 4:
        # Three quarter turns counter-clockwise, i.e. 90 degrees clockwise.
        return np.rot90(src, 3)
    if choice == 5:
        # Rotate 180 degrees then flip horizontally. The net effect equals a
        # vertical flip (same as choice 1); kept so the sampling distribution
        # of augmentations is unchanged.
        return np.fliplr(np.rot90(src, 2))
    return src


def _load_image(filepath, img_size):
    """Read ``filepath`` with OpenCV and resize it to ``img_size``.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file. ``cv2.imread``
            returns ``None`` in that case, which would otherwise only surface
            as an opaque error inside ``cv2.resize``.
    """
    img = cv2.imread(filepath)
    if img is None:
        raise FileNotFoundError('Could not read image: {}'.format(filepath))
    return cv2.resize(img, img_size)


def _batch_generator(filepaths, labels, batch_size, img_size, augment=False):
    """Endlessly yield batches of images scaled to [0, 1], in input order.

    Args:
        filepaths: sequence of image paths.
        labels: array-like aligned with ``filepaths``, or ``None``. When given,
            ``(x_batch, y_batch)`` tuples are yielded; otherwise only
            ``x_batch``.
        batch_size: maximum number of images per batch (the last batch of each
            pass may be smaller).
        img_size: size tuple forwarded to ``cv2.resize``.
        augment: when true, each image gets a random ``_augment`` transform.
    """
    if len(filepaths) == 0:
        # Nothing to yield; looping would spin forever without producing data.
        return
    while True:
        for start in range(0, len(filepaths), batch_size):
            stop = start + batch_size  # slicing clamps at the end of the data
            images = []
            for filepath in filepaths[start:stop]:
                img = _load_image(filepath, img_size)
                if augment:
                    img = _augment(img, np.random.randint(6))
                images.append(img)
            x_batch = np.array(images, np.float32) / 255.
            if labels is None:
                yield x_batch
            else:
                yield x_batch, np.array(labels[start:stop], np.uint8)


def _predict(model, filepaths, batch_size, img_size):
    """Return the model's first output column for ``filepaths``, in order."""
    if len(filepaths) == 0:
        return np.zeros(0, dtype=float)
    # Whole number of steps covering every image exactly once.
    steps = int(np.ceil(len(filepaths) / batch_size))
    batches = _batch_generator(filepaths, None, batch_size, img_size)
    preds = model.predict(batches, steps=steps, verbose=1)
    return np.asarray(preds)[:len(filepaths), 0]


def train_model(model, batch_size, epochs, img_size, x, y, test, n_fold, kf):
    """Cross-validate ``model`` and return fold-averaged test predictions.

    For every split produced by ``kf.split(x)`` the model is reset to the
    weights it had on entry, compiled, trained with early stopping /
    LR reduction / best-weights checkpointing, reloaded from the fold's
    checkpoint, scored (ROC AUC) on the fold's train and validation rows, and
    used to predict ``test``.

    Args:
        model: Keras model; it is recompiled for every fold and left holding
            the last fold's best weights on return.
        batch_size: images per batch.
        epochs: maximum epochs per fold.
        img_size: size tuple forwarded to ``cv2.resize``. Random quarter-turn
            augmentation only keeps the shape for square sizes.
        x: DataFrame whose first column holds the image file paths.
        y: array-like of binary labels aligned row-for-row with ``x``.
        test: sequence of test image paths.
        n_fold: expected number of folds (used for progress messages).
        kf: splitter exposing ``split(x)`` that yields positional
            ``(train_index, valid_index)`` pairs.

    Returns:
        1-D float array with one averaged prediction per entry of ``test``.

    Side effects:
        Writes ``inception.fold_<i>.hdf5`` weight files to the working
        directory and prints progress and scores.
    """
    roc_auc = metrics.roc_auc_score
    y = np.asarray(y)  # positional indexing even if a Series was passed
    test = list(test)
    preds_test = np.zeros(len(test), dtype=float)
    train_scores = []
    valid_scores = []

    # Snapshot the starting weights so every fold begins from the same state.
    # Without the reset, later folds start from weights already fitted on
    # their own validation rows, which inflates the validation scores.
    initial_weights = model.get_weights()

    folds_done = 0
    for i, (train_index, valid_index) in enumerate(kf.split(x), start=1):
        model.set_weights(initial_weights)

        train_paths = x.iloc[train_index, 0].tolist()
        valid_paths = x.iloc[valid_index, 0].tolist()
        y_train = y[train_index]
        y_valid = y[valid_index]

        weights_path = 'inception.fold_' + str(i) + '.hdf5'
        callbacks = [EarlyStopping(monitor='val_loss', patience=3, verbose=1, min_delta=1e-4),
                     ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=1, cooldown=1,
                                       verbose=1, min_lr=1e-7),
                     ModelCheckpoint(filepath=weights_path, verbose=1,
                                     save_best_only=True, save_weights_only=True, mode='auto')]

        # Step counts must be whole numbers that cover the last partial batch.
        train_steps = int(np.ceil(len(train_paths) / batch_size))
        valid_steps = int(np.ceil(len(valid_paths) / batch_size))

        # A fresh optimizer per fold; `learning_rate` replaces the deprecated
        # `lr` alias.
        model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy',
                      metrics=['accuracy'])

        # steps_per_epoch must be passed by keyword: the second positional
        # argument of Model.fit is `y`, not the step count. Only the training
        # stream is augmented, so val_loss (which drives all three callbacks)
        # is deterministic for a given set of weights.
        model.fit(_batch_generator(train_paths, y_train, batch_size, img_size, augment=True),
                  steps_per_epoch=train_steps, epochs=epochs, verbose=1,
                  callbacks=callbacks,
                  validation_data=_batch_generator(valid_paths, y_valid, batch_size, img_size),
                  validation_steps=valid_steps)

        # Restore the best checkpoint of this fold before scoring.
        model.load_weights(weights_path)

        print('Running validation predictions on fold {}'.format(i))
        preds_valid = _predict(model, valid_paths, batch_size, img_size)

        print('Running train predictions on fold {}'.format(i))
        preds_train = _predict(model, train_paths, batch_size, img_size)

        valid_score = roc_auc(y_valid, preds_valid)
        train_score = roc_auc(y_train, preds_train)
        print('Val Score:{} for fold {}'.format(valid_score, i))
        print('Train Score: {} for fold {}'.format(train_score, i))

        valid_scores.append(valid_score)
        train_scores.append(train_score)
        print('Avg Train Score:{0:0.5f}, Val Score:{1:0.5f} after {2} folds'.format
              (np.mean(train_scores), np.mean(valid_scores), i))

        print('Running test predictions with fold {}'.format(i))
        preds_test += _predict(model, test, batch_size, img_size)
        folds_done += 1

        print('\n\n')

        if i < n_fold:
            print('Now beginning training for fold {}\n\n'.format(i + 1))
        else:
            print('Finished training!')

    # Average over the folds that actually ran (the splitter decides how many).
    if folds_done:
        preds_test /= folds_done

    return preds_test

In [None]:
# Run configuration.
batch_size = 5
epochs = 50
n_fold = 5
img_size = (800, 800)  # spatial size fed to cv2.resize; matches img_dim (800, 800, 3)

# Fix the seed so the fold assignment and the NumPy-driven random augmentation
# are the same on every run.
seed = 42
np.random.seed(seed)
kf = KFold(n_splits=n_fold, shuffle=True, random_state=seed)

test_pred = train_model(model, batch_size, epochs, img_size, train_set,
                        train_label, test_files, n_fold, kf)

# Write the fold-averaged predictions into the submission frame and save it
# without the row index.
test_set['invasive'] = test_pred
test_set.to_csv('/content/gdrive/My Drive/submission.csv', index=False)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  preds_train = np.zeros(len(x), dtype = np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  preds_test = np.zeros(len(test), dtype = np.float)
  model.fit_generator(train_generator(), train_steps, epochs=epochs, verbose=1,


Epoch 1/50
Epoch 1: val_loss improved from inf to 0.63609, saving model to inception.fold_1.hdf5
Epoch 2/50
Epoch 2: val_loss improved from 0.63609 to 0.30848, saving model to inception.fold_1.hdf5
Epoch 3/50
Epoch 3: val_loss improved from 0.30848 to 0.19822, saving model to inception.fold_1.hdf5
Epoch 4/50
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.

Epoch 4: val_loss did not improve from 0.19822
Epoch 5/50
Epoch 5: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.

Epoch 5: val_loss did not improve from 0.19822
Epoch 6/50
Epoch 6: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.

Epoch 6: val_loss did not improve from 0.19822
Epoch 6: early stopping
Running validation predictions on fold 1


  preds_valid = model.predict_generator(generator=valid_generator(),


Running train predictions on fold 1


  preds_train = model.predict_generator(generator=train_generator(),


Val Score:0.9715582601755787 for fold 1
Train Score: 0.9812630731687311 for fold 1
Avg Train Score:0.98126, Val Score:0.97156 after 1.00000 folds
Running test predictions with fold 1


  preds_test_fold = model.predict_generator(generator=test_generator(),









Now beginning training for fold 2




  model.fit_generator(train_generator(), train_steps, epochs=epochs, verbose=1,


Epoch 1/50
Epoch 1: val_loss improved from inf to 0.27727, saving model to inception.fold_2.hdf5
Epoch 2/50
Epoch 2: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.

Epoch 2: val_loss did not improve from 0.27727
Epoch 3/50
Epoch 3: val_loss improved from 0.27727 to 0.23886, saving model to inception.fold_2.hdf5
Epoch 4/50
Epoch 4: val_loss improved from 0.23886 to 0.18331, saving model to inception.fold_2.hdf5
Epoch 5/50
Epoch 5: val_loss improved from 0.18331 to 0.15378, saving model to inception.fold_2.hdf5
Epoch 6/50
Epoch 6: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.

Epoch 6: val_loss did not improve from 0.15378
Epoch 7/50
Epoch 7: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.

Epoch 7: val_loss did not improve from 0.15378
Epoch 8/50
Epoch 8: val_loss improved from 0.15378 to 0.14502, saving model to inception.fold_2.hdf5
Epoch 9/50
Epoch 9: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.

Ep

  preds_valid = model.predict_generator(generator=valid_generator(),


Running train predictions on fold 2


  preds_train = model.predict_generator(generator=train_generator(),


Val Score:0.9871179966044143 for fold 2
Train Score: 0.9873277012005337 for fold 2
Avg Train Score:0.98430, Val Score:0.97934 after 2.00000 folds
Running test predictions with fold 2


  preds_test_fold = model.predict_generator(generator=test_generator(),









Now beginning training for fold 3




  model.fit_generator(train_generator(), train_steps, epochs=epochs, verbose=1,


Epoch 1/50
Epoch 1: val_loss improved from inf to 0.31359, saving model to inception.fold_3.hdf5
Epoch 2/50
Epoch 2: val_loss improved from 0.31359 to 0.13554, saving model to inception.fold_3.hdf5
Epoch 3/50
Epoch 3: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.

Epoch 3: val_loss did not improve from 0.13554
Epoch 4/50
Epoch 4: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.

Epoch 4: val_loss did not improve from 0.13554
Epoch 5/50
Epoch 5: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.

Epoch 5: val_loss did not improve from 0.13554
Epoch 5: early stopping
Running validation predictions on fold 3


  preds_valid = model.predict_generator(generator=valid_generator(),


Running train predictions on fold 3


  preds_train = model.predict_generator(generator=train_generator(),


Val Score:0.9874398823482652 for fold 3
Train Score: 0.9876260876260876 for fold 3
Avg Train Score:0.98541, Val Score:0.98204 after 3.00000 folds
Running test predictions with fold 3


  preds_test_fold = model.predict_generator(generator=test_generator(),









Now beginning training for fold 4




  model.fit_generator(train_generator(), train_steps, epochs=epochs, verbose=1,


Epoch 1/50
Epoch 1: val_loss improved from inf to 0.27029, saving model to inception.fold_4.hdf5
Epoch 2/50
Epoch 2: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.

Epoch 2: val_loss did not improve from 0.27029
Epoch 3/50
Epoch 3: val_loss improved from 0.27029 to 0.14043, saving model to inception.fold_4.hdf5
Epoch 4/50
Epoch 4: val_loss improved from 0.14043 to 0.11837, saving model to inception.fold_4.hdf5
Epoch 5/50
Epoch 5: val_loss improved from 0.11837 to 0.10213, saving model to inception.fold_4.hdf5
Epoch 6/50
Epoch 6: val_loss improved from 0.10213 to 0.09299, saving model to inception.fold_4.hdf5
Epoch 7/50
Epoch 7: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.

Epoch 7: val_loss did not improve from 0.09299
Epoch 8/50
Epoch 8: val_loss improved from 0.09299 to 0.07919, saving model to inception.fold_4.hdf5
Epoch 9/50
Epoch 9: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.

Epoch 9: val_loss did not improve fro

  preds_valid = model.predict_generator(generator=valid_generator(),


Running train predictions on fold 4


  preds_train = model.predict_generator(generator=train_generator(),


Val Score:0.9937860153091941 for fold 4
Train Score: 0.9932030147207098 for fold 4
Avg Train Score:0.98735, Val Score:0.98498 after 4.00000 folds
Running test predictions with fold 4


  preds_test_fold = model.predict_generator(generator=test_generator(),









Now beginning training for fold 5




  model.fit_generator(train_generator(), train_steps, epochs=epochs, verbose=1,


Epoch 1/50
Epoch 1: val_loss improved from inf to 0.39901, saving model to inception.fold_5.hdf5
Epoch 2/50
Epoch 2: val_loss improved from 0.39901 to 0.20515, saving model to inception.fold_5.hdf5
Epoch 3/50
Epoch 3: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.

Epoch 3: val_loss did not improve from 0.20515
Epoch 4/50
Epoch 4: val_loss improved from 0.20515 to 0.16658, saving model to inception.fold_5.hdf5
Epoch 5/50
Epoch 5: val_loss improved from 0.16658 to 0.13960, saving model to inception.fold_5.hdf5
Epoch 6/50