In [9]:
# import dependencies
import json
import math
import os

import cv2
from PIL import Image
import numpy as np
from keras import layers
from keras.applications.resnet50 import ResNet50
from keras.applications.mobilenet import MobileNet
from keras.applications.densenet import DenseNet201
from keras.applications.inception_v3 import InceptionV3
from keras.applications.nasnet import NASNetLarge, NASNetMobile
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.callbacks import Callback, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.optimizers import Adam
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, accuracy_score
import scipy
from tqdm import tqdm
import tensorflow as tf
from keras import backend as K
import gc
from functools import partial
from sklearn import metrics
from collections import Counter
import json
import itertools


%matplotlib inline

In [19]:
# Self-defined functions
# Load the '.png' images of a directory into two lists: benign (B) and malignant (M)
def Dataset_loader(DIR, RESIZE, sigmaX=10, label_index=4):
    """Load the ``.png`` images found in ``DIR`` and split them by class letter.

    Each file whose extension is exactly ``.png`` and whose base name carries
    ``'B'`` or ``'M'`` at position ``label_index`` is opened, converted to RGB
    and resized to ``RESIZE`` x ``RESIZE`` pixels. Everything else is skipped.

    The class letter is looked up in the file's *base name*, not in the full
    path. The previous implementation indexed character 26 of the joined path,
    which only matches base-name position 4 when ``DIR`` is exactly 21
    characters long (e.g. ``data/fold1/train/200X``); for a directory of a
    different length (e.g. ``data/fold1/test/200X``) it silently inspected a
    different character, and for short paths it raised ``IndexError``.
    NOTE(review): the default of 4 presumably targets names shaped like
    ``SOB_B_...`` / ``SOB_M_...`` -- confirm against the actual data files.

    Args:
        DIR: Directory that is listed (non-recursively) for image files.
        RESIZE: Edge length, in pixels, of the square output images.
        sigmaX: Unused; kept so existing calls keep working.
        label_index: Non-negative position of the class letter inside the
            file's base name.

    Returns:
        Tuple ``(BIMG, MIMG)``: two lists of ``(RESIZE, RESIZE, 3)`` arrays,
        the images marked ``'B'`` and the images marked ``'M'`` respectively.
    """
    BIMG = []
    MIMG = []
    for IMAGE_NAME in tqdm(os.listdir(DIR)):
        stem, ftype = os.path.splitext(IMAGE_NAME)
        if ftype != ".png":
            continue

        # Slicing (instead of indexing) yields '' for names that are too short,
        # so such files are skipped rather than raising IndexError.
        label = stem[label_index:label_index + 1]
        if label not in ('B', 'M'):
            # Neither class: skip before paying for decode + resize.
            continue

        # Context manager closes the file handle; convert() returns an
        # independent in-memory image, so the array stays valid afterwards.
        with Image.open(os.path.join(DIR, IMAGE_NAME)) as handle:
            img = np.asarray(handle.convert("RGB"))
        img = cv2.resize(img, (RESIZE, RESIZE))

        if label == 'B':
            BIMG.append(img)
        else:
            MIMG.append(img)
    return BIMG, MIMG

def build_model(backbone, lr=1e-4):
    """Stack a 2-class softmax head on top of ``backbone`` and compile it.

    The head is: global average pooling -> dropout (0.5) -> batch
    normalization -> dense layer with 2 softmax units.

    Args:
        backbone: Keras model/layer used as the first layer of the network.
        lr: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, accuracy metric).
    """
    model = Sequential([
        backbone,
        layers.GlobalAveragePooling2D(),
        layers.Dropout(0.5),
        layers.BatchNormalization(),
        layers.Dense(2, activation='softmax'),
    ])

    model.compile(
        # The head is a 2-way softmax trained on one-hot labels, so the
        # categorical loss is the matching one (for two classes it yields the
        # same value as the binary loss averaged over both outputs).
        loss='categorical_crossentropy',
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        optimizer=Adam(learning_rate=lr),
        metrics=['accuracy'],
    )

    return model

### Load dataset

In [28]:
# Load the 200X training images of every fold. Dataset_loader returns a
# (benign, malignant) pair of lists; each list is turned into one array.
f1_b_train, f1_m_train = map(np.array, Dataset_loader('data/fold1/train/200X', 224))

f2_b_train, f2_m_train = map(np.array, Dataset_loader('data/fold2/train/200X', 224))

f3_b_train, f3_m_train = map(np.array, Dataset_loader('data/fold3/train/200X', 224))

f4_b_train, f4_m_train = map(np.array, Dataset_loader('data/fold4/train/200X', 224))

f5_b_train, f5_m_train = map(np.array, Dataset_loader('data/fold5/train/200X', 224))

100%|██████████| 1269/1269 [00:17<00:00, 71.63it/s]
100%|██████████| 1416/1416 [00:20<00:00, 69.22it/s]
100%|██████████| 1354/1354 [00:19<00:00, 69.36it/s]
100%|██████████| 1334/1334 [00:19<00:00, 69.24it/s]
100%|██████████| 1243/1243 [00:18<00:00, 68.61it/s]


In [29]:
# Sanity check: shapes of the fold-1 training arrays, benign first, then malignant.
for fold1_images in (f1_b_train, f1_m_train):
    print(fold1_images.shape)

(368, 224, 224, 3)
(901, 224, 224, 3)


In [30]:
# Load the 200X test images of every fold. Dataset_loader returns a
# (benign, malignant) pair of lists; each list is turned into one array.
f1_b_test, f1_m_test = map(np.array, Dataset_loader('data/fold1/test/200X', 224))

f2_b_test, f2_m_test = map(np.array, Dataset_loader('data/fold2/test/200X', 224))

f3_b_test, f3_m_test = map(np.array, Dataset_loader('data/fold3/test/200X', 224))

f4_b_test, f4_m_test = map(np.array, Dataset_loader('data/fold4/test/200X', 224))

f5_b_test, f5_m_test = map(np.array, Dataset_loader('data/fold5/test/200X', 224))

100%|██████████| 744/744 [00:10<00:00, 70.57it/s]
100%|██████████| 597/597 [00:08<00:00, 68.24it/s]
100%|██████████| 659/659 [00:09<00:00, 70.65it/s]
100%|██████████| 679/679 [00:09<00:00, 69.18it/s]
100%|██████████| 770/770 [00:11<00:00, 69.23it/s]


In [31]:
# Stack the five folds along the first (sample) axis, separately for each
# class and for the train / test partitions (axis 0 is np.concatenate's default).
b_train = np.concatenate([f1_b_train, f2_b_train, f3_b_train, f4_b_train, f5_b_train])
m_train = np.concatenate([f1_m_train, f2_m_train, f3_m_train, f4_m_train, f5_m_train])
b_test = np.concatenate([f1_b_test, f2_b_test, f3_b_test, f4_b_test, f5_b_test])
m_test = np.concatenate([f1_m_test, f2_m_test, f3_m_test, f4_m_test, f5_m_test])

### Create Label

In [32]:
# One label per image: 0 for the benign arrays, 1 for the malignant arrays.
b_train_label = np.zeros(b_train.shape[0])
m_train_label = np.ones(m_train.shape[0])
b_test_label = np.zeros(b_test.shape[0])
m_test_label = np.ones(m_test.shape[0])

In [33]:
# Merge data: benign samples first, malignant samples after them, with the
# label vectors concatenated in the same order so rows stay aligned.
X_train = np.concatenate([b_train, m_train])
Y_train = np.concatenate([b_train_label, m_train_label])
X_test = np.concatenate([b_test, m_test])
Y_test = np.concatenate([b_test_label, m_test_label])

In [34]:
# Shuffle images and labels with the same permutation so pairs stay aligned.
# A seeded, local generator makes the order reproducible; without it the fixed
# random_state of the later train/validation split would still yield a
# different split on every run. The global NumPy random state is left untouched.
shuffle_rng = np.random.RandomState(11)

s = shuffle_rng.permutation(X_train.shape[0])
X_train = X_train[s]
Y_train = Y_train[s]

s = shuffle_rng.permutation(X_test.shape[0])
X_test = X_test[s]
Y_test = Y_test[s]

# To categorical (one-hot, 2 classes). Guarded on the label rank so that
# re-running this cell does not one-hot encode already encoded labels.
if Y_train.ndim == 1:
    Y_train = to_categorical(Y_train, num_classes=2)
if Y_test.ndim == 1:
    Y_test = to_categorical(Y_test, num_classes=2)

### Train and Validation split

In [35]:
# Hold out 20% of the training data as a validation set (fixed random_state).
x_train, x_val, y_train, y_val = train_test_split(X_train, Y_train, test_size=0.2, random_state=11)

### Data Generator

In [36]:
BATCH_SIZE = 16

# Augmentation pipeline for the training images.
train_generator = ImageDataGenerator(
        # Keras expands a scalar zoom_range z to [1 - z, 1 + z]. The former
        # value of 2 therefore sampled zoom factors from [-1, 3], which
        # includes negative and near-zero factors. An explicit positive
        # [lower, upper] range avoids those.
        # NOTE(review): [0.5, 2.0] is a reviewer's choice -- tune as needed.
        zoom_range=[0.5, 2.0],  # set range for random zoom
        rotation_range=90,  # rotate randomly by up to 90 degrees
        horizontal_flip=True,  # randomly flip images
        vertical_flip=True,  # randomly flip images
    )

### Model: DenseNet201

In [37]:
# Reset the Keras backend session and run the garbage collector before
# building a new network.
K.clear_session()
gc.collect()

# NOTE: the variable is named `resnet`, but the backbone built here is
# DenseNet201 (ImageNet weights, no classification top, 224x224 RGB input).
resnet = DenseNet201(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

model = build_model(resnet, lr=1e-4)
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
densenet201 (Functional)     (None, 7, 7, 1920)        18321984  
_________________________________________________________________
global_average_pooling2d (Gl (None, 1920)              0         
_________________________________________________________________
dropout (Dropout)            (None, 1920)              0         
_________________________________________________________________
batch_normalization (BatchNo (None, 1920)              7680      
_________________________________________________________________
dense (Dense)                (None, 2)                 3842      
Total params: 18,333,506
Trainable params: 18,100,610
Non-trainable params: 232,896
______________________



In [38]:
# Learning Rate Reducer
# The model is compiled with metrics=['accuracy'], which Keras logs as
# 'val_accuracy' on the validation data. Monitoring 'val_acc' would never find
# the metric, so neither callback would ever act.
learn_control = ReduceLROnPlateau(monitor='val_accuracy', mode='max', patience=5,
                                  verbose=1, factor=0.2, min_lr=1e-7)

# Checkpoint: keep only the weights with the best validation accuracy so far.
filepath = "weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

### Training & Evaluation

In [None]:
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten

num_classes = 2
def cnn(train_X, test_X, train_Y, test_Y, num_classes):
    """Build, train and evaluate a small convolutional baseline classifier.

    The network has three 5x5 convolutions (32, 64, 128 filters), each
    followed by 3x3 max pooling with stride 1, then a 512-unit dense layer and
    a ``num_classes``-way softmax output. It is trained for 5 epochs with
    RMSprop (learning rate 0.01, decay 1e-6) and categorical cross-entropy,
    using the test set as validation data. The evaluation result on the test
    set is printed.

    Args:
        train_X: Training images; ``train_X.shape[1:]`` is the network input shape.
        test_X: Test images, used both for validation and for the final evaluation.
        train_Y: Training targets matching the softmax output.
        test_Y: Test targets matching the softmax output.
        num_classes: Number of output units of the final dense layer.

    Returns:
        None. The model summary, training log and evaluation score are printed.
    """
    # Local import: this optimizer class is not among the notebook-level imports.
    from keras.optimizers import RMSprop

    model = Sequential()
    # The first layer has to declare the image size. It is taken from the
    # `train_X` argument rather than from a notebook-level variable, so the
    # function works for whatever data it is called with.
    model.add(Conv2D(32, (5, 5), strides=(1, 1), padding='same',
                     input_shape=train_X.shape[1:]))
    # `Activation` is reached through the already imported `layers` module.
    model.add(layers.Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(1, 1)))
    model.add(Conv2D(64, (5, 5), strides=(1, 1)))
    model.add(layers.Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(1, 1)))
    # NOTE(review): unlike the first two, this convolution has no activation
    # after it -- left as in the original; confirm whether that is intended.
    model.add(Conv2D(128, (5, 5), strides=(1, 1)))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(1, 1)))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(layers.Activation('relu'))
    model.add(Dense(num_classes))
    model.add(layers.Activation('softmax'))

    lr = 0.01
    epoch = 5
    opt = RMSprop(learning_rate=lr, decay=1e-6)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt, metrics=['accuracy'])

    # Print the architecture once (it was previously printed twice).
    model.summary()

    # here we use the test_set as the validation_set
    model.fit(train_X, train_Y, batch_size=128, epochs=epoch, verbose=2,
              validation_data=(test_X, test_Y))
    score = model.evaluate(test_X, test_Y, verbose=0)
    print(score)

### Prediction