In [1]:
import pandas as pd
import numpy as np
import os, sys, math
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import scipy.optimize as opt
from PIL import Image
import cv2
import random as rn

In [2]:
import tensorflow as tf
from tensorflow.image import resize_images

In [3]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model, Model
from keras import metrics
from keras.optimizers import Adam 
from keras import backend as K
from keras import layers
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, LeakyReLU, PReLU, Input
from keras.utils import np_utils
from keras.layers.normalization import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint, History, TensorBoard
from keras.applications.inception_resnet_v2 import InceptionResNetV2

Using TensorFlow backend.


In [4]:
# Reproducibility: seed every random number generator this notebook relies on.
# NOTE: PYTHONHASHSEED is read when the interpreter starts, so assigning it
# here cannot change string hashing in the already-running kernel; the value
# is only inherited by processes spawned afterwards.
os.environ['PYTHONHASHSEED'] = '0'
# Seed for numpy-generated random numbers.
np.random.seed(37)
# Seed for Python's built-in random module.
rn.seed(1254)
# Graph-level TensorFlow random seed.
tf.set_random_seed(89)
# Let TensorFlow grow its GPU memory usage on demand.
gpu_options = tf.GPUOptions(allow_growth=True)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
# Set the session used by Keras. The model code in this notebook comes from
# the standalone `keras` package (its backend is imported as K), which in
# Keras 2.2.x tracks its session independently of `tf.keras`. Registering the
# session only with `tf.keras` would let standalone Keras create its own
# default session without `allow_growth`, so register it with both.
tf.keras.backend.set_session(sess)
K.set_session(sess)

In [5]:
"""
name_label_dict = {
0:  'Nucleoplasm',
1:  'Nuclear membrane',
2:  'Nucleoli',   
3:  'Nucleoli fibrillar center',
4:  'Nuclear speckles',
5:  'Nuclear bodies',
6:  'Endoplasmic reticulum',   
7:  'Golgi apparatus',
8:  'Peroxisomes',
9:  'Endosomes',
10:  'Lysosomes',
11:  'Intermediate filaments',
12:  'Actin filaments',
13:  'Focal adhesion sites',   
14:  'Microtubules',
15:  'Microtubule ends',  
16:  'Cytokinetic bridge',   
17:  'Mitotic spindle',
18:  'Microtubule organizing center',  
19:  'Centrosome',
20:  'Lipid droplets',
21:  'Plasma membrane',   
22:  'Cell junctions', 
23:  'Mitochondria',
24:  'Aggresome',
25:  'Cytosol',
26:  'Cytoplasmic bodies',   
27:  'Rods & rings' }
"""

"\nname_label_dict = {\n0:  'Nucleoplasm',\n1:  'Nuclear membrane',\n2:  'Nucleoli',   \n3:  'Nucleoli fibrillar center',\n4:  'Nuclear speckles',\n5:  'Nuclear bodies',\n6:  'Endoplasmic reticulum',   \n7:  'Golgi apparatus',\n8:  'Peroxisomes',\n9:  'Endosomes',\n10:  'Lysosomes',\n11:  'Intermediate filaments',\n12:  'Actin filaments',\n13:  'Focal adhesion sites',   \n14:  'Microtubules',\n15:  'Microtubule ends',  \n16:  'Cytokinetic bridge',   \n17:  'Mitotic spindle',\n18:  'Microtubule organizing center',  \n19:  'Centrosome',\n20:  'Lipid droplets',\n21:  'Plasma membrane',   \n22:  'Cell junctions', \n23:  'Mitochondria',\n24:  'Aggresome',\n25:  'Cytosol',\n26:  'Cytoplasmic bodies',   \n27:  'Rods & rings' }\n"

In [6]:
# Identifier of this run, hard-coded rather than derived from the file name.
NAME = "test5_notgenerator_densenet169_dense_TTA_test"

# Every location below is resolved against the current working directory.
PATH = os.getcwd()
TRAIN = os.path.join(PATH, 'data', 'train')
TEST = os.path.join(PATH, 'data', 'test')
PREPROCESSED = os.path.join(PATH, 'preprocessed_data')
LABELS = os.path.join(PATH, 'data', 'train.csv')
SAMPLE = os.path.join(PATH, 'data', 'sample_submission.csv')
# The weights file is named after the run without its trailing "_TTA_test".
MODEL = os.path.join(PATH, 'model', NAME[:-len('_TTA_test')] + '.h5')
RESULT = os.path.join(PATH, 'result', NAME + '_submission.csv')

In [7]:
from tta_wrapper import tta_classification

In [8]:
# Shape constants for the model input and output.
IMAGE_LENGTH, IMAGE_WIDTH = 512, 512
CHANNEL_NUM = 4
LABEL_NUM = 28  # label indices 0-27

In [9]:
# Load the preprocessed test images from the .npy cache.
# NOTE(review): presumably shaped (n_samples, IMAGE_LENGTH, IMAGE_WIDTH, CHANNEL_NUM)
# with RGBY channels, going by the file name — confirm against the preprocessing step.
test_x = np.load(os.path.join(PREPROCESSED, 'test_RGBY_original_x.npy'))

In [10]:
def f1(y_true, y_pred):
    """Keras metric: F1 score per label column, averaged over the labels.

    Predictions are rounded to 0/1 first; true positives, false positives and
    false negatives are then summed along axis 0 and combined into one F1
    value per column. The mean of those values is returned.

    Args:
        y_true: tensor of 0/1 targets (assumed (batch, labels) — TODO confirm).
        y_pred: tensor of predicted scores with the same shape as y_true.

    Returns:
        Scalar tensor holding the mean of the per-column F1 scores.
    """
    eps = K.epsilon()
    hard_pred = K.round(y_pred)

    true_pos = K.sum(K.cast(y_true * hard_pred, 'float'), axis=0)
    false_pos = K.sum(K.cast((1 - y_true) * hard_pred, 'float'), axis=0)
    false_neg = K.sum(K.cast(y_true * (1 - hard_pred), 'float'), axis=0)

    # eps keeps every denominator non-zero for columns without positives.
    precision = true_pos / (true_pos + false_pos + eps)
    recall = true_pos / (true_pos + false_neg + eps)

    per_label = 2 * precision * recall / (precision + recall + eps)
    # Replace any NaN entry with 0 before averaging.
    per_label = tf.where(tf.is_nan(per_label), tf.zeros_like(per_label), per_label)
    return K.mean(per_label)

In [11]:
def focal_loss(gamma=2., alpha=.25):
    """Build a binary focal loss function for Keras.

    Args:
        gamma: exponent of the modulating factor, (1 - p)**gamma for positive
            targets and p**gamma for negative targets.
        alpha: weight of the positive-target term; the negative-target term
            is weighted by 1 - alpha.

    Returns:
        A function ``focal_loss_fixed(y_true, y_pred)`` that returns the loss
        summed (not averaged) over every element of the batch.
    """
    def focal_loss_fixed(y_true, y_pred):
        # Keep predictions strictly inside (0, 1): an exact 0 or 1 would feed
        # 0 into K.log and turn the whole loss into inf/NaN.
        eps = K.epsilon()
        y_pred = K.clip(y_pred, eps, 1. - eps)
        # Prediction where the target is 1; 1 elsewhere so the term is zero.
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        # Prediction where the target is 0; 0 elsewhere so the term is zero.
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        positive_term = K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))
        negative_term = K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
        return -positive_term - negative_term
    return focal_loss_fixed

In [12]:
# Restore the trained network from MODEL. The custom metric and loss are
# supplied through custom_objects under the names 'f1' and 'focal_loss_fixed'
# (presumably the names stored in the .h5 file when the model was compiled —
# verify against the training notebook).
model = load_model(MODEL, custom_objects={'f1': f1, 'focal_loss_fixed' : focal_loss()})

In [13]:

# Wrap the loaded model for test-time augmentation.
# NOTE(review): tta_classification comes from the external tta_wrapper module;
# presumably it merges ('mean') the predictions for horizontally flipped and
# 90/270-degree rotated inputs — confirm against that package.
tta_model = tta_classification(
    model,
    h_flip=True,
    rotation=(90, 270),
    merge='mean',
)

# Score the test set one sample per batch.
# Plain, non-augmented alternative: model.predict(test_x, verbose=1)
test_pred_y = tta_model.predict(test_x, batch_size=1, verbose=1)



In [14]:
# Inspect the raw per-label scores returned by the TTA model.
test_pred_y

array([[1.18836679e-01, 4.03447310e-04, 7.91559219e-01, ...,
        4.72033285e-02, 1.60030395e-04, 1.18864783e-07],
       [1.00527845e-01, 5.42527437e-03, 1.65713243e-02, ...,
        1.85586408e-01, 6.32363418e-03, 9.34159325e-05],
       [2.63258427e-01, 7.03239581e-04, 1.19709847e-02, ...,
        4.75552291e-01, 6.64077015e-06, 1.04555841e-06],
       ...,
       [8.38446096e-02, 3.98427481e-03, 6.99415803e-03, ...,
        8.46316740e-02, 2.59049819e-03, 6.09253475e-04],
       [2.90850073e-01, 5.39635658e-01, 1.39783220e-02, ...,
        1.43636093e-02, 7.67411618e-03, 8.04565425e-05],
       [4.35553640e-01, 1.78415459e-02, 4.46308078e-03, ...,
        3.28861088e-01, 1.77927397e-03, 8.57601935e-06]], dtype=float32)

In [15]:
# Load the per-label decision thresholds from the .npy file.
# NOTE(review): presumably tuned on validation data by an earlier notebook — confirm.
thre_list = np.load(os.path.join(PREPROCESSED, 'test5_notgenerator_densenet169_dense_TTA_thre.npy'))

In [16]:
# Binarise the scores: a label is predicted when its score reaches that
# label's threshold. Require exactly one threshold per output column so a
# mis-shaped threshold array cannot silently broadcast into wrong decisions.
if np.shape(thre_list) != test_pred_y.shape[1:]:
    raise ValueError(
        "expected thresholds of shape %r, got %r"
        % (test_pred_y.shape[1:], np.shape(thre_list))
    )
# Compare directly instead of subtracting and testing against a zeros array.
result_bool = test_pred_y >= np.asarray(thre_list)

In [17]:
# Load the sample submission as the template for the output file; its
# 'Predicted' column is filled in from the thresholded predictions.
submission_df = pd.read_csv(SAMPLE)

In [18]:
# Turn each row of the boolean prediction matrix into the submission format:
# the indices of the predicted labels separated by single spaces ('' when no
# label passed its threshold).
if len(submission_df) != result_bool.shape[0]:
    raise ValueError(
        "submission template has %d rows but there are %d predictions"
        % (len(submission_df), result_bool.shape[0])
    )
# Assign the whole column at once: one .loc write per row is slow, upcasts the
# column dtype element by element, and would silently append rows if the
# template were shorter than the predictions.
submission_df['Predicted'] = [
    " ".join(str(label) for label in np.flatnonzero(row))
    for row in result_bool
]

In [19]:
# Inspect the filled-in submission table.
submission_df

Unnamed: 0,Id,Predicted
0,00008af0-bad0-11e8-b2b8-ac1f6b6435d0,2
1,0000a892-bacf-11e8-b2b8-ac1f6b6435d0,5 25
2,0006faa6-bac7-11e8-b2b7-ac1f6b6435d0,0 5 25
3,0008baca-bad7-11e8-b2b9-ac1f6b6435d0,0 25
4,000cce7e-bad4-11e8-b2b8-ac1f6b6435d0,7 23 25
5,00109f6a-bac8-11e8-b2b7-ac1f6b6435d0,4 21 25
6,001765de-bacd-11e8-b2b8-ac1f6b6435d0,4 25
7,0018641a-bac9-11e8-b2b8-ac1f6b6435d0,0 25
8,00200f22-bad7-11e8-b2b9-ac1f6b6435d0,0 25
9,0026f154-bac6-11e8-b2b7-ac1f6b6435d0,4 21 25


In [20]:
# Sanity check: True would mean at least one sample ended up with an empty
# prediction string, i.e. no label passed its threshold.
'' in submission_df['Predicted'].unique()

False

In [21]:
# Write the submission file. Create the output directory first so the write
# cannot fail on a missing folder after the predictions have been computed.
result_dir = os.path.dirname(RESULT)
if result_dir and not os.path.isdir(result_dir):
    os.makedirs(result_dir)
submission_df.to_csv(RESULT, index=False)

In [22]:
# Show the per-label thresholds that were applied to the scores.
thre_list

array([0.207, 0.106, 0.083, 0.072, 0.079, 0.091, 0.083, 0.094, 0.258,
       0.108, 0.038, 0.047, 0.117, 0.108, 0.051, 0.061, 0.091, 0.089,
       0.068, 0.116, 0.053, 0.079, 0.111, 0.096, 0.167, 0.171, 0.072,
       0.026])