In [1]:
# 「Chest X-Ray Images(Pnuemonia)」で肺炎か診断
#  前処理で保存した特徴量、ターゲットから
#  このモデルをベースラインとする

In [38]:
import numpy as np
import pandas as pd

import cv2

import matplotlib.pyplot as plt

import keras as ks
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from keras.utils import np_utils
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, recall_score, precision_score, accuracy_score, f1_score

import os
import glob

%matplotlib inline


In [3]:
# path の設定
test_path = "./dataset/chest_xray/test/"
train_path ='./dataset/chest_xray/train/'

In [4]:
# 訓練データ読み出し
X_train = np.load(train_path + 'X_train.npy')
y_train = np.load(train_path + 'y_train.npy')

In [5]:
X_train.shape, y_train.shape

((5216, 64, 64, 3), (5216,))

In [6]:
# テストデータ読み出し
X_test = np.load(test_path + 'X_test.npy')
y_test = np.load(test_path + 'y_test.npy')

In [7]:
X_test.shape, y_test.shape

((624, 64, 64, 3), (624,))

In [8]:
# モデル生成
model = Sequential()

In [9]:
# 入力層と畳み込み層
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=(64, 64, 3)))

Instructions for updating:
Colocations handled automatically by placer.


In [10]:
# プール層
model.add(MaxPooling2D(pool_size=(2, 2)))

# ドロップアウト
model.add(Dropout(0.25))

In [11]:
# 全結合層
model.add(Flatten())
model.add(Dense(128, activation='relu'))

# ドロップアウト
model.add(Dropout(0.33))

In [12]:
# 出力層 二項分類なので sigmoid
model.add(Dense(1, activation='sigmoid'))

In [13]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 62, 62, 32)        896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 31, 31, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 31, 31, 32)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 30752)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               3936384   
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                

In [14]:
# コンパイル
# 2項分類なので損失関数は [binary_crossentropy]
# 最適化は Adam
model.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=0.00001),
              metrics=['accuracy'])

In [15]:
# EarlyStopping
callbaks = EarlyStopping(monitor='val_loss', patience=5)

In [16]:
# モデル訓練
model.fit(X_train, y_train, validation_data=(X_test, y_test), callbacks=[callbaks], epochs=20)

Instructions for updating:
Use tf.cast instead.
Train on 5216 samples, validate on 624 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20


<keras.callbacks.callbacks.History at 0x21fccfffb70>

In [17]:
base_y_pred = model.predict_classes(X_test)

In [18]:
base_y_pred[:5]

array([[1],
       [1],
       [1],
       [0],
       [1]])

In [19]:
confusion_matrix(y_test, base_y_pred)

array([[ 55, 179],
       [  1, 389]], dtype=int64)

In [20]:
# 正解率
accuracy_score(y_test, base_y_pred)

0.7115384615384616

In [21]:
# 適合率
precision_score(y_test, base_y_pred)

0.6848591549295775

In [22]:
# 再現率
recall_score(y_test, base_y_pred)

0.9974358974358974

In [23]:
# F1メジャー
f1_score(y_test, base_y_pred)

0.8121085594989562

In [24]:
387/(143+387)

0.730188679245283

In [25]:
387/(387+3)

0.9923076923076923

In [26]:
# F1?
(387*2)/(147+387+387+3)

0.8376623376623377

In [27]:
(387+91)/(387+91+3+147)

0.7611464968152867

In [34]:
np.where(y_test[:] == 1).count

<function tuple.count>

In [37]:
np.count_nonzero(y_train == 1), np.count_nonzero(y_train == 0)

(3875, 1341)

In [39]:
K.image_data_format()

'channels_last'