<a href="https://colab.research.google.com/github/Itsuki-Hamano123/auto_ml/blob/master/auto-keras/colorectal_histology/MobileNet_FineTuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%pip install tensorflow-hub
%pip show tensorflow-hub
%pip show tensorflow

Name: tensorflow-hub
Version: 0.9.0
Summary: TensorFlow Hub is a library to foster the publication, discovery, and consumption of reusable parts of machine learning models.
Home-page: https://github.com/tensorflow/hub
Author: Google LLC
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: /usr/local/lib/python3.6/dist-packages
Requires: numpy, six, protobuf
Required-by: 
Name: tensorflow
Version: 2.3.0
Summary: TensorFlow is an open source machine learning framework for everyone.
Home-page: https://www.tensorflow.org/
Author: Google Inc.
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: /usr/local/lib/python3.6/dist-packages
Requires: astunparse, absl-py, grpcio, scipy, wrapt, numpy, wheel, h5py, google-pasta, gast, termcolor, keras-preprocessing, six, protobuf, tensorflow-estimator, tensorboard, opt-einsum
Required-by: fancyimpute


In [2]:
import numpy as np

import tensorflow as tf
from tensorflow.keras import Sequential, layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

import tensorflow_datasets  as tfds
import tensorflow_hub as hub

from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

  import pandas.util.testing as tm


## データの読み込み

In [3]:
def fetch_tf_dataset(data_name, 
                     split=['train[0%:60%]','train[60%:80%]','train[80%:100%]'],
                     shuffle_files=True, as_supervised=True,
                     with_info=True, batch_size=None):
  '''TensorFlowデータセットからデータをフェッチ'''
  (ds_train, ds_val, ds_test), data_info = tfds.load(data_name, split=split,
                                  shuffle_files=shuffle_files,
                                  as_supervised=as_supervised,
                                  with_info=with_info,
                                  batch_size=batch_size)
  return ds_train, ds_val, ds_test, data_info
  


data_name = 'colorectal_histology' # @param{type:'string'}
x_name = 'image' # @param{type:'string'}
y_name = 'label' # @param{type:'string'}
# データのバッチサイズ
BATCH_SIZE = 256 # @param{type:'number'}

# データのフェッチ
ds_train, ds_val, ds_test, ds_info = fetch_tf_dataset(data_name=data_name,
                                     as_supervised=True,
                                     batch_size=BATCH_SIZE)

display(ds_info)

[1mDownloading and preparing dataset colorectal_histology/2.0.0 (download: 246.14 MiB, generated: Unknown size, total: 246.14 MiB) to /root/tensorflow_datasets/colorectal_histology/2.0.0...[0m


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Completed...', max=1.0, style=Progre…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Size...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Extraction completed...', max=1.0, styl…











HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/colorectal_histology/2.0.0.incomplete70EC8B/colorectal_histology-train.tfrecord


HBox(children=(FloatProgress(value=0.0, max=5000.0), HTML(value='')))

[1mDataset colorectal_histology downloaded and prepared to /root/tensorflow_datasets/colorectal_histology/2.0.0. Subsequent calls will reuse this data.[0m


tfds.core.DatasetInfo(
    name='colorectal_histology',
    version=2.0.0,
    description='Classification of textures in colorectal cancer histology. Each example is a 150 x 150 x 3 RGB image of one of 8 classes.',
    homepage='https://zenodo.org/record/53169#.XGZemKwzbmG',
    features=FeaturesDict({
        'filename': Text(shape=(), dtype=tf.string),
        'image': Image(shape=(150, 150, 3), dtype=tf.uint8),
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=8),
    }),
    total_num_examples=5000,
    splits={
        'train': 5000,
    },
    supervised_keys=('image', 'label'),
    citation="""@article{kather2016multi,
      title={Multi-class texture analysis in colorectal cancer histology},
      author={Kather, Jakob Nikolas and Weis, Cleo-Aron and Bianconi, Francesco and Melchers, Susanne M and Schad, Lothar R and Gaiser, Timo and Marx, Alexander and Z{"o}llner, Frank Gerrit},
      journal={Scientific reports},
      volume={6},
      pages={27988},
      y

# モデル作成

## 前処理を行うKerasレイヤー
https://www.tensorflow.org/tutorials/images/data_augmentation?hl=ja

In [4]:
# リサイズ後の画像サイズ
RESIZE_IMG_SIZE = 224

# 画像のリサイズと、画素値のリスケールを行う層
resize_and_rescale = tf.keras.Sequential(layers=[
  layers.experimental.preprocessing.Resizing(height=RESIZE_IMG_SIZE,
                                             width=RESIZE_IMG_SIZE),
  layers.experimental.preprocessing.Rescaling(1./255)
], name='resize_and_rescale')


# Data Augmentationを行う層 
# 推論時には実行されない層
data_augmentation = tf.keras.Sequential(layers=[
                                          layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
                                          layers.experimental.preprocessing.RandomRotation(0.2),
                                        ], name='train_data_augmentation')

## ファインチューニングさせるMobileNet
前処理レイヤーの差し込みも行う

In [5]:
def _fetch_hub_keras_layer(hub_url, trainable=False):
  '''tensorflow hubのレイヤーをKerasレイヤーとしてfetch'''
  fetch_keras_layer = hub.KerasLayer(hub_url, trainable=trainable)
  return fetch_keras_layer


def clf_model_fn(hub_url, input_shape, output_shape, hub_layer_trainable=False):
  '''分類モデルの構造を定義'''
  model = Sequential([
                      resize_and_rescale, # 前処理用のレイヤー
                      data_augmentation, # 前処理用のレイヤー
                      _fetch_hub_keras_layer(hub_url, trainable=hub_layer_trainable),
                      layers.Dense(output_shape, activation='softmax')
  ])
  model.build(input_shape)
  return model


TF_HUB_URL = 'https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4' #@param{type:'string'}
# 画像をリサイズする層があるためinputの画像サイズは未指定
MODEL_INPUT_SHAPE = (None, None, None, 3)
CLASS_NUM = ds_info.features[y_name].num_classes
FINE_TUNING = False

clf = clf_model_fn(hub_url=TF_HUB_URL, input_shape=MODEL_INPUT_SHAPE,
                   output_shape=CLASS_NUM, hub_layer_trainable=FINE_TUNING)
clf.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resize_and_rescale (Sequenti (None, 224, 224, 3)       0         
_________________________________________________________________
train_data_augmentation (Seq (None, 224, 224, 3)       0         
_________________________________________________________________
keras_layer (KerasLayer)     (None, 1280)              2257984   
_________________________________________________________________
dense (Dense)                (None, 8)                 10248     
Total params: 2,268,232
Trainable params: 10,248
Non-trainable params: 2,257,984
_________________________________________________________________


In [6]:
es = EarlyStopping(patience=3)

clf.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(0.001),
    metrics=['accuracy'],
)

In [7]:
EPOCHS = 100 #@param{type:'number'}

# バッチサイズはDatasetをfetchする際に指定済み
clf.fit(ds_train, epochs=EPOCHS,
        validation_data=ds_val,
        callbacks=[es])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100


<tensorflow.python.keras.callbacks.History at 0x7f8fd017e8d0>

# モデルの評価

### 簡易的な評価

In [8]:
%time
def evalute_model(model, ds):
  '''モデルの評価関数呼び出し'''
  score = model.evaluate(ds)
  return score


train_score = evalute_model(clf, ds_train)
val_score = evalute_model(clf, ds_val)
test_score = evalute_model(clf, ds_test)
print('訓練 loss:{loss}, 正答率:{auc}'.format(loss=train_score[0], auc=train_score[1]))
print('検証 loss:{loss}, 正答率:{auc}'.format(loss=val_score[0], auc=val_score[1]))
print('テスト loss:{loss}, 正答率:{auc}'.format(loss=test_score[0], auc=test_score[1]))

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 7.63 µs
訓練 loss:0.4026747941970825, 正答率:0.8606666922569275
検証 loss:0.485822856426239, 正答率:0.8450000286102295
テスト loss:0.4052494466304779, 正答率:0.8579999804496765
