# 下準備

## ChainerとCuPyの用意

In [0]:
!pip install chainer




In [0]:
import chainer

In [0]:
# import cupy

## Chainerのランタイム情報を表示する

In [0]:
chainer.print_runtime_info()
print('GPU:', chainer.cuda.available)
print('cuDNN:', chainer.cuda.cudnn_enabled)

Platform: Linux-4.14.79+-x86_64-with-Ubuntu-18.04-bionic
Chainer: 5.4.0
NumPy: 1.16.3
CuPy:
  CuPy Version          : 5.4.0
  CUDA Root             : /usr/local/cuda
  CUDA Build Version    : 10000
  CUDA Driver Version   : 10000
  CUDA Runtime Version  : 10000
  cuDNN Build Version   : 7301
  cuDNN Version         : 7301
  NCCL Build Version    : 2402
  NCCL Runtime Version  : 2402
iDeep: 2.0.0.post3
GPU: True
cuDNN: True


# データセットのダウンロード


Kaggleからデータを取得してください。https://www.kaggle.com/c/dogs-vs-cats

## ダウンロードしたファイルを確認する

ダウンロードしたファイルは、dogscats.zipと言うファイル名とします。

In [0]:
ls

dogscats.zip  [0m[01;34msample_data[0m/


## データセットを解凍する

In [0]:
!unzip dogscats.zip

Archive:  dogscats.zip
   creating: dogscats/
   creating: dogscats/sample/
   creating: dogscats/sample/train/
   creating: dogscats/sample/train/cats/
  inflating: dogscats/sample/train/cats/cat.2921.jpg  
  inflating: dogscats/sample/train/cats/cat.394.jpg  
  inflating: dogscats/sample/train/cats/cat.4865.jpg  
  inflating: dogscats/sample/train/cats/cat.3570.jpg  
  inflating: dogscats/sample/train/cats/cat.2266.jpg  
  inflating: dogscats/sample/train/cats/cat.9021.jpg  
  inflating: dogscats/sample/train/cats/cat.11737.jpg  
  inflating: dogscats/sample/train/cats/cat.4600.jpg  
   creating: dogscats/sample/train/dogs/
  inflating: dogscats/sample/train/dogs/dog.1402.jpg  
  inflating: dogscats/sample/train/dogs/dog.1614.jpg  
  inflating: dogscats/sample/train/dogs/dog.8643.jpg  
  inflating: dogscats/sample/train/dogs/dog.6391.jpg  
  inflating: dogscats/sample/train/dogs/dog.2423.jpg  
  inflating: dogscats/sample/train/dogs/dog.9077.jpg  
  inflating: dogscats/sample/train/d

# 調理手順

## 学習データ確認する（任意画像）

In [0]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
train_image_path = './dogscats/train/cats/cat.3533.jpg'
Image.open(train_image_path)

## 検証データを確認する（任意画像）

In [0]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
valid_image_path = './dogscats/valid/cats/cat.4282.jpg'
Image.open(valid_image_path)

## 学習データと検証データを分ける



In [0]:
from chainer import datasets
cats_images_train_path = 'dogscats/train/cats/'
dogs_images_train_path = 'dogscats/train/dogs/'
cats_images_valid_path = 'dogscats/valid/cats/'
dogs_images_valid_path = 'dogscats/valid/dogs/'

In [0]:
image_and_teacher_label_list = []

## 関数の定義 get_image_teacher_label_list()の定義

In [0]:
import os

def get_image_teacher_label_list(dir, label):
  filepath_list = []
  files = os.listdir(dir)
  for file in files:
    filepath_list.append((dir + file, label))
  return filepath_list

## 学習と検証データをリストにする



In [0]:
# 学習データ猫の画像のフォルダ、ラベルは０：猫です
image_and_teacher_label_list.extend(get_image_teacher_label_list(cats_images_train_path, 0))
# 学習データ犬の画像のフォルダ、ラベルは１：犬です
image_and_teacher_label_list.extend(get_image_teacher_label_list(dogs_images_train_path, 1))
# 検証データ猫の画像のフォルダ、ラベルは０：猫です
image_and_teacher_label_list.extend(get_image_teacher_label_list(cats_images_valid_path, 0))
# 検証データ犬の画像のフォルダ、ラベルは１：犬です
image_and_teacher_label_list.extend(get_image_teacher_label_list(dogs_images_valid_path, 1))

In [0]:
print(image_and_teacher_label_list)

[('dogscats/train/cats/cat.9437.jpg', 0), ('dogscats/train/cats/cat.10925.jpg', 0), ('dogscats/train/cats/cat.7916.jpg', 0), ('dogscats/train/cats/cat.3395.jpg', 0), ('dogscats/train/cats/cat.5522.jpg', 0), ('dogscats/train/cats/cat.7106.jpg', 0), ('dogscats/train/cats/cat.2729.jpg', 0), ('dogscats/train/cats/cat.8929.jpg', 0), ('dogscats/train/cats/cat.3876.jpg', 0), ('dogscats/train/cats/cat.9398.jpg', 0), ('dogscats/train/cats/cat.11534.jpg', 0), ('dogscats/train/cats/cat.2571.jpg', 0), ('dogscats/train/cats/cat.5987.jpg', 0), ('dogscats/train/cats/cat.6688.jpg', 0), ('dogscats/train/cats/cat.7733.jpg', 0), ('dogscats/train/cats/cat.9050.jpg', 0), ('dogscats/train/cats/cat.5401.jpg', 0), ('dogscats/train/cats/cat.4682.jpg', 0), ('dogscats/train/cats/cat.8844.jpg', 0), ('dogscats/train/cats/cat.6645.jpg', 0), ('dogscats/train/cats/cat.9217.jpg', 0), ('dogscats/train/cats/cat.8786.jpg', 0), ('dogscats/train/cats/cat.7328.jpg', 0), ('dogscats/train/cats/cat.11270.jpg', 0), ('dogscats/t

## 画像データ形式の整備

In [0]:
# 画像データをChainerのConvolution2Dに使えるように整備します
# 最後を先頭(x,y,color) => (color,x,y)
def data_reshape(width_height_channel_image):
  image_array = np.array(width_height_channel_image)
  return image_array.transpose(2, 0, 1)

入力画像のサイズを決めます。

In [0]:
INPUT_WIDTH = 128
INPUT_HEIGHT = 128

## データ形状変換の結果確認

In [0]:
import cv2
import matplotlib.pyplot as plt
# 配列形状変換する前の画像データのshape （画像の幅、高さ、チャンネル）
image_before_reshape= cv2.imread('./dogscats/train/cats/cat.9021.jpg')
print(image_before_reshape.shape)

# 配列形状変換した後の画像データのshape
image_after_reshape = data_reshape(image_before_reshape)
print(image_after_reshape.shape)

(251, 216, 3)
(3, 251, 216)


##  画像の前処理関数　adapt_data_to_convolution2d_format()





In [0]:
def adapt_data_to_convolution2d_format(input_image):
  image , label = input_image
  
  # image のデータを8ビットの符号なし整数に変換します
  image = image.astype(np.uint8)
  # Chainerの中データを用意する段階で、image.transpose(2, 0, 1)しましたので、（最後を先頭 (x,y,color)=> (color,x,y))
  # 正しくリサイズできるため、一回データの構造を戻して（先頭を最後に => (x,y,color))
  image = Image.fromarray(image.transpose(1, 2, 0))

  # 共通の画像のリサイズ処理です。第5章の１番目のレシピを参照してください
  result_image = image.resize((INPUT_WIDTH, INPUT_HEIGHT), Image.LANCZOS)
  
  # リサイズしたら、画像データをChainerのConvolution2Dに使えるように戻します（もう一回最後を先頭に  => (color,x,y))
  image = data_reshape(result_image)
  
  # データを0～1の間の値に変換します
  image = image.astype(np.float32) / 255

  return image, label

## データセット作成

In [0]:
from chainer.datasets import LabeledImageDataset
dogscats_dataset= LabeledImageDataset(image_and_teacher_label_list)

In [0]:
from chainer.datasets import TransformDataset
transformed_dataset = TransformDataset(dogscats_dataset, adapt_data_to_convolution2d_format)

## 学習データと検証データを分ける


In [0]:
from chainer import datasets

# 前処理済のデータを分けます
train_data, test_data= datasets.split_dataset_random(transformed_dataset, int(len(transformed_dataset) * 0.8), seed=0)

In [0]:
print(int(len(transformed_dataset) * 0.8))
print(len(train_data))
print(len(test_data))

20000
20000
5000


In [0]:
print(train_data)

<chainer.datasets.sub_dataset.SubDataset object at 0x7f1cd1ec87f0>


In [0]:
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import training,serializers,Chain,datasets,sequential,optimizers,iterators
from chainer.training import extensions,Trainer

In [0]:
GPU_ID = 0
BATCH_SIZE = 64
MAX_EPOCH = 10

## CNNを設定する



In [0]:
class CNN(Chain):
  # コンストラクタ
  def __init__(self):
    super(CNN, self).__init__()

    with self.init_scope():
      self.conv1 = L.Convolution2D(None, out_channels=32, ksize=3, stride=1, pad=1)
      self.conv2 = L.Convolution2D(in_channels=32, out_channels=64, ksize=3, stride=1, pad=1)
      self.conv3 = L.Convolution2D(in_channels=64, out_channels=128, ksize=3, stride=1, pad=1)
      self.conv4 = L.Convolution2D(in_channels=128, out_channels=256, ksize=3, stride=1, pad=1)
      self.layer1 = L.Linear(None, 1000)
      self.layer2 = L.Linear(1000, 2)
  #
  def __call__(self, input):
    func = F.max_pooling_2d(F.relu(self.conv1(input)), ksize=2, stride=2)
    func = F.max_pooling_2d(F.relu(self.conv2(func)), ksize=2, stride=2)
    func = F.max_pooling_2d(F.relu(self.conv3(func)), ksize=2, stride=2)
    func = F.max_pooling_2d(F.relu(self.conv4(func)), ksize=2, stride=2)
    func = F.dropout(F.relu(self.layer1(func)), ratio=0.80)
    func = self.layer2(func)
    return func

In [0]:
model = L.Classifier(CNN())
model.to_gpu(GPU_ID)

<chainer.links.model.classifier.Classifier at 0x7f1cb83e61d0>

## 反復子

In [0]:
# 学習用の反復子
train_iterator = iterators.MultiprocessIterator(train_data, BATCH_SIZE)
# 検証用の反復子
test_iterator = iterators.MultiprocessIterator(test_data, BATCH_SIZE, False, False)

## Optimizerの設定

In [0]:
optimizer = optimizers.Adam().setup(model)

## Updaterの設定

In [0]:
updater = training.StandardUpdater(train_iterator, optimizer, device=GPU_ID)

## trainerの設定

In [0]:
trainer = Trainer(updater, stop_trigger=(MAX_EPOCH, 'epoch'))

## extensionの設定


In [0]:
trainer.extend(extensions.LogReport())
trainer.extend(extensions.Evaluator(test_iterator, model, device=GPU_ID), name='validation')
trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'main/accuracy', 'validation/main/loss', 'validation/main/accuracy', 'elapsed_time']))

# レポートのグラフを出力するextension
trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], x_key='epoch', marker='^', grid=True, file_name='loss.png'))
trainer.extend(extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'], x_key='epoch',marker='^' , grid=True, file_name='accuracy.png'))

## 学習実行

In [0]:
trainer.run()

epoch       main/loss   main/accuracy  validation/main/loss  validation/main/accuracy  elapsed_time
[J1           0.679944    0.602386       0.588261              0.701345                  153.276       
[J2           0.546872    0.721304       0.498922              0.765823                  293.927       
[J3           0.447833    0.792033       0.392759              0.825356                  436.147       
[J4           0.366081    0.83769        0.315681              0.86788                   578.165       


## 学習結果の確認

正解率（Accuracy）

In [0]:
Image.open('result/accuracy.png')

誤差

In [0]:
Image.open('result/loss.png')

## 検証する（学習済モデルを使う）

In [0]:
ls

## モデルを書き出す

In [0]:
serializers.save_hdf5("chainer-dogscats-model.h5", model)

In [0]:
ls

# ここからは応用のプロセス

学習済のモデルを使うプロセスです。


## 関数の定義 convert_test_data





In [0]:
def convert_test_data(image_file_path, size, show=False):

  image = Image.open(image_file_path)

  # 共通の画像のリサイズ処理です。第5章の１番目のレシピを参照してください
  result_image = image.resize((INPUT_WIDTH, INPUT_HEIGHT), Image.LANCZOS)
  
  # 画像データをChainerのConvolution2Dに使えるように整備します
  image = data_reshape(result_image)

  # 型をfloat32に変換します
  result = image.astype(np.float32)
  # 学習済みモデルに渡します
  result = model.xp.asarray(result)
  # モデルに渡すデータフォーマットに変換します
  result = result[None, ...]
  return result


## 検証用の写真を選ぶ

In [0]:
ls dogscats/test1

## 画像のサイズの設定

In [0]:
INPUT_WIDTH = 128
INPUT_HEIGHT = 128

In [0]:
test_image_url='dogscats/test1/6006.jpg'

In [0]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
image_path = test_image_url
Image.open(image_path)

In [0]:
from chainer.cuda import to_cpu
# 学習時と同じ画像のサイズにしなければいけません
test_data= convert_test_data(test_image_url, (INPUT_WIDTH, INPUT_HEIGHT))
with chainer.using_config('train', False), chainer.using_config(
            'enable_backprop', False):
  test_teacher_labels = model.predictor(test_data)
  test_teacher_labels = to_cpu(test_teacher_labels.array)
  test_teacher_label = test_teacher_labels.argmax(axis=1)[0]
  if test_teacher_label == 0:
    retval = '猫'
  else:
    retval = '犬'

print(retval)

## 学習済モデルをダウンロード

In [0]:
ls　 -al

In [0]:
# from google.colab import files
# files.download('chainer-dogscats-model.h5')

## 自分のGoogle Driveにドキュメントとして保存する

In [0]:
!pip install -U -q PyDrive

In [0]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# 認証とPyDrive クライアントの作成
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

## ファイルの作成

In [0]:
uploaded = drive.CreateFile({'title': 'chainer-dogscats-model.h5'})
uploaded.SetContentFile('chainer-dogscats-model.h5')

## 保存(Google Driveへのアップロード)

In [0]:
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))