# 「部品基盤のはんだ付け状態」の良品、不良品を判定
#  前処理で保存した特徴量、ターゲットから
#  このモデルをベースラインとする

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import keras as ks
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam, RMSprop
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from keras.utils import np_utils
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.preprocessing import image

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, recall_score, precision_score, accuracy_score, f1_score

import os
import glob

%matplotlib inline


Using TensorFlow backend.


In [2]:
# 訓練データ読み出し
X_train0 = np.load('X_train.npy')
y_train0 = np.load('y_train.npy')


In [3]:
# 訓練とテストデータに分割
X_train, X_test, y_train, y_test = train_test_split(X_train0, y_train0, test_size=0.2, random_state=42)

In [4]:
X_train = preprocess_input(X_train)
X_test = preprocess_input(X_test)

In [5]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((320, 160, 160, 3), (320,), (80, 160, 160, 3), (80,))

In [6]:
# テストデータ読み出し
X_valid = np.load('X_test.npy')
y_valid_Image = np.load('y_test_file.npy', allow_pickle=True)

In [7]:
X_valid = preprocess_input(X_valid)

In [8]:
X_valid.shape, y_valid_Image.shape

((213, 160, 160, 3), (213,))

In [9]:
base_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

In [10]:
base_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, None, 3 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, None, None, 3 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, None, None, 6 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, None, None, 6 256         conv1[0][0]                      
___________________________________________________________________________________________

In [11]:
# モデル生成
model = Sequential([
    base_model, 
])

In [12]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 2048)              23587712  
Total params: 23,587,712
Trainable params: 23,534,592
Non-trainable params: 53,120
_________________________________________________________________


In [13]:
# # 全結合層
# model.add(Flatten())
# model.add(Dense(128, activation='relu'))
# # ドロップアウト
# model.add(Dropout(0.5))

# 出力層 二項分類なので sigmoid
model.add(Dense(1, activation='sigmoid'))

In [14]:
base_model.trainable = False

In [15]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 2048)              23587712  
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 2049      
Total params: 23,589,761
Trainable params: 2,049
Non-trainable params: 23,587,712
_________________________________________________________________


In [16]:
# コンパイル
# 2項分類なので損失関数は [binary_crossentropy]
# 最適化は Adam
model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(lr=0.00005),
              metrics=['accuracy'])

In [17]:
# EarlyStopping
callbaks = EarlyStopping(monitor='val_loss', patience=15)
# callbaks = EarlyStopping(monitor=['accuracy', 'val_loss'], patience=7)

In [18]:
# モデル訓練
model.fit(X_train, y_train, batch_size=16, validation_data=(X_test, y_test), callbacks=[callbaks], epochs=20)
# model.fit(X_train, y_train, callbacks=[callbaks], epochs=20)


Train on 320 samples, validate on 80 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x1a61aaa0160>

In [19]:
base_y_pred = model.predict_classes(X_test)

In [20]:
base_y_pred[:5], y_test[:5]

(array([[1],
        [0],
        [1],
        [1],
        [1]]),
 array([1, 1, 1, 1, 1]))

In [21]:
confusion_matrix(y_test, base_y_pred)

array([[ 3, 13],
       [ 4, 60]], dtype=int64)

In [22]:
# 正解率
accuracy_score(y_test, base_y_pred)

0.7875

In [23]:
# 適合率
precision_score(y_test, base_y_pred)

0.821917808219178

In [24]:
# 再現率
recall_score(y_test, base_y_pred)

0.9375

In [25]:
# F1メジャー
f1_score(y_test, base_y_pred)

0.8759124087591241

In [26]:
y_valid = model.predict_classes(X_valid)

In [27]:
y_valid[:5], y_valid[6].flatten()

(array([[1],
        [0],
        [1],
        [0],
        [1]]),
 array([1]))

In [28]:
y_valid.sum(), type(y_valid), y_valid[0]

(147, numpy.ndarray, array([1]))

In [29]:
result_df = pd.DataFrame([y_valid_Image, y_valid]).T

In [30]:
result_df.rename(columns={0: 'Image', 1: 'y'}, inplace=True)

In [31]:
result_df[:5]

Unnamed: 0,Image,y
0,../data/test/test\000.jpeg,[1]
1,../data/test/test\001.jpeg,[0]
2,../data/test/test\002.jpeg,[1]
3,../data/test/test\003.jpeg,[0]
4,../data/test/test\004.jpeg,[1]


In [32]:
result_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 213 entries, 0 to 212
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Image   213 non-null    object
 1   y       213 non-null    object
dtypes: object(2)
memory usage: 3.5+ KB


In [33]:
result_df['Image'] = result_df['Image'].str.replace(r'../data/test/test\\', "")

In [34]:
# result_df[0] = result_df[0].str.replace('\\', "")

In [35]:
result_df['y'][result_df['y'] == 1] = 1
result_df['y'][result_df['y'] == 0] = 0

In [36]:
result_df[:5]

Unnamed: 0,Image,y
0,000.jpeg,1
1,001.jpeg,0
2,002.jpeg,1
3,003.jpeg,0
4,004.jpeg,1


In [37]:
result_df.y.sum()

147

In [38]:
result_df.to_csv('./RES50-600-1.tsv', index=False, header=False, sep='\t')

In [39]:
result_df.y.sum()

147

In [40]:
# result_df[result_df['y'] == 1]

In [41]:
type(result_df['y'][0])

int

In [42]:
print("({0})".format(result_df['y'][0]))

(1)


In [43]:
np.where(y_test[:] == 1).count

<function tuple.count>

In [44]:
np.count_nonzero(y_train == 1), np.count_nonzero(y_train == 0)

(236, 84)

In [45]:
K.image_data_format()

'channels_last'