# 「部品基盤のはんだ付け状態」の良品、不良品を判定
#  前処理で保存した特徴量、ターゲットから
#  このモデルをベースラインとする

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import keras as ks
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam, RMSprop
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from keras.utils import np_utils
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, recall_score, precision_score, accuracy_score, f1_score

import os
import glob

%matplotlib inline


Using TensorFlow backend.


In [2]:
# 訓練データ読み出し
X_train0 = np.load('X_train.npy')
y_train0 = np.load('y_train.npy')

In [3]:
# 訓練とテストデータに分割
X_train2, X_test2, y_train, y_test = train_test_split(X_train0, y_train0, test_size=0.2, random_state=42)

In [4]:
X_train2[0]

array([[[201., 201., 201.],
        [192., 192., 192.],
        [206., 206., 206.],
        ...,
        [200., 200., 200.],
        [203., 203., 203.],
        [194., 194., 194.]],

       [[196., 196., 196.],
        [207., 207., 207.],
        [210., 210., 210.],
        ...,
        [199., 199., 199.],
        [202., 202., 202.],
        [196., 196., 196.]],

       [[199., 199., 199.],
        [207., 207., 207.],
        [207., 207., 207.],
        ...,
        [197., 197., 197.],
        [203., 203., 203.],
        [196., 196., 196.]],

       ...,

       [[150., 150., 150.],
        [179., 179., 179.],
        [190., 190., 190.],
        ...,
        [191., 191., 191.],
        [197., 197., 197.],
        [204., 204., 204.]],

       [[166., 166., 166.],
        [191., 191., 191.],
        [179., 179., 179.],
        ...,
        [193., 193., 193.],
        [200., 200., 200.],
        [204., 204., 204.]],

       [[162., 162., 162.],
        [187., 187., 187.],
        [180., 1

In [5]:
X_train2[0].max(), X_train2[0].min()

(255.0, 0.0)

In [6]:
X_test2[0]

array([[[210., 210., 210.],
        [219., 219., 219.],
        [207., 207., 207.],
        ...,
        [201., 201., 201.],
        [210., 210., 210.],
        [202., 202., 202.]],

       [[209., 209., 209.],
        [213., 213., 213.],
        [212., 212., 212.],
        ...,
        [211., 211., 211.],
        [198., 198., 198.],
        [209., 209., 209.]],

       [[205., 205., 205.],
        [212., 212., 212.],
        [206., 206., 206.],
        ...,
        [202., 202., 202.],
        [208., 208., 208.],
        [212., 212., 212.]],

       ...,

       [[211., 211., 211.],
        [212., 212., 212.],
        [218., 218., 218.],
        ...,
        [207., 207., 207.],
        [214., 214., 214.],
        [208., 208., 208.]],

       [[202., 202., 202.],
        [220., 220., 220.],
        [209., 209., 209.],
        ...,
        [217., 217., 217.],
        [196., 196., 196.],
        [215., 215., 215.]],

       [[209., 209., 209.],
        [209., 209., 209.],
        [220., 2

In [7]:
X_test2[0].max(), X_test2[0].min()

(255.0, 1.0)

In [8]:
X_train = preprocess_input(X_train2)
X_test = preprocess_input(X_test2)

In [9]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((320, 160, 160, 3), (320,), (80, 160, 160, 3), (80,))

In [10]:
X_train[0]

array([[[ 97.061   ,  84.221   ,  77.32    ],
        [ 88.061   ,  75.221   ,  68.32    ],
        [102.061   ,  89.221   ,  82.32    ],
        ...,
        [ 96.061   ,  83.221   ,  76.32    ],
        [ 99.061   ,  86.221   ,  79.32    ],
        [ 90.061   ,  77.221   ,  70.32    ]],

       [[ 92.061   ,  79.221   ,  72.32    ],
        [103.061   ,  90.221   ,  83.32    ],
        [106.061   ,  93.221   ,  86.32    ],
        ...,
        [ 95.061   ,  82.221   ,  75.32    ],
        [ 98.061   ,  85.221   ,  78.32    ],
        [ 92.061   ,  79.221   ,  72.32    ]],

       [[ 95.061   ,  82.221   ,  75.32    ],
        [103.061   ,  90.221   ,  83.32    ],
        [103.061   ,  90.221   ,  83.32    ],
        ...,
        [ 93.061   ,  80.221   ,  73.32    ],
        [ 99.061   ,  86.221   ,  79.32    ],
        [ 92.061   ,  79.221   ,  72.32    ]],

       ...,

       [[ 46.060997,  33.221   ,  26.32    ],
        [ 75.061   ,  62.221   ,  55.32    ],
        [ 86.061   ,  

In [11]:
X_train[0].max(), X_train[0].min()

(151.061, -123.68)

In [12]:
X_train2[0]

array([[[ 77.32    ,  84.221   ,  97.061   ],
        [ 68.32    ,  75.221   ,  88.061   ],
        [ 82.32    ,  89.221   , 102.061   ],
        ...,
        [ 76.32    ,  83.221   ,  96.061   ],
        [ 79.32    ,  86.221   ,  99.061   ],
        [ 70.32    ,  77.221   ,  90.061   ]],

       [[ 72.32    ,  79.221   ,  92.061   ],
        [ 83.32    ,  90.221   , 103.061   ],
        [ 86.32    ,  93.221   , 106.061   ],
        ...,
        [ 75.32    ,  82.221   ,  95.061   ],
        [ 78.32    ,  85.221   ,  98.061   ],
        [ 72.32    ,  79.221   ,  92.061   ]],

       [[ 75.32    ,  82.221   ,  95.061   ],
        [ 83.32    ,  90.221   , 103.061   ],
        [ 83.32    ,  90.221   , 103.061   ],
        ...,
        [ 73.32    ,  80.221   ,  93.061   ],
        [ 79.32    ,  86.221   ,  99.061   ],
        [ 72.32    ,  79.221   ,  92.061   ]],

       ...,

       [[ 26.32    ,  33.221   ,  46.060997],
        [ 55.32    ,  62.221   ,  75.061   ],
        [ 66.32    ,  

In [13]:
X_train2[0].max(), X_train2[0].min()

(151.061, -123.68)

In [14]:
# テストデータ読み出し
X_valid = np.load('X_test.npy')
y_valid_Image = np.load('y_test_file.npy', allow_pickle=True)

In [15]:
X_valid[0]

array([[[171., 171., 171.],
        [178., 178., 178.],
        [184., 184., 184.],
        ...,
        [172., 172., 172.],
        [178., 178., 178.],
        [180., 180., 180.]],

       [[184., 184., 184.],
        [176., 176., 176.],
        [180., 180., 180.],
        ...,
        [177., 177., 177.],
        [178., 178., 178.],
        [173., 173., 173.]],

       [[173., 173., 173.],
        [177., 177., 177.],
        [173., 173., 173.],
        ...,
        [186., 186., 186.],
        [143., 143., 143.],
        [186., 186., 186.]],

       ...,

       [[169., 169., 169.],
        [188., 188., 188.],
        [156., 156., 156.],
        ...,
        [189., 189., 189.],
        [179., 179., 179.],
        [177., 177., 177.]],

       [[157., 157., 157.],
        [202., 202., 202.],
        [165., 165., 165.],
        ...,
        [168., 168., 168.],
        [170., 170., 170.],
        [165., 165., 165.]],

       [[155., 155., 155.],
        [143., 143., 143.],
        [193., 1

In [16]:
X_valid[0].max(), X_valid[0].min()

(254.0, 0.0)

In [17]:
X_valid = preprocess_input(X_valid)

In [18]:
X_valid.shape, y_valid_Image.shape

((213, 160, 160, 3), (213,))

In [19]:
base_model = VGG16(weights='imagenet', include_top=False, pooling='avg')

In [20]:
base_model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0     

In [21]:
# モデル生成
model = Sequential([
    base_model, 
])

In [22]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 512)               14714688  
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________


In [23]:
# # 全結合層
# model.add(Flatten())
# model.add(Dense(128, activation='relu'))
# # ドロップアウト
# model.add(Dropout(0.5))

# 出力層 二項分類なので sigmoid
model.add(Dense(1, activation='sigmoid'))

In [24]:
base_model.trainable = False

In [25]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 512)               14714688  
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 513       
Total params: 14,715,201
Trainable params: 513
Non-trainable params: 14,714,688
_________________________________________________________________


In [26]:
# コンパイル
# 2項分類なので損失関数は [binary_crossentropy]
# 最適化は Adam
model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(lr=0.005),
              metrics=['binary_accuracy'])

In [27]:
# EarlyStopping
callbaks = EarlyStopping(monitor='val_loss', patience=5)

In [28]:
# モデル訓練
model.fit(X_train, y_train, batch_size=16, validation_data=(X_test, y_test), callbacks=[callbaks], epochs=50)
# model.fit(X_train, y_train, callbacks=[callbaks], epochs=20)


Train on 320 samples, validate on 80 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50


<keras.callbacks.callbacks.History at 0x274123e4898>

In [29]:
base_y_pred = model.predict_classes(X_test)

In [30]:
base_y_pred[:5]

array([[1],
       [0],
       [1],
       [0],
       [0]])

In [31]:
confusion_matrix(y_test, base_y_pred)

array([[10,  6],
       [ 7, 57]], dtype=int64)

In [32]:
# 正解率
accuracy_score(y_test, base_y_pred)

0.8375

In [33]:
# 適合率
precision_score(y_test, base_y_pred)

0.9047619047619048

In [34]:
# 再現率
recall_score(y_test, base_y_pred)

0.890625

In [35]:
# F1メジャー
f1_score(y_test, base_y_pred)

0.8976377952755906

In [36]:
y_train.sum(), y_test.sum()

(236, 64)

In [37]:
y_valid = model.predict_classes(X_valid)

In [38]:
y_valid[:5], y_valid[6].flatten()

(array([[0],
        [1],
        [0],
        [0],
        [1]]),
 array([1]))

In [39]:
y_valid.sum(), type(y_valid), y_valid[0]

(98, numpy.ndarray, array([0]))

In [40]:
result_df = pd.DataFrame([y_valid_Image, y_valid]).T

In [41]:
result_df.rename(columns={0: 'Image', 1: 'y'}, inplace=True)

In [42]:
result_df[:5]

Unnamed: 0,Image,y
0,../data/test/test\000.jpeg,[0]
1,../data/test/test\001.jpeg,[1]
2,../data/test/test\002.jpeg,[0]
3,../data/test/test\003.jpeg,[0]
4,../data/test/test\004.jpeg,[1]


In [43]:
result_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 213 entries, 0 to 212
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Image   213 non-null    object
 1   y       213 non-null    object
dtypes: object(2)
memory usage: 3.5+ KB


In [44]:
result_df['Image'] = result_df['Image'].str.replace(r'../data/test/test\\', "")

In [45]:
# result_df[0] = result_df[0].str.replace('\\', "")

In [46]:
result_df['y'][result_df['y'] == 1] = 1
result_df['y'][result_df['y'] == 0] = 0

In [47]:
result_df[:5]

Unnamed: 0,Image,y
0,000.jpeg,0
1,001.jpeg,1
2,002.jpeg,0
3,003.jpeg,0
4,004.jpeg,1


In [48]:
result_df.y.sum()

98

In [49]:
result_df.to_csv('./VGG16-400-1.tsv', index=False, header=False, sep='\t')

In [50]:
result_df[result_df['y'] == 1]

Unnamed: 0,Image,y
1,001.jpeg,1
4,004.jpeg,1
6,006.jpeg,1
8,008.jpeg,1
15,015.jpeg,1
...,...,...
206,206.jpeg,1
208,208.jpeg,1
209,209.jpeg,1
211,211.jpeg,1


In [51]:
type(result_df['y'][0])

int

In [52]:
print("({0})".format(result_df['y'][0]))

(0)


In [53]:
np.where(y_test[:] == 1).count

<function tuple.count>

In [54]:
np.count_nonzero(y_train == 1), np.count_nonzero(y_train == 0)

(236, 84)

In [55]:
K.image_data_format()

'channels_last'