# 「部品基盤のはんだ付け状態」の良品、不良品を判定
#  前処理で保存した特徴量、ターゲットから
#  このモデルをベースラインとする

In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import keras as ks
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam, RMSprop
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from keras.utils import np_utils
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K
from keras.applications.vgg19 import VGG19, preprocess_input
from keras.preprocessing import image

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, recall_score, precision_score, accuracy_score, f1_score

import os
import glob

%matplotlib inline


Using TensorFlow backend.


In [2]:
# 訓練データ読み出し
X_train0 = np.load('X_train.npy')
y_train0 = np.load('y_train.npy')


In [3]:
X_train0.shape, y_train0.shape

((400, 160, 160, 3), (400,))

In [4]:
# 訓練とテストデータに分割
X_train, X_test, y_train, y_test = train_test_split(X_train0, y_train0, test_size=0.2, random_state=42)

In [5]:
X_train = preprocess_input(X_train)
X_test = preprocess_input(X_test)

In [6]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((320, 160, 160, 3), (320,), (80, 160, 160, 3), (80,))

In [7]:
X_train[0]

array([[[ 97.061   ,  84.221   ,  77.32    ],
        [ 88.061   ,  75.221   ,  68.32    ],
        [102.061   ,  89.221   ,  82.32    ],
        ...,
        [ 96.061   ,  83.221   ,  76.32    ],
        [ 99.061   ,  86.221   ,  79.32    ],
        [ 90.061   ,  77.221   ,  70.32    ]],

       [[ 92.061   ,  79.221   ,  72.32    ],
        [103.061   ,  90.221   ,  83.32    ],
        [106.061   ,  93.221   ,  86.32    ],
        ...,
        [ 95.061   ,  82.221   ,  75.32    ],
        [ 98.061   ,  85.221   ,  78.32    ],
        [ 92.061   ,  79.221   ,  72.32    ]],

       [[ 95.061   ,  82.221   ,  75.32    ],
        [103.061   ,  90.221   ,  83.32    ],
        [103.061   ,  90.221   ,  83.32    ],
        ...,
        [ 93.061   ,  80.221   ,  73.32    ],
        [ 99.061   ,  86.221   ,  79.32    ],
        [ 92.061   ,  79.221   ,  72.32    ]],

       ...,

       [[ 46.060997,  33.221   ,  26.32    ],
        [ 75.061   ,  62.221   ,  55.32    ],
        [ 86.061   ,  

In [8]:
# テストデータ読み出し
X_valid = np.load('X_test.npy')
y_valid_Image = np.load('y_test_file.npy', allow_pickle=True)

In [9]:
X_valid = preprocess_input(X_valid)

In [10]:
X_valid.shape, y_valid_Image.shape

((213, 160, 160, 3), (213,))

In [11]:
base_model = VGG19(weights='imagenet', include_top=False, pooling='avg')

In [12]:
base_model.summary()

Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0     

In [13]:
# モデル生成
model = Sequential([
    base_model, 
])

In [14]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg19 (Model)                (None, 512)               20024384  
Total params: 20,024,384
Trainable params: 20,024,384
Non-trainable params: 0
_________________________________________________________________


In [15]:
# # 全結合層
# model.add(Flatten())
# model.add(Dense(128, activation='relu'))
# # ドロップアウト
# model.add(Dropout(0.5))

# 出力層 二項分類なので sigmoid
model.add(Dense(1, activation='sigmoid'))

In [16]:
base_model.trainable = False

In [17]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg19 (Model)                (None, 512)               20024384  
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 513       
Total params: 20,024,897
Trainable params: 513
Non-trainable params: 20,024,384
_________________________________________________________________


In [18]:
# コンパイル
# 2項分類なので損失関数は [binary_crossentropy]
# 最適化は Adam
model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(lr=0.005),
              metrics=['accuracy'])

In [19]:
# EarlyStopping
callbaks = EarlyStopping(monitor='val_loss', patience=10)

In [20]:
# モデル訓練
model.fit(X_train, y_train, batch_size=16, validation_data=(X_test, y_test), callbacks=[callbaks], epochs=25)
# model.fit(X_train, y_train, callbacks=[callbaks], epochs=20)


Train on 320 samples, validate on 80 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.callbacks.History at 0x1b32f717128>

In [21]:
base_y_pred = model.predict_classes(X_test)

In [22]:
base_y_pred[:5]

array([[1],
       [1],
       [1],
       [0],
       [0]])

In [23]:
confusion_matrix(y_test, base_y_pred)

array([[ 7,  9],
       [ 5, 59]], dtype=int64)

In [24]:
# 正解率
accuracy_score(y_test, base_y_pred)

0.825

In [25]:
# 適合率
precision_score(y_test, base_y_pred)

0.8676470588235294

In [26]:
# 再現率
recall_score(y_test, base_y_pred)

0.921875

In [27]:
# F1メジャー
f1_score(y_test, base_y_pred)

0.893939393939394

In [28]:
y_valid = model.predict_classes(X_valid)

In [29]:
y_valid[:5], y_valid[6].flatten()

(array([[1],
        [0],
        [0],
        [0],
        [1]]),
 array([1]))

In [30]:
y_valid.sum(), type(y_valid), y_valid[0]

(114, numpy.ndarray, array([1]))

In [31]:
result_df = pd.DataFrame([y_valid_Image, y_valid]).T

In [32]:
result_df.rename(columns={0: 'Image', 1: 'y'}, inplace=True)

In [33]:
result_df[:5]

Unnamed: 0,Image,y
0,../data/test/test\000.jpeg,[1]
1,../data/test/test\001.jpeg,[0]
2,../data/test/test\002.jpeg,[0]
3,../data/test/test\003.jpeg,[0]
4,../data/test/test\004.jpeg,[1]


In [34]:
result_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 213 entries, 0 to 212
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Image   213 non-null    object
 1   y       213 non-null    object
dtypes: object(2)
memory usage: 3.5+ KB


In [35]:
result_df['Image'] = result_df['Image'].str.replace(r'../data/test/test\\', "")

In [36]:
# result_df[0] = result_df[0].str.replace('\\', "")

In [37]:
result_df['y'][result_df['y'] == 1] = 1
result_df['y'][result_df['y'] == 0] = 0

In [38]:
result_df[:5]

Unnamed: 0,Image,y
0,000.jpeg,1
1,001.jpeg,0
2,002.jpeg,0
3,003.jpeg,0
4,004.jpeg,1


In [39]:
result_df.y.sum()

114

In [40]:
result_df.to_csv('./VGG19-600-1.tsv', index=False, header=False, sep='\t')

In [41]:
# result_df[result_df['y'] == 1]

In [42]:
type(result_df['y'][0])

int

In [43]:
print("({0})".format(result_df['y'][0]))

(1)


In [44]:
np.where(y_test[:] == 1).count

<function tuple.count>

In [45]:
np.count_nonzero(y_train == 1), np.count_nonzero(y_train == 0)

(236, 84)

In [46]:
K.image_data_format()

'channels_last'