## 建立資料集
參考資料：Python機器學習與深度學習特訓班，碁峰出版社


In [None]:
import os,cv2,glob
from sklearn.model_selection import train_test_split
import numpy as np

# Seed NumPy's global random generator so NumPy-based randomness is repeatable.
np.random.seed(10)

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Disabled: change into the project folder.
#%cd '/content/drive/MyDrive/CNN_pets'

In [None]:
%cd '/content/drive/MyDrive/CNN_pets/PetImages'

# 將原始圖片 resize 後存在 images 串列，標籤存在 labels 串列
images=[]
labels=[]
dict_labels = {"Cat1":0, "Dog1":1, "Cat2":0, "Dog2":1}
size = (80,80) #由於原始資料影像大小不一，因此制定一個統一值
for folders in glob.glob("*"):
    print(folders,"圖片讀取中…")
    # 只讀取貓、狗圖片
    for filename in os.listdir(folders):
        #label=folders.split("\\")[-1]
        try:
            img=cv2.imread(os.path.join(folders,filename))
            img = cv2.resize(img,dsize=size)
            if img is not None:
                images.append(img)
                labels.append(dict_labels[folders])
        except:
            print(os.path.join(folders,filename),"無法讀取!")
            pass



In [None]:
# Sanity check: print how many images and how many labels were collected.
print(len(images),len(labels))

In [None]:
# Hold out 20% of the samples as the test set (fixed random_state for a
# repeatable split) and turn each of the four resulting lists into an array.
X_train, X_test, y_train, y_test = (
    np.array(part)
    for part in train_test_split(images, labels, test_size=0.2, random_state=1)
)

# The trailing values are the outputs recorded from the author's run.
print(len(X_train), len(X_test))     # 19956 4990
print(X_train.shape, y_train.shape)  # (19956, 80, 80, 3) (19956,)
print(X_test.shape, y_test.shape)    # (4990, 80, 80, 3) (4990,)

In [None]:
# Display the pixel array of the first training image.
X_train[0]

In [None]:
# Persist the four splits as .npy files in the folder the loading cell reads
# from (CNN_pets/Pets_Datasets). An absolute path is used so the result does
# not depend on the current directory, and the files stay out of PetImages.
imagesavepath = '/content/drive/MyDrive/CNN_pets/Pets_Datasets/'
os.makedirs(imagesavepath, exist_ok=True)
np.save(os.path.join(imagesavepath, 'X_train.npy'), X_train)
np.save(os.path.join(imagesavepath, 'X_test.npy'), X_test)
np.save(os.path.join(imagesavepath, 'y_train.npy'), y_train)
np.save(os.path.join(imagesavepath, 'y_test.npy'), y_test)

## 載入先前儲存的數據以建立模型

In [None]:
%cd '/content/drive/MyDrive/CNN_pets'

# Reload the four dataset arrays from the Pets_Datasets folder under the
# project directory selected above.
X_train = np.load('./Pets_Datasets/X_train.npy')
X_test = np.load('./Pets_Datasets/X_test.npy')
y_train = np.load('./Pets_Datasets/y_train.npy')
y_test = np.load('./Pets_Datasets/y_test.npy')

In [None]:
# Display the shape of the loaded training images.
X_train.shape

In [None]:
import numpy as np
# NOTE(review): `keras.utils.np_utils` is reportedly unavailable in newer
# Keras releases — confirm this import works on the installed version.
from keras.utils import np_utils
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dropout,Flatten,Dense
# Seed NumPy's global random generator.
np.random.seed(10)

In [None]:
# Training split: force the features into a (count, 80, 80, 3) float32 array,
# divide the pixel values by 255, and one-hot encode the labels.
X_train_vector = np.reshape(X_train, (len(X_train), 80, 80, 3)).astype('float32')
X_train_normalize = X_train_vector / 255
y_train_onehot = np_utils.to_categorical(y_train)

# Test split: exactly the same three steps.
X_test_vector = np.reshape(X_test, (len(X_test), 80, 80, 3)).astype('float32')
X_test_normalize = X_test_vector / 255
y_test_onehot = np_utils.to_categorical(y_test)



In [None]:
# Build the CNN: two convolution / pooling / dropout stages followed by a
# dense classifier. Layers are listed in the order they are applied.
model = Sequential([
    # Convolution 1: ten 5x5 filters; 'same' padding keeps the 80x80 size.
    Conv2D(filters=10, kernel_size=(5, 5), padding='same',
           input_shape=(80, 80, 3), activation='relu'),
    # Pooling 1: halves each spatial dimension -> 40x40 with 10 channels.
    MaxPooling2D(pool_size=(2, 2)),
    # Dropout against overfitting, drop rate 0.1.
    Dropout(0.1),
    # Convolution 2: twenty 5x5 filters, size unchanged by 'same' padding.
    Conv2D(filters=20, kernel_size=(5, 5), padding='same', activation='relu'),
    # Pooling 2: halves again -> 20x20 with 20 channels.
    MaxPooling2D(pool_size=(2, 2)),
    # Dropout against overfitting, drop rate 0.2.
    Dropout(0.2),
    # Flatten: 20*20*20 = 8000 values per image.
    Flatten(),
    # Hidden layer.
    Dense(units=512, activation='relu'),
    # Output layer: one softmax unit per class.
    Dense(units=2, activation='softmax'),
])

model.summary()  # print the layer-by-layer summary

In [None]:
 # Configure training: categorical cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train on (X_train_normalize, y_train_onehot): 20% of the training data is
# held out for validation, 10 epochs, batches of 200 samples, progress shown.
train_history =model.fit(x=X_train_normalize,
                         y=y_train_onehot,validation_split=0.2,
                         epochs=10, batch_size=200,verbose=1)

In [None]:
# Evaluate on the test set and print the second score, labelled as accuracy.
scores = model.evaluate(X_test_normalize, y_test_onehot)
print('\n準確率=',scores[1])

In [None]:
def show_images_labels_predictions(images,labels,
                                  predictions,start_id,num=10,bgr=True):
    """Plot consecutive images in a 5x5 grid, titled with label and prediction.

    Args:
        images: indexable collection of images to draw.
        labels: true label for each image, indexed like ``images``.
        predictions: predicted label for each image; pass an empty sequence
            to show only the true labels.
        start_id: index of the first image to draw.
        num: number of images to draw. It is capped at 25 (the grid size) and
            at the number of images available from ``start_id`` onward.
        bgr: when True, 3-channel images are treated as BGR (the channel
            order produced by ``cv2.imread``, which is how this notebook
            loads its images) and flipped to RGB before display, so colours
            are not shown with red and blue swapped.
    """
    # Never ask for more panels than the grid holds or than images remain;
    # the original indexed past the end of `images` in that case.
    num = max(0, min(num, 25, len(images) - start_id))
    has_predictions = len(predictions) > 0

    plt.gcf().set_size_inches(12, 14)
    for panel, idx in enumerate(range(start_id, start_id + num), start=1):
        ax = plt.subplot(5, 5, panel)

        img = images[idx]
        # imshow expects RGB; reverse the channel axis of BGR colour images.
        if bgr and getattr(img, 'ndim', 0) == 3 and img.shape[-1] == 3:
            img = img[..., ::-1]
        ax.imshow(img)

        if has_predictions:
            # Mark a correct prediction with (o) and a wrong one with (x).
            mark = ' (o)' if predictions[idx] == labels[idx] else ' (x)'
            title = ('ai = ' + str(predictions[idx]) + mark
                     + '\nlabel = ' + str(labels[idx]))
        else:
            # No predictions supplied: show the true label only.
            title = 'label = ' + str(labels[idx])

        ax.set_title(title, fontsize=12)
        # Hide the tick marks on both axes.
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()


In [None]:
# Predict: the index of the largest model output per image is the predicted class.
prediction = np.argmax(model.predict(X_test_normalize), axis = 1)

# Show images with their predicted and true labels, starting at index 0.
show_images_labels_predictions(X_test, y_test, prediction, 0)

## 儲存模型，以分次進行訓練
### 請重啟執行階段，重新載入資料集，進行資料預處理及建立模型

In [None]:
 # Configure training: categorical cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train on (X_train_normalize, y_train_onehot): 20% of the training data is
# held out for validation, 2 epochs, batches of 200 samples, progress shown.
train_history =model.fit(x=X_train_normalize,
                         y=y_train_onehot,validation_split=0.2,
                         epochs=2, batch_size=200,verbose=1)

# Evaluate on the test set and print the second score, labelled as accuracy.
scores = model.evaluate(X_test_normalize, y_test_onehot)
print('\n準確率=',scores[1])

# Predict: the index of the largest model output per image is the predicted class.
prediction = np.argmax(model.predict(X_test_normalize), axis = 1)

# Show images with their predicted and true labels, starting at index 0.
show_images_labels_predictions(X_test, y_test, prediction, 0)

In [None]:
 # Save the model to an HDF5 file in the current directory.
model.save('Pet_cnn_model.h5')
print("Pet_cnn_model.h5 模型儲存完畢!")
# Also save the weights on their own; the resume-training cell loads this file.
model.save_weights("Pet_cnn_model.weight")
print("Pet_cnn_model.weight 模型參數儲存完畢!")

In [None]:
# Remove the in-memory model; `model` is undefined until it is assigned again.
del model

In [None]:
# NOTE(review): when run right after `del model`, `model` is undefined and this
# cell raises NameError — presumably an intentional demonstration before the
# model is reloaded from disk; confirm.
# Evaluate on the test set and print the second score, labelled as accuracy.
scores = model.evaluate(X_test_normalize, y_test_onehot)
print('\n準確率=',scores[1])

# Predict: the index of the largest model output per image is the predicted class.
prediction = np.argmax(model.predict(X_test_normalize), axis = 1)

# Show images with their predicted and true labels, starting at index 0.
show_images_labels_predictions(X_test, y_test, prediction, 0)

In [None]:
# Load the model back from the HDF5 file written by the save cell.
from keras.models import load_model

print("載入模型 Pet_cnn_model.h5")
model = load_model('Pet_cnn_model.h5')

In [None]:
# Evaluate on the test set and print the second score, labelled as accuracy.
scores = model.evaluate(X_test_normalize, y_test_onehot)
print('\n準確率=',scores[1])

# Predict: the index of the largest model output per image is the predicted class.
prediction = np.argmax(model.predict(X_test_normalize), axis = 1)

# Show images with their predicted and true labels, starting at index 0.
show_images_labels_predictions(X_test, y_test, prediction, 0)

In [None]:
 # Training is cumulative: each run of this cell starts from the saved weights,
# so accuracy is expected to keep improving across runs.
try:
    model.load_weights("Pet_cnn_model.weight")
    print("載入模型參數成功，繼續訓練模型!")
except Exception as err:
    # Best-effort resume: keep going with the current weights, but report why
    # loading failed. `Exception` (not a bare except) lets a keyboard
    # interrupt still stop the cell.
    print("載入模型失敗，開始訓練一個新模型!")
    print(err)

# Configure training: categorical cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train on (X_train_normalize, y_train_onehot): 20% of the training data is
# held out for validation, 2 epochs, batches of 200 samples, progress shown.
train_history = model.fit(x=X_train_normalize,
                          y=y_train_onehot,
                          validation_split=0.2,
                          epochs=2, batch_size=200, verbose=1)

# Evaluate on the test set and print the second score, labelled as accuracy.
scores = model.evaluate(X_test_normalize, y_test_onehot)
print('\n準確率=',scores[1])

# Predict: the index of the largest model output per image is the predicted class.
prediction = np.argmax(model.predict(X_test_normalize), axis=1)

# Show images with their predicted and true labels, starting at index 0.
show_images_labels_predictions(X_test, y_test, prediction, 0)

## 預測新的圖片

In [None]:
%cd "/content/drive/MyDrive/CNN_pets/imagedata"

files = glob.glob("*.jpg" )
X_test = []
y_test = []
dict_labels = {"Cat":0, "Dog":1}
size = (80,80) #由於原始資料影像大小不一，因此制定一個統一值
for file in files:
    img=cv2.imread(file)
    img = cv2.resize(img, dsize=size)
    X_test.append(img)
    label=file[0:3]  # "imagedata\Cat1.jpg" 第10-12個字元 Cat為 label
    y_test.append(dict_labels[label])

In [None]:
# Convert the collected images and labels to arrays.
X_test = np.array(X_test)
y_test = np.array(y_test)

# Images form a (count, 80, 80, 3) array. Labels are one value per image, so
# they stay a flat vector of length `count`; reshaping them to the image
# shape, as the original did, raises ValueError for any non-empty input.
X_test = X_test.reshape(len(X_test), 80, 80, 3)
y_test = y_test.reshape(len(y_test))