In [None]:
import os
import time
import random
import joblib
from collections import Counter
from tqdm import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import cv2
from skimage import feature as ft

from sklearn.model_selection import StratifiedShuffleSplit, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, StackingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

import xgboost as xgb
from xgboost import XGBClassifier
import lightgbm as lgb


import warnings
warnings.filterwarnings("ignore")

In [None]:
data_dir = "/kaggle/input/neu-plantseedlingsclassification12/Nonsegmented_pack - k/"

# os.listdir() returns entries in arbitrary order, so the class folders are sorted to
# make the class-name -> integer-label mapping reproducible between runs and machines.
class_names = sorted(os.listdir(data_dir + "train/"))

# Maps each class folder name to its integer label (its position in `class_names`).
classes_dict = {name: i for i, name in enumerate(class_names)}
classes_dict

In [None]:
# Histogram equalization
def equalize(img):
    """Equalize the histogram of each colour channel of a BGR image independently.

    `img` is expected to come from cv2.imread(), i.e. a 3-channel image in BGR order.
    Returns a new image whose channels were equalized one by one and merged back in
    the same (B, G, R) order.
    """
    blue, green, red = cv2.split(img)
    equalized_channels = [cv2.equalizeHist(channel) for channel in (blue, green, red)]
    return cv2.merge(equalized_channels)

In [None]:
# Visual check: one training image before and after equalize()
img_bgr_try = cv2.imread(data_dir + "train/Charlock/aahijusizs.png")
img_equalized = equalize(img_bgr_try)

# Both arrays are handed to matplotlib unchanged, i.e. still in OpenCV's BGR channel
# order (hence the "(BGR)" in the titles).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 16))
ax1.imshow(img_bgr_try)
ax2.imshow(img_equalized)
ax1.set_title("Original Image (BGR)")
ax2.set_title("Applying Histogram Equalization (BGR)")
plt.show()

In [None]:
# Keep only the green parts of the image
def extract_seedling(img):
    """Black out every pixel of a BGR image that does not fall in the green HSV range.

    `img` is expected to come from cv2.imread() (BGR order). The image is blurred with
    a 3x3 Gaussian kernel and converted to HSV only to build the mask; the returned
    image contains the original (un-blurred) pixels where the mask is set and zeros
    elsewhere.
    """
    # HSV bounds of what counts as "green"
    hsv_lower = np.array([35, 43, 46], dtype="uint8")
    hsv_upper = np.array([90, 255, 255], dtype="uint8")

    blurred_hsv = cv2.cvtColor(cv2.GaussianBlur(img, (3, 3), 0), cv2.COLOR_BGR2HSV)
    green_mask = cv2.inRange(blurred_hsv, hsv_lower, hsv_upper)

    return cv2.bitwise_and(img, img, mask=green_mask)

In [None]:
# Visual check: one training image before and after extract_seedling()

img_bgr_try = cv2.imread(data_dir + "train/Charlock/aahijusizs.png")
img_extracted = extract_seedling(img_bgr_try)

# Both arrays are handed to matplotlib unchanged, i.e. still in OpenCV's BGR channel
# order (hence the "(BGR)" in the titles).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 16))
ax1.imshow(img_bgr_try)
ax2.imshow(img_extracted)
ax1.set_title("Original Image (BGR)")
ax2.set_title("Extracting Seedling Parts (BGR)")
plt.show()

In [None]:
def preprocessing_imgs(file_dir):
    """Load and preprocess every training image found under `file_dir`.

    `file_dir` must contain one sub-folder per class. Each image is read with
    cv2.imread(), passed through equalize() and then extract_seedling().
    Nothing is written to disk.

    Class folders are visited in ascending label order and file names in sorted order,
    so the returned lists are reproducible. Entries of `file_dir` that are not
    directories are ignored, and files OpenCV cannot decode are skipped with a message.

    Returns:
        (images_list, labels_list): the preprocessed BGR images and, in the same
        order, their integer labels looked up in the module-level `classes_dict`.

    Raises:
        KeyError: if a class folder name is missing from `classes_dict`.
    """
    images_list = []
    labels_list = []

    class_folders = [entry for entry in os.listdir(file_dir)
                     if os.path.isdir(os.path.join(file_dir, entry))]
    # Order by label index so labels appear as 0, 1, 2, ... regardless of listdir order.
    class_folders.sort(key=lambda entry: classes_dict[entry])

    for cls in class_folders:
        cls_dir = os.path.join(file_dir, cls)
        label = classes_dict[cls]

        for img_name in sorted(os.listdir(cls_dir)):
            img_path = os.path.join(cls_dir, img_name)
            img_bgr = cv2.imread(img_path)
            if img_bgr is None:
                # cv2.imread() returns None instead of raising for unreadable files.
                print('Skipping unreadable image:', img_path)
                continue

            img_seedling = extract_seedling(equalize(img_bgr))
            images_list.append(img_seedling)
            labels_list.append(label)

    return images_list, labels_list

In [None]:
# Preprocess all training images and count how many images each label has.
images_list, labels_list = preprocessing_imgs(data_dir + "train/")
num_per_class = Counter(labels_list)
num_per_class

In [None]:
# Labels as an ndarray so they can be indexed with the split indices later on.
all_label = np.array(labels_list)

print(f'图像数量: {len(images_list)}')                 # number of images (author's note: 4440)
print(f'第一张图片的shape: {images_list[0].shape}')    # shape of the first image: (height, width, 3)
print(f'label数量: {all_label.shape[0]}')              # number of labels (author's note: 4440)

In [None]:
def resize_imgs(images_list, size=(128, 128)):
    """Resize every image to the same size so per-image feature vectors have equal length.

    Args:
        images_list: iterable of images (numpy arrays as produced by OpenCV).
        size: target size passed to cv2.resize() as (width, height).
            Defaults to (128, 128).

    Returns:
        A new list with the resized images, in the same order as the input.
    """
    return [cv2.resize(image, size) for image in images_list]

In [None]:
# Fill missing values
def fill_missing(feature):
    """Return a copy of `feature` as an ndarray with every missing value (NaN) set to 0.

    The data takes a round trip through a pandas DataFrame so that fillna() can be used.
    """
    filled_frame = pd.DataFrame(feature).fillna(0)
    return np.array(filled_frame)

In [None]:
# Standardization
def normalize(feature):
    """Standardize the columns of `feature` with a freshly fitted StandardScaler.

    The scaler is fitted on the very matrix it transforms and is then discarded, so
    every call uses the statistics of its own input.
    """
    return StandardScaler().fit_transform(feature)

In [None]:
# Use PCA (Principal Component Analysis) to reduce dimensionality
def dimensionalityReduction(feature, n=100, is_whiten=False, is_show=True):
    """Fit a PCA on `feature` and return the projected matrix.

    Args:
        feature: 2-D feature matrix (samples x features).
        n: passed to PCA as `n_components`.
        is_whiten: passed to PCA as `whiten`.
        is_show: when True, print the running cumulative explained-variance ratio
            after each component.

    Returns:
        The transformed matrix returned by PCA.fit_transform(). The fitted PCA object
        itself is not returned, so the same projection cannot be re-applied to other data.
    """
    estimator = PCA(n_components=n, whiten=is_whiten)
    pca_feature = estimator.fit_transform(feature)

    # Running total of explained variance (avoids shadowing the builtin `sum`).
    cumulative_ratio = np.cumsum(estimator.explained_variance_ratio_)
    if is_show:
        for running_total in cumulative_ratio:
            print(running_total)
    total_ratio = cumulative_ratio[-1] if len(cumulative_ratio) else 0

    print('降维后特征矩阵shape为:', pca_feature.shape)
    print('主成分比例为:', total_ratio)

    return pca_feature

In [None]:
# Show the current working directory; the relative folder below is created inside it.
print(os.getcwd())

# Create the folder used for saved feature files; no error if it already exists.
os.makedirs("./save_features", exist_ok=True)

In [None]:
feature_dir = "./save_features/"

# Save an extracted feature array to an .npy file
def save_feature(feature, fileName):
    """Write `feature` to `<feature_dir><fileName>.npy` and print a confirmation."""
    npy_name = fileName + '.npy'
    np.save(feature_dir + npy_name, feature, allow_pickle=True)

    print(npy_name, '文件已生成！')

In [None]:
feature_dir = "./save_features/"

# Load a previously saved feature file and return the feature array
def read_feature(fileName):
    """Load `<feature_dir><fileName>.npy`, print its shape and return the array."""
    npy_path = feature_dir + fileName + '.npy'
    # NOTE(review): allow_pickle=True can execute arbitrary code if the file is untrusted;
    # only load files written by this notebook.
    feature = np.load(npy_path, allow_pickle=True)
    print('已读取', fileName, '文件！\t shape = ', feature.shape)

    return feature

In [None]:
# SIFT features
def sift_feature(images_list):
    """Extract the SIFT descriptors of every image.

    Args:
        images_list: sequence of BGR images.

    Returns:
        A list with one descriptor array per input image, each of shape
        (number_of_keypoints, 128).

    When SIFT finds no keypoints in an image, the previous image's descriptors are
    reused so that the list keeps one entry per image. If there is no previous image,
    a single all-zero descriptor is used instead (the original code raised an
    IndexError in that case). The number of such images is printed at the end.
    """
    feature_sift_list = []  # one descriptor array per image
    fail_count = 0
    # sift = cv2.xfeatures2d.SIFT_create()   # old-version cv2
    sift = cv2.SIFT_create()

    for image_bgr in tqdm(images_list):
        gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)

        # detectAndCompute returns (keypoints, descriptors); only descriptors are kept
        _, des = sift.detectAndCompute(gray, None)
        if des is None or len(des) == 0:
            fail_count += 1
            if feature_sift_list:
                des = feature_sift_list[-1]
            else:
                des = np.zeros((1, 128), dtype=np.float32)
        feature_sift_list.append(des)

    if fail_count:
        print('Images without SIFT keypoints (fallback descriptors used):', fail_count)

    return feature_sift_list

In [None]:
%%time

## TEST SIFT
sift = cv2.SIFT_create()

img_extracted_gray = cv2.cvtColor(img_extracted, cv2.COLOR_BGR2GRAY)

kp = sift.detect(img_extracted_gray, None)   # locate the keypoints

# Compute the SIFT descriptors on the clean grayscale image, before anything is drawn.
# kp is the keypoint list, des is an ndarray of shape (number_of_keypoints, 128).
kp, des = sift.compute(img_extracted_gray, kp)

# Draw the keypoints into a separate output image (used only for display)
img_kp_gray = cv2.drawKeypoints(img_extracted_gray, kp, None)

print('kp[0] =', kp[0])
print('len(kp) =', len(kp))
print('des.shape =', des.shape)

plt.figure(figsize=(8,8))
plt.imshow(img_kp_gray)
plt.axis('off')
plt.show()

In [None]:
%%time

# Build the list of SIFT descriptor arrays (one entry per training image)
feature_sift_list = sift_feature(images_list)

In [None]:
print(len(feature_sift_list))     # author's note: 4440
print(feature_sift_list[0])
print()

# The next three lines print the descriptor-array shapes: (number_of_keypoints, 128)
print(feature_sift_list[0].shape)      # (?, 128)
print(feature_sift_list[1].shape)      # (?, 128)
print(feature_sift_list[100].shape)    # (?, 128)

In [None]:
## Initialize the BOW extractor

def bow_init(feature_sift_list, vocab_size=100):
    """Build a visual vocabulary from SIFT descriptors and return a BOW extractor.

    Args:
        feature_sift_list: iterable of descriptor arrays, each (n_keypoints, 128).
            Empty or None entries are ignored.
        vocab_size: number of k-means clusters, i.e. visual words. Defaults to 100.

    Returns:
        A cv2.BOWImgDescriptorExtractor (SIFT + FLANN matcher) with the clustered
        vocabulary already set.
    """
    # The trainer pools all descriptors and clusters them with k-means;
    # every cluster centre is one visual word.
    bow_kmeans_trainer = cv2.BOWKMeansTrainer(vocab_size)

    for feature_sift in feature_sift_list:
        if feature_sift is None or len(feature_sift) == 0:
            continue
        bow_kmeans_trainer.add(feature_sift)

    # Run k-means; the result is the vocabulary (the cluster centres)
    voc = bow_kmeans_trainer.cluster()

    # Print the vocabulary
    print("In bow_init(), print variable `voc`: ")
    print(type(voc), voc.shape)    # <class 'numpy.ndarray'> (vocab_size, 128)
    print(voc)
    print()

    # FLANN matching.
    # `algorithm` selects the index type; 1 is the KD-tree index.
    # The number of trees is read from the key `trees`; the former key `tree` is not
    # the documented name.
    flann_params = dict(algorithm=1, trees=5)           # parameters for cv2.FlannBasedMatcher()
    flann = cv2.FlannBasedMatcher(flann_params,{})

    print(flann)

    # Create the BOW extractor and give it the vocabulary; it computes the BOW
    # descriptor of each image
    # sift = cv2.xfeatures2d.SIFT_create()   # old-version cv2
    sift = cv2.SIFT_create()
    bow_img_descriptor_extractor = cv2.BOWImgDescriptorExtractor(sift, flann)
    bow_img_descriptor_extractor.setVocabulary(voc)

    print(bow_img_descriptor_extractor)

    return bow_img_descriptor_extractor

In [None]:
## BOW features
def bow_feature(bow_img_descriptor_extractor, images_list):
    """Compute the bag-of-visual-words descriptor of every image.

    Args:
        bow_img_descriptor_extractor: extractor returned by bow_init(), with its
            vocabulary already set.
        images_list: sequence of BGR images.

    Returns:
        A 2-D array of shape (number_of_images, vocabulary_size). Images in which
        SIFT finds no keypoints (the extractor then yields no descriptor) get an
        all-zero row, so the row count always matches the number of images.
    """
    vocab_size = bow_img_descriptor_extractor.descriptorSize()
    feature_bow_list = []

    # sift = cv2.xfeatures2d.SIFT_create()   # old-version cv2
    sift = cv2.SIFT_create()
    for image_bgr in tqdm(images_list):
        gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
        feature_bow = bow_img_descriptor_extractor.compute(gray, sift.detect(gray))
        if feature_bow is None:
            feature_bow = np.zeros((1, vocab_size), dtype=np.float32)
        feature_bow_list.append(feature_bow)

    if not feature_bow_list:
        return np.empty((0, vocab_size), dtype=np.float32)

    # Each entry has shape (1, vocab_size); stack them into (n_images, vocab_size)
    return np.concatenate(feature_bow_list, axis=0)

In [None]:
%%time
## Extract the SIFT + BOW features

# Build the vocabulary from the training descriptors, then describe every training image
# (the save to an .npy file is commented out below)
bow_extractor = bow_init(feature_sift_list)
all_feature_bow = bow_feature(bow_extractor, images_list)  # author's note: shape = (4440, 100)
# save_feature(all_feature_bow, 'all_feature_bow')

In [None]:
# Resize all images to one common size so that every image yields the same number of
# HOG / LBP features. resize_imgs() resizes to 128x128 (the original comment said 256x256).
# Must run before the HOG and LBP extraction; note that it overwrites `images_list`.
images_list = resize_imgs(images_list)

In [None]:
from skimage import color, feature
from tqdm import tqdm

# Extract HOG features
def hog_feature(image_list, orientations=12, pixels_per_cell=(16, 16), cells_per_block=(3, 3)):
    """Compute the HOG descriptor of every image.

    Args:
        image_list: sequence of equally sized BGR images (as read by cv2.imread()).
        orientations, pixels_per_cell, cells_per_block: forwarded to
            skimage.feature.hog(); the defaults are the values used so far.

    Returns:
        A 2-D array with one HOG feature vector per image.
    """
    feature_hog_list = []
    for image_bgr in tqdm(image_list):  # ft is short for skimage.feature
        # The images are in BGR order, so BGR2GRAY is the matching conversion
        # (RGB2GRAY would apply the red and blue weights to the wrong channels).
        gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
        feature_hog = ft.hog(gray,
                             orientations=orientations,
                             pixels_per_cell=pixels_per_cell,
                             cells_per_block=cells_per_block)
        feature_hog_list.append(feature_hog)

    return np.array(feature_hog_list)


In [None]:
%%time

## Extract the HOG features of all (resized) training images
## NOTE(review): the original comment mentioned saving to a csv file; the only save here is
## the commented-out save_feature() call below, which would write an .npy file.
all_feature_hog = hog_feature(images_list)

# print('all_feature_hog.shape =', all_feature_hog.shape)
# save_feature(all_feature_hog, 'all_feature_hog')

In [None]:
from skimage.feature import hog
from skimage import color

# Convert the multi-channel demo image to grayscale
# NOTE(review): img_extracted comes from cv2 (BGR order) while rgb2gray assumes RGB — confirm
# whether the swapped channel weights matter for this visual demo.
gray_img = color.rgb2gray(img_extracted)

# Apply HOG to the grayscale image; with visualize=True the call returns a
# (feature_vector, hog_image) pair
feature_hog = hog(gray_img,
                  orientations=16,
                  pixels_per_cell=(32, 32),
                  cells_per_block=(3, 3),
                  feature_vector=True,
                  visualize=True)

# Length of the feature vector, then the HOG visualisation image
print(feature_hog[0].shape[0])
plt.imshow(feature_hog[1], cmap=plt.cm.gray)


In [None]:
# Extract LBP features
def lbp_feature(image_list):
    """Compute a local-binary-pattern map for each of the three channels of every image.

    Uses skimage's local_binary_pattern with P=64, R=64 and method 'var'.
    Returns an array of shape (number_of_images, 3, height, width) when all images
    share the same size.
    """
    feature_lbp_list = []
    for idx in tqdm(range(len(image_list))):
        picture = image_list[idx]
        per_channel = [
            ft.local_binary_pattern(np.array(picture[:, :, channel]), 64, 64, 'var')
            for channel in range(3)
        ]
        feature_lbp_list.append(per_channel)
    return np.array(feature_lbp_list)


In [None]:
%%time

## Extract the LBP features
# NOTE(review): the original comments gave shapes for 256x256 images; resize_imgs() produces
# 128x128 images, so the shapes should be (n_images, 3, 128, 128) — confirm on a run.
all_feature_lbp = lbp_feature(images_list)    # expected shape = (4440, 3, 128, 128)
print(np.array(all_feature_lbp).shape)        # expected (4440, 3, 128, 128)



## Flatten from four dimensions to two: one row per image
all_feature_lbp = all_feature_lbp.reshape(all_feature_lbp.shape[0], 
                                          all_feature_lbp.shape[1] * all_feature_lbp.shape[2] * all_feature_lbp.shape[3])

print('all_feature_lbp.shape =', all_feature_lbp.shape)  # expected (4440, 49152) for 128x128 images

In [None]:
# TEST LBP: per-channel LBP maps of the single demo image
feature_lbp = []
for i in range(3):
    feature_lbp.append(ft.local_binary_pattern(img_extracted[:,:,i], 64, 64, 'var'))
print(feature_lbp)    # list of ndarray
print()

feature_lbp_array = np.array(feature_lbp)
print(feature_lbp_array.shape)

# Select the values that are not NaN
feature_lbp_array[np.logical_not(np.isnan(feature_lbp_array))]

In [None]:
## Fill the missing values (NaN) with 0
all_feature_lbp = fill_missing(all_feature_lbp)  
# save_feature(all_feature_lbp, 'all_feature_lbp')

In [None]:
%%time
# Standardize each feature matrix and save the result.
# NOTE(review): the *_normal matrices are saved here, but the PCA cells below are fed the
# un-normalized matrices — confirm whether that is intended.
all_feature_bow_normal = normalize(all_feature_bow)
save_feature(all_feature_bow_normal, 'all_feature_bow_normal')

all_feature_hog_normal = normalize(all_feature_hog)
save_feature(all_feature_hog_normal, 'all_feature_hog_normal')

all_feature_lbp_normal = normalize(all_feature_lbp)
save_feature(all_feature_lbp_normal, 'all_feature_lbp_normal')

In [None]:
%%time

# PCA on the BOW features with 100 components
print('all_feature_bow.shape =', all_feature_bow.shape)    # author's note: all_feature_bow.shape = (4440, 100)
pca_feature_bow = dimensionalityReduction(all_feature_bow, 100)
print('pca_feature_bow.shape =', pca_feature_bow.shape)    # author's note: pca_feature_bow.shape = (4440, 100)
save_feature(pca_feature_bow, 'pca_feature_bow')

In [None]:
%%time

# PCA on the HOG features with 1000 components
print('all_feature_hog.shape =', all_feature_hog.shape)    # NOTE(review): original note said (4440, 21168); verify for 128x128 images
pca_feature_hog = dimensionalityReduction(all_feature_hog, 1000)
print('pca_feature_hog.shape =', pca_feature_hog.shape)    # second dimension is 1000 (n_components)
save_feature(pca_feature_hog, 'pca_feature_hog')

In [None]:
%%time

# PCA on the LBP features with 500 components and whitening enabled
print('all_feature_lbp.shape =', all_feature_lbp.shape)  # NOTE(review): original note said (4440, 196608); verify for 128x128 images
pca_feature_lbp = dimensionalityReduction(all_feature_lbp, 500, True)
print('pca_feature_lbp.shape =', pca_feature_lbp.shape)  # second dimension is 500 (n_components)
save_feature(pca_feature_lbp, 'pca_feature_lbp')

In [None]:
# Concatenate the SIFT+BOW, HOG and LBP feature matrices column-wise
# all_feature_list = [all_feature_bow_normal, pca_feature_hog, pca_feature_lbp]
all_feature_list = [pca_feature_bow, pca_feature_hog, pca_feature_lbp]

# np.hstack joins the matrices directly, so no hard-coded row count (formerly 4440)
# is needed; all three matrices only have to share the same number of rows.
all_feature = np.hstack(all_feature_list)

print(all_feature.shape)

np.save(feature_dir + 'all_feature', all_feature, allow_pickle=True)

In [None]:
# Stratified split: 80% training, 20% validation, class proportions preserved
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)

# n_splits=1, so the splitter yields exactly one (train, validation) index pair
train_index, test_index = next(sss.split(all_feature, all_label))
x_train, y_train = all_feature[train_index], all_label[train_index]
x_val, y_val = all_feature[test_index], all_label[test_index]

print("x_train: ", x_train.shape)
print("y_train: ", y_train.shape)
print("x_val  : ", x_val.shape)
print("y_val  : ", y_val.shape)

In [None]:
# Per-class sample counts after the split (position i = class label i, 12 classes)
num_class_train = np.bincount(y_train, minlength=12)
num_class_val = np.bincount(y_val, minlength=12)

# The expected counts are looked up by class label, so they line up with the arrays
# above regardless of the order in which labels were inserted into `num_per_class`.
print('划分后训练集中各类的数量 =', num_class_train)
print('数据集中各类的数量 * 0.8 =', [round((num_per_class[c] * 0.8), 1) for c in range(12)])
print('划分后验证集中各类的数量 =', num_class_val)
print('数据集中各类的数量 * 0.2 =', [round((num_per_class[c] * 0.2), 1) for c in range(12)])

In [None]:
# Show per-class precision, recall and f1-score, and plot the confusion matrix
def category_show(model, x_val, y_val, target_names=None):
    """Print a classification report and plot the confusion matrix of `model`.

    Args:
        model: fitted classifier exposing predict().
        x_val: validation feature matrix.
        y_val: true integer labels of the validation samples.
        target_names: class names ordered by label index. When omitted, the names are
            taken from the module-level `classes_dict`, so they always agree with the
            labels the models were trained on.
    """
    if target_names is None:
        target_names = sorted(classes_dict, key=classes_dict.get)
    # Explicit labels keep the report and the matrix aligned with `target_names` even
    # if a class happens to be absent from y_val or the predictions.
    labels = list(range(len(target_names)))

    y_pred = model.predict(x_val)

    print(classification_report(y_val, y_pred, labels=labels, target_names=target_names))
    cm = confusion_matrix(y_val, y_pred, labels=labels)
    cm_display = ConfusionMatrixDisplay(cm, display_labels=target_names).plot(xticks_rotation='vertical')

In [None]:
%%time

# XGBoost 模型
model_xgb = XGBClassifier(# objective="multi:softmax",
                          objective='multi:softproba',
                          n_estimators=1000,
                          num_class=12,
                          learning_rate=0.1,
                          # tree_method='gpu_hist', 
                          # gpu_id=-1,
                          max_depth=6, 
                          min_child_weight=2, 
                          max_delta_step=3, 
                          subsample=1, 
                          gamma=0, 
                          n_jobs=-1,)

model_xgb.fit(x_train, y_train, 
              early_stopping_rounds=10, 
              eval_set=[(x_val, y_val)], 
              eval_metric='mlogloss', 
              verbose=50)
score_xgb = model_xgb.score(x_val, y_val)
print('score_xgb =', score_xgb)

In [None]:
category_show(model_xgb, x_val, y_val)

In [None]:
%%time

# LightGBM 模型
model_lgb = lgb.LGBMClassifier(learning_rate=0.1, 
                               objective='multiclass', 
                               num_class=12, 
                               n_estimators=1500, 
                               max_depth=3, 
                               sub_sample=0.7, 
                               n_jobs=-1)
model_lgb.fit(x_train, y_train, 
              early_stopping_rounds=10, 
              eval_set=[(x_val, y_val)], 
              eval_metric ='logloss', 
              verbose=10)
score_lgb = model_lgb.score(x_val, y_val)
print('score_lgb =', score_lgb)

In [None]:
category_show(model_lgb, x_val, y_val)

In [None]:
%%time

# GBDT模型
model_gbdt = GradientBoostingClassifier(learning_rate=0.1, n_estimators=500, max_depth=3)
model_gbdt.fit(x_train, y_train)
score_gbdt = model_gbdt.score(x_val, y_val)
print('score_gbdt =', score_gbdt)

In [None]:
category_show(model_gbdt, x_val, y_val)

In [None]:
%%time

# RandomForest 模型
model_rf = RandomForestClassifier(n_estimators=150, n_jobs=-1)
model_rf.fit(x_train, y_train)
score_rf = model_rf.score(x_val, y_val)
print('score_rf =', score_rf)

In [None]:
category_show(model_rf, x_val, y_val)

In [None]:
%%time

# SVC 模型
model_SVC = SVC(C=1)
model_SVC.fit(x_train, y_train)
score_SVC = model_SVC.score(x_val, y_val)
print('score_SVC =', score_SVC)

In [None]:
category_show(model_SVC, x_val, y_val)

In [None]:
model_knn = KNeighborsClassifier(n_neighbors=1)
model_knn.fit(x_train, y_train)
score_knn = model_knn.score(x_val, y_val)
print('score_knn =', score_knn)

In [None]:
category_show(model_knn, x_val, y_val)

In [None]:
%%time

model_sgdc = SGDClassifier(max_iter=1000, tol=1e-3)
model_sgdc.fit(x_train, y_train)
score_sgdc = model_sgdc.score(x_val, y_val)
print('score_sgdc =', score_sgdc)

In [None]:
category_show(model_sgdc, x_val, y_val)

In [None]:
model_ET = ExtraTreesClassifier()
model_ET.fit(x_train, y_train)
score_ET = model_ET.score(x_val, y_val)
print('score_ET =', score_ET)

In [None]:
category_show(model_ET, x_val, y_val)

In [None]:
%%time

# Base learners of the stacking ensemble. The estimator objects created in the cells
# above are passed in together with a new, smaller LightGBM model.
estimators = [('rf', model_rf),
              ('lgb', lgb.LGBMClassifier(learning_rate=0.1, 
                                         objective='multiclass', 
                                         num_class=12, 
                                         n_estimators=150, 
                                         max_depth=2, 
                                         n_jobs=-1)),
              ('SVC', model_SVC),
              ('SGDC', model_sgdc),
              ('ET', model_ET)
             ]

# The final estimator (XGBoost) is trained on the outputs of the base learners.
model_stack = StackingClassifier(estimators=estimators, 
                                 final_estimator= XGBClassifier(learning_rate=0.1, 
                                                                # objective='multi:softmax',
                                                                # 'multi:softprob' is the documented objective
                                                                # name ('multi:softproba' is not one)
                                                                objective='multi:softprob',
                                                                num_class=12, 
                                                                n_estimators=500, 
                                                                # tree_method='gpu_hist', 
                                                                # gpu_id=0, 
                                                                max_depth=3, 
                                                                min_child_weight=3, 
                                                                max_delta_step=3, 
                                                                subsample=0.7, 
                                                                gamma=0, 
                                                                n_jobs=-1, 
                                                                use_label_encoder=False)
                                )

model_stack.fit(x_train, y_train)

score_stack = model_stack.score(x_val, y_val)
print('score_stack =', score_stack)

In [None]:
def preprocessing_test_imgs(file_dir):
    """Load and preprocess every image in the `test/` folder under `file_dir`.

    Each file is read with cv2.imread(), passed through equalize() and then
    extract_seedling(). Nothing is written to disk and no labels are produced.

    Returns the preprocessed images in the order in which os.listdir() reports the
    files; callers that pair the result with file names rely on that same order.
    """
    test_dir = file_dir + "test/"
    return [
        extract_seedling(equalize(cv2.imread(test_dir + img_name)))
        for img_name in os.listdir(test_dir)
    ]

In [None]:
# Preprocess all test images (equalize + green extraction)
imgs_list_test = preprocessing_test_imgs(data_dir)

In [None]:
# Number of test images
len(imgs_list_test)

In [None]:
# Show the first preprocessed test image (the array is still in BGR channel order)
plt.imshow(imgs_list_test[0])
plt.show()

In [None]:
%%time

# The two steps below are not needed for the test set: the vocabulary built from the
# training images (bow_extractor) is reused.
# feature_sift_list_test = sift_feature(imgs_list_test)  # not needed
# bow_extractor_test = bow_init(feature_sift_list_test)  # not needed

feature_bow_test = bow_feature(bow_extractor, imgs_list_test)  # author's note: shape = (794, 100)

In [None]:
print(type(feature_bow_test), len(feature_bow_test))
feature_bow_test.shape

In [None]:
feature_bow_test[np.isnan(feature_bow_test)]    # Test for nan

In [None]:
## Normalize
# NOTE(review): normalize() fits a new scaler on the test data, and the training BOW
# features fed to PCA were not normalized at all — confirm the intended pipeline.
feature_bow_normal_test = normalize(feature_bow_test)

## Dimensionality Reduction
# NOTE(review): dimensionalityReduction() fits a *new* PCA on the test data, so these
# components are not the ones the training features were projected onto.
feature_bow_pca_test = dimensionalityReduction(feature_bow_normal_test, 100)
feature_bow_pca_test.shape

In [None]:
%%time

# Resize the test images to the common size; note that this overwrites `imgs_list_test`
imgs_list_test = resize_imgs(imgs_list_test)     # Resize images

# NOTE(review): the original shape note "(794, 100)" looks copied from the BOW cell; the
# second dimension is the HOG vector length — verify on a run.
feature_hog_test = hog_feature(imgs_list_test)   # (n_test_images, hog_vector_length)

In [None]:
feature_hog_test[np.isnan(feature_hog_test)]   # Test for nan

In [None]:
## Normalize
# NOTE(review): a new scaler is fitted on the test data; the training HOG features fed to
# PCA were not normalized.
feature_hog_normal_test = normalize(feature_hog_test)

## Dimensionality Reduction
# NOTE(review): a new PCA with 100 components is fitted on the test data, while the
# training HOG features were reduced to 1000 components — the feature spaces differ.
feature_hog_pca_test = dimensionalityReduction(feature_hog_normal_test, 100)
feature_hog_pca_test.shape

In [None]:
## Extract the LBP features
feature_lbp_test = lbp_feature(imgs_list_test)     # author's note: (794, 3, 128, 128)
type(feature_lbp_test)

In [None]:
## Reshape into 2 Dimensions
feature_lbp_test = feature_lbp_test.reshape(feature_lbp_test.shape[0], -1)
feature_lbp_test.shape

In [None]:
feature_lbp_test[np.isnan(feature_lbp_test)]   # Test for nan

# feature_lbp_test[np.logical_not(np.isnan(feature_lbp_test))]

In [None]:
## Fill nan with 0
feature_lbp_test = fill_missing(feature_lbp_test)
feature_lbp_test[np.isnan(feature_lbp_test)]        # Test for nan

In [None]:
## Normalize
# NOTE(review): a new scaler is fitted on the test data; the training LBP features fed to
# PCA were not normalized.
feature_lbp_normal_test = normalize(feature_lbp_test)

## Dimensionality Reduction
# NOTE(review): a new PCA with 100 components (no whitening) is fitted on the test data,
# while the training LBP features used 500 whitened components — the feature spaces differ.
feature_lbp_pca_test = dimensionalityReduction(feature_lbp_normal_test, 100)
feature_lbp_pca_test.shape

In [None]:
# Concatenate the SIFT+BOW, HOG and LBP feature matrices column-wise
features_list_test = [feature_bow_pca_test, feature_hog_pca_test, feature_lbp_pca_test]

# np.hstack joins the matrices directly, so no hard-coded row count (formerly 1104)
# is needed; all three matrices only have to share the same number of rows.
# NOTE(review): the number of columns must equal the number of columns of the training
# matrix the models were fitted on — confirm before predicting.
features_test = np.hstack(features_list_test)

print(features_test.shape)

# np.save(feature_dir + 'features_test', features_test, allow_pickle=True)

In [None]:
predict_test = model_stack.predict(features_test)

In [None]:
print(predict_test.shape)

predict_test

In [None]:
img_names = os.listdir(data_dir + "test/")

In [None]:
img_names

In [None]:
plt.imshow(cv2.imread(data_dir + "test/" +os.listdir(data_dir + "test/")[0]))
plt.show()

In [None]:
classes_dict

In [None]:
classes_dict.keys()

In [None]:
num_to_class_dict = dict(zip(classes_dict.values(), classes_dict.keys()))
# num_to_class_dict 

In [None]:
predict_class = []
for pred in predict_test:
    predict_class.append(num_to_class_dict[pred])
# predict_class

In [None]:
df_submission = pd.DataFrame()
df_submission['file'] = img_names
df_submission['species'] = predict_class

In [None]:
df_submission.head()

In [None]:
df_submission.to_csv("submission.csv", index=False)