In [None]:
# coding=utf-8
import numpy as np
import cv2
import pandas as pd
import os
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from glob import glob
import seaborn as sns

In [None]:
# Root folder of the plant-seedlings-classification data set.
DATA_FOLDER = "."
# Sub-folders of the data set that hold the training and the test images.
TRAIN_FOLDER, TEST_FOLDER = (
    os.path.join(DATA_FOLDER, subdir) for subdir in ('train', 'test')
)
# Sanity check of what is on disk: every training entry, then the first ten test entries.
for folder, limit in ((TRAIN_FOLDER, None), (TEST_FOLDER, 10)):
    print(os.listdir(folder)[:limit])

In [None]:
# Load every training image, grouped by an integer class index.
#   train[idx]      -> list of images (as returned by cv2.imread) of class idx
#   image_type[idx] -> name of the class folder (the species) for class idx
#
# Class folders are visited in sorted order so that the index <-> species mapping is the
# same on every run and every machine (os.listdir returns entries in arbitrary order).
# For the standard data set the sorted order gives:
# {0: 'Black-grass', 1: 'Charlock', 2: 'Cleavers', 3: 'Common Chickweed', 4: 'Common wheat',
#  5: 'Fat Hen', 6: 'Loose Silky-bent', 7: 'Maize', 8: 'Scentless Mayweed',
#  9: 'Shepherds Purse', 10: 'Small-flowered Cranesbill', 11: 'Sugar beet'}
train = {}
image_type = {}

# Only directories are classes; stray files in TRAIN_FOLDER must not become labels.
class_dirs = sorted(
    name for name in os.listdir(TRAIN_FOLDER)
    if os.path.isdir(os.path.join(TRAIN_FOLDER, name))
)

for i, plant_name in enumerate(class_dirs):
    plant_path = os.path.join(TRAIN_FOLDER, plant_name)
    label = plant_name
    train[i] = []
    for image_path in sorted(glob(os.path.join(plant_path,'*png'))):
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports an unreadable file by returning None instead of raising;
            # keeping it would only fail later, far away from the cause.
            print('skipping unreadable image:', image_path)
            continue
        train[i].append(image)
    print(plant_path,'读取完成！',label,'->',len(train[i]))
    image_type[i]=label
print(image_type)

In [None]:
# Load the test images in sorted file-name order.
# glob returns paths in arbitrary order, while the submission cell pairs predictions with
# the rows of sample_submission.csv purely by position, so the order must be deterministic.
# NOTE(review): presumably sample_submission.csv lists the files in sorted name order - confirm.
test_files = sorted(glob(os.path.join(TEST_FOLDER,'*png')))

test_data=[]
for image_path in test_files:
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for an unreadable file. Skipping it would shift every
        # later prediction by one row, so fail loudly instead.
        raise IOError('could not read test image: ' + image_path)
    test_data.append(image)
print(len(test_data))
print(type(test_data))
if test_data:
    # Show the shape of one example image; index 620 is used when that many images exist.
    print(test_data[min(620, len(test_data) - 1)].shape)

In [None]:
# Inspect class 0: how many images it has, then the container type, dtype and shape
# of its first image.
first_class_images = train[0]
print(len(first_class_images))
first_image = first_class_images[0]
print(type(first_image))
print(first_image.dtype)
print(first_image.shape)

In [None]:
# Show the first six images of one label.
def plot_for_class(label):
    """Plot up to the first six training images of class `label` in a 2 x 3 grid.

    Parameters
    ----------
    label : key of the module-level `train` dict whose images should be shown.

    The images in `train` were read with cv2.imread and are therefore in BGR channel
    order; they are converted to RGB before being handed to matplotlib so the colours
    are displayed correctly. Grid cells without an image (class with fewer than six
    images) are left blank instead of raising IndexError.
    """
    nb_rows = 2
    nb_cols = 3
    fig, axs = plt.subplots(nb_rows, nb_cols, figsize=(5, 5))

    images = train[label][:nb_rows * nb_cols]
    # axs.flat walks the grid row by row, the same order as nested row/column loops.
    for n, ax in enumerate(axs.flat):
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        if n < len(images):
            ax.imshow(cv2.cvtColor(images[n], cv2.COLOR_BGR2RGB))
        else:
            ax.axis('off')

In [None]:
# Preview the first images of class 0.
plot_for_class(0)

In [None]:
def create_mask_for_plant(image, sensitivity=35):
    """Build a binary mask of the green pixels of a BGR image.

    Parameters
    ----------
    image : BGR image (it is converted with cv2.COLOR_BGR2HSV).
    sensitivity : half-width of the accepted hue band around hue 60. The default of 35
        keeps hues 25..95, which is the range the notebook has always used.

    Returns
    -------
    The single-channel mask produced by cv2.inRange after a morphological closing.
    """
    # BGR -> HSV, so that "green" can be selected by hue.
    image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    green_hue = 60
    # Saturation >= 100 and value >= 50 reject grey and very dark pixels.
    lower_hsv = np.array([green_hue - sensitivity, 100, 50])
    upper_hsv = np.array([green_hue + sensitivity, 255, 255])

    # Threshold: pixels inside the HSV box are set in the mask, all others are cleared.
    mask = cv2.inRange(image_hsv, lower_hsv, upper_hsv)

    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))

    # Morphological CLOSING (dilate, then erode): fills small holes and gaps inside the
    # detected regions. (An earlier comment called this an opening; the code closes.)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

    return mask

def segment_plant(image):
    """Return `image` with every pixel outside the plant mask set to zero."""
    plant_mask = create_mask_for_plant(image)
    # AND-ing the image with itself under the mask keeps the masked pixels unchanged.
    return cv2.bitwise_and(image, image, mask=plant_mask)

def sharpen_image(image):
    """Sharpen `image` by unsharp masking: 1.5 * image - 0.5 * blurred(image)."""
    # Kernel size (0, 0) lets OpenCV derive the kernel from the sigma of 3.
    blurred = cv2.GaussianBlur(image, (0, 0), 3)
    return cv2.addWeighted(image, 1.5, blurred, -0.5, 0)

In [9]:
# Visual check of the preprocessing steps on one training image.
image = train[0][0]

image_mask = create_mask_for_plant(image)
image_segmented = segment_plant(image)
image_sharpen = sharpen_image(image_segmented)

fig, axs = plt.subplots(1, 4, figsize=(20, 20))
# The images come from cv2.imread (BGR) while matplotlib expects RGB, so colour images
# are converted for display only; the mask is single-channel and is shown in grey.
panels = [
    ("original", cv2.cvtColor(image, cv2.COLOR_BGR2RGB), None),
    ("mask", image_mask, "gray"),
    ("segmented", cv2.cvtColor(image_segmented, cv2.COLOR_BGR2RGB), None),
    ("sharpened", cv2.cvtColor(image_sharpen, cv2.COLOR_BGR2RGB), None),
]
for ax, (title, picture, cmap) in zip(axs, panels):
    ax.imshow(picture, cmap=cmap)
    ax.set_title(title)

NameError: name 'create_mask_for_plant' is not defined

In [None]:
def calculate_largest_contour_area(contours):
    """Return the area of the largest contour, or 0 when `contours` is empty."""
    # max() with a default covers the empty case without a separate length check.
    return max((cv2.contourArea(contour) for contour in contours), default=0)

def calculate_contours_area(contours, min_contour_area = 250):
    """Sum the areas of all contours whose area is at least `min_contour_area`.

    Contours smaller than the threshold do not contribute; with no qualifying
    contour the result is 0.
    """
    contour_areas = (cv2.contourArea(contour) for contour in contours)
    return sum(a for a in contour_areas if a >= min_contour_area)

In [None]:
# Per-image shape features of the training set, collected as parallel lists
# (one entry per image, in iteration order of `train`).
areas = []
larges_contour_areas = []
labels = []
nb_of_contours = []
images_height = []
images_width = []


for class_label, class_images in train.items():
    for image in class_images:
        mask = create_mask_for_plant(image)
        # cv2.findContours returns (contours, hierarchy) in OpenCV 2.x / 4.x but
        # (image, contours, hierarchy) in 3.x. Index -2 is the contour list in every
        # version, whereas index 0 is the image under 3.x.
        contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
        area = calculate_contours_area(contours)
        largest_area = calculate_largest_contour_area(contours)
        # Only the first two dimensions are needed; this does not depend on a channel axis.
        height, width = image.shape[:2]
        images_height.append(height)
        images_width.append(width)
        areas.append(area)
        nb_of_contours.append(len(contours))
        larges_contour_areas.append(largest_area)
        labels.append(class_label)

In [None]:
# Assemble the training feature table: the class label first, then the five shape features.
features_df = pd.DataFrame({
    "label": labels,
    "area": areas,
    "largest_area": larges_contour_areas,
    "number_of_components": nb_of_contours,
    "height": images_height,
    "width": images_width,
})

In [None]:
# Number of test images that were loaded.
len(test_data)

In [None]:
# Per-image shape features of the test set, in the order of `test_data`.
test_areas = []
test_larges_contour_areas = []
test_nb_of_contours = []
test_images_height = []
test_images_width = []
for image in test_data:
    mask = create_mask_for_plant(image)
    # cv2.findContours returns (contours, hierarchy) in OpenCV 2.x / 4.x but
    # (image, contours, hierarchy) in 3.x. Index -2 is the contour list in every
    # version, whereas index 0 is the image under 3.x.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    area = calculate_contours_area(contours)
    largest_area = calculate_largest_contour_area(contours)
    # Only the first two dimensions are needed; this does not depend on a channel axis.
    height, width = image.shape[:2]

    test_images_height.append(height)
    test_images_width.append(width)
    test_areas.append(area)
    test_nb_of_contours.append(len(contours))
    test_larges_contour_areas.append(largest_area)

# Same columns, in the same order, as the feature columns of the training table.
features_test = pd.DataFrame()
features_test["area"] = test_areas
features_test["largest_area"] = test_larges_contour_areas
features_test["number_of_components"] = test_nb_of_contours
features_test["height"] = test_images_height
features_test["width"] = test_images_width

In [None]:
# Peek at the first rows of the training and the test feature tables.
# Optional extras kept from earlier experiments:
#   print(features_df.groupby("label").describe())   # per-class statistics
#   features_df.to_csv('train.csv',index=False)       # persist the training features
train_preview = features_df.head()
print(train_preview)
test_preview = features_test.head()
print(test_preview)

In [None]:
# Confirm the container type of the training feature table.
type(features_df)

In [None]:
from sklearn.linear_model import LassoCV,RidgeCV

from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier,OneVsOneClassifier
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
import joblib
import warnings

# Suppress all warnings for the rest of the session (this hides every warning category,
# including any raised by the libraries imported above).
warnings.filterwarnings("ignore")

In [None]:
# First rows of the training feature table (label plus shape features).
features_df.head()

In [None]:
# Split the feature table into model inputs (all columns after the first) and labels
# (the first column, kept as a one-column frame).
# NOTE(review): this rebinds `train`, which the earlier cells use as the dict of raw
# images; after running this cell those image cells need the loading cell re-run first.
train=features_df.iloc[:,1:]


def _standardize(column):
    """Return `column` shifted to zero mean and scaled to unit standard deviation."""
    spread = column.std()
    # A constant column has zero spread; dividing by it would turn the whole column
    # into NaN and make the classifier fail, so such a column is only centred.
    if spread == 0:
        return column - column.mean()
    return (column - column.mean()) / spread


train = train.apply(_standardize) # standardization (z-score)

label=features_df.iloc[:,:1]
print(train.head(),label.head(),sep='\n')
print(train.shape,label.shape)




In [None]:

# One-vs-one linear SVM on the standardized shape features.
# Alternatives tried earlier (trailing number = score noted at the time):
# model = OneVsRestClassifier(svm.SVC(kernel='linear',probability=True)) #0.28
# model = OneVsRestClassifier(svm.SVC(kernel='linear')) #0.28
# model=OneVsRestClassifier(LogisticRegression(solver="liblinear",max_iter=80000)) #0.29
# model=OneVsOneClassifier(LogisticRegression(solver="lbfgs",max_iter=800))
model = OneVsOneClassifier(svm.SVC(kernel='linear',probability=True,max_iter=80000)) #0.28
print(train.head())

X_train = np.array(train)
# scikit-learn expects a 1-D target of shape (n_samples,). `label` is a one-column
# frame, so flatten it explicitly instead of relying on an implicit conversion whose
# warning is hidden by the global warnings filter.
y_train = np.ravel(label)

model.fit(X_train, y_train)
print( len(model.estimators_) )
# Accuracy on the training data itself - not an estimate of held-out performance.
print("========",metrics.accuracy_score(y_train,model.predict(X_train)))

joblib.dump(model,'svc.pkl')
# To reload the model later (possible only after it has been dumped once):
# model = joblib.load('svc.pkl')

In [None]:
# Standardize the test features with the statistics of the TRAINING features.
# The model was fitted on z-scores computed from the training set; scaling the test set
# with its own mean/std would put the two sets on different scales.
train_raw = features_df.iloc[:, 1:]
train_mean = train_raw.mean()
# A zero spread would divide by zero; such a column is only centred.
train_std = train_raw.std().replace(0, 1)
# A new name is used so that `features_test` stays untouched and the cell can be re-run.
features_test_scaled = (features_test[train_raw.columns] - train_mean) / train_std

# The model was fitted on a plain array, so predict on a plain array as well.
predicted_ids = model.predict(features_test_scaled.to_numpy())
print(type(predicted_ids), predicted_ids.shape)

# Translate class indices into species names using the mapping built while loading the
# training images (works for any number of classes, not just 12).
preds = pd.DataFrame(predicted_ids)
preds[0] = preds[0].map(image_type)
print(preds.shape)
print(preds.head())

sample = pd.read_csv("sample_submission.csv")
if len(sample) != len(preds):
    raise ValueError(
        "sample_submission.csv has %d rows but %d predictions were made"
        % (len(sample), len(preds))
    )
# NOTE(review): predictions are paired with file names purely by position, i.e. this
# assumes `test_data` was loaded in the same order as the rows of sample_submission.csv.
submission = pd.DataFrame({'file': sample['file'], 'species': preds[0].to_numpy()})
submission.to_csv('submission.csv', index=False)

In [None]:
from HOG import *