# Gradient Boosting Grid search

### import libraries

In [1]:
import os
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pickle
import random
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import seaborn as sns
import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from pprint import pprint
from sklearn.metrics import classification_report
from skimage.feature import greycomatrix, greycoprops

### Define dataset and labels

In [2]:
dire="D:/Individual_Trees_photos/exatctcrop"
categories= ['European beech', 'European silver fir', 'Norway spruce', 'Sessile oak']

data =[]
SIZE = 200
images =[]
label1 =[]
for category in categories:
    path = os.path.join(dire, category)
    label = categories.index(category)
    
    
    for img in os.listdir(path):
        imgpath = os.path.join(path,img)
        tree_img = cv2.imread(imgpath,0)
        try:
            tree_img = cv2.resize(tree_img, (SIZE, SIZE)) #Resize images
            #tree_img = cv2.resize(tree_img,(50,50))
            image = np.array(tree_img).flatten()
        
            #data.append([image, label])
            images.append(tree_img)
            label1.append(label)
        except Exception as e:
            pass
images=np.array(images)   

### GLCM feature extraction

In [3]:
# FEATURE EXTRACTOR function
# input shape is (n, x, y, c) - number of images, x, y, and channels
def feature_extractor(dataset):
    image_dataset = pd.DataFrame()
    for image in range(dataset.shape[0]):  #iterate through each file 
        #print(image)
        
        df = pd.DataFrame()  #Temporary data frame to capture information for each loop.
        #Reset dataframe to blank after each loop.
        
        img = dataset[image, :,:]
        
        #Full image
        #GLCM = greycomatrix(img, [1], [0, np.pi/4, np.pi/2, 3*np.pi/4])
        GLCM = greycomatrix(img, [1], [0])       
        GLCM_Energy = greycoprops(GLCM, 'energy')[0]
        df['Energy'] = GLCM_Energy
        GLCM_corr = greycoprops(GLCM, 'correlation')[0]
        df['Corr'] = GLCM_corr       
        GLCM_diss = greycoprops(GLCM, 'dissimilarity')[0]
        df['Diss_sim'] = GLCM_diss       
        GLCM_hom = greycoprops(GLCM, 'homogeneity')[0]
        df['Homogen'] = GLCM_hom       
        GLCM_contr = greycoprops(GLCM, 'contrast')[0]
        df['Contrast'] = GLCM_contr
        
        GLCM2 = greycomatrix(img, [3], [0])       
        GLCM_Energy2 = greycoprops(GLCM2, 'energy')[0]
        df['Energy2'] = GLCM_Energy2
        GLCM_corr2 = greycoprops(GLCM2, 'correlation')[0]
        df['Corr2'] = GLCM_corr2       
        GLCM_diss2 = greycoprops(GLCM2, 'dissimilarity')[0]
        df['Diss_sim2'] = GLCM_diss2       
        GLCM_hom2 = greycoprops(GLCM2, 'homogeneity')[0]
        df['Homogen2'] = GLCM_hom2       
        GLCM_contr2 = greycoprops(GLCM2, 'contrast')[0]
        df['Contrast2'] = GLCM_contr2

        GLCM3 = greycomatrix(img, [5], [0])       
        GLCM_Energy3 = greycoprops(GLCM3, 'energy')[0]
        df['Energy3'] = GLCM_Energy3
        GLCM_corr3 = greycoprops(GLCM3, 'correlation')[0]
        df['Corr3'] = GLCM_corr3       
        GLCM_diss3 = greycoprops(GLCM3, 'dissimilarity')[0]
        df['Diss_sim3'] = GLCM_diss3       
        GLCM_hom3 = greycoprops(GLCM3, 'homogeneity')[0]
        df['Homogen3'] = GLCM_hom3       
        GLCM_contr3 = greycoprops(GLCM3, 'contrast')[0]
        df['Contrast3'] = GLCM_contr3
        
        GLCM4 = greycomatrix(img, [0], [np.pi/4])       
        GLCM_Energy4 = greycoprops(GLCM4, 'energy')[0]
        df['Energy4'] = GLCM_Energy4
        GLCM_corr4 = greycoprops(GLCM4, 'correlation')[0]
        df['Corr4'] = GLCM_corr4       
        GLCM_diss4 = greycoprops(GLCM4, 'dissimilarity')[0]
        df['Diss_sim4'] = GLCM_diss4       
        GLCM_hom4 = greycoprops(GLCM4, 'homogeneity')[0]
        df['Homogen4'] = GLCM_hom4       
        GLCM_contr4 = greycoprops(GLCM4, 'contrast')[0]
        df['Contrast4'] = GLCM_contr4
        
        GLCM5 = greycomatrix(img, [0], [np.pi/2])       
        GLCM_Energy5 = greycoprops(GLCM5, 'energy')[0]
        df['Energy5'] = GLCM_Energy5
        GLCM_corr5 = greycoprops(GLCM5, 'correlation')[0]
        df['Corr5'] = GLCM_corr5       
        GLCM_diss5 = greycoprops(GLCM5, 'dissimilarity')[0]
        df['Diss_sim5'] = GLCM_diss5       
        GLCM_hom5 = greycoprops(GLCM5, 'homogeneity')[0]
        df['Homogen5'] = GLCM_hom5       
        GLCM_contr5 = greycoprops(GLCM5, 'contrast')[0]
        df['Contrast5'] = GLCM_contr5
        
        #Add more filters as needed
        #entropy = shannon_entropy(img)
        #df['Entropy'] = entropy

        
        #Append features from current image to the dataset
        image_dataset = image_dataset.append(df)
        
    return image_dataset

In [4]:
#Extract features from images
image_features = feature_extractor(images)
X_for_ML =image_features


### Train-test split

In [5]:
#train test

x1train, x1test, ytrain, ytest = train_test_split(X_for_ML, label1, test_size =0.25)

### Scaling

In [6]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
xtrain = scaler.fit_transform(x1train)
xtest = scaler.fit_transform(x1test)


# Define classifier GB

In [7]:
from sklearn.ensemble import GradientBoostingClassifier

gb = GradientBoostingClassifier(random_state=0)
gb.fit(xtrain, ytrain)

print("Accuracy on training set: {:.3f}".format(gb.score(xtrain, ytrain)))
print("Accuracy on test set: {:.3f}".format(gb.score(xtest, ytest)))

Accuracy on training set: 1.000
Accuracy on test set: 0.848


In [8]:
gb1 = GradientBoostingClassifier(random_state=10, max_depth=2)
gb1.fit(xtrain, ytrain)

print("Accuracy on training set: {:.3f}".format(gb1.score(xtrain, ytrain)))
print("Accuracy on test set: {:.3f}".format(gb1.score(xtest, ytest)))


Accuracy on training set: 0.997
Accuracy on test set: 0.848


In [9]:
pprint(gb1.get_params())

{'ccp_alpha': 0.0,
 'criterion': 'friedman_mse',
 'init': None,
 'learning_rate': 0.1,
 'loss': 'deviance',
 'max_depth': 2,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_iter_no_change': None,
 'random_state': 10,
 'subsample': 1.0,
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': 0,
 'warm_start': False}


In [10]:
predictions1 = gb1.predict(xtest)
print (classification_report(ytest, predictions1))

              precision    recall  f1-score   support

           0       0.93      1.00      0.97        28
           1       0.91      0.71      0.80        28
           2       0.78      0.85      0.82        34
           3       0.81      0.83      0.82        42

    accuracy                           0.85       132
   macro avg       0.86      0.85      0.85       132
weighted avg       0.85      0.85      0.85       132



### Grid search

In [11]:
# Define parameters

learning_rate= [0.1, 0.05, 0.01]

n_estimators= [100, 200, 300, 500]

# min_samples_split= [2,3,4]

# min_samples_leaf= [1,2,3]

max_depth = [2, 3, 4, 8]


# max_features = ['auto', 'sqrt']
 
# subsample = [0.001, 0.01, 0.5, 1.0,]


#random_state
random_state = [0, 1, 2, 3, 4, 5, 10, 40, 42]

param_grid1 = dict(learning_rate = learning_rate, n_estimators=n_estimators, 
                 max_depth = max_depth, random_state = random_state )

# Build the gridsearch
dfrst1 = GradientBoostingClassifier(n_estimators=n_estimators,
                                    learning_rate = learning_rate, 
                 max_depth = max_depth, random_state = random_state)
grid1 = GridSearchCV(estimator=dfrst1, param_grid=param_grid1, cv = 15)
grid_results1 = grid1.fit(xtrain, ytrain)


# Summarize the results in a readable format
print("Best: {0}, using {1}".format(grid_results1.cv_results_['mean_test_score'], grid_results1.best_params_))
results_df1 = pd.DataFrame(grid_results1.cv_results_)
results_df1

Best: [0.85327635 0.85327635 0.85327635 0.85327635 0.85327635 0.85327635
 0.85327635 0.85327635 0.85327635 0.85840456 0.85346629 0.85840456
 0.85840456 0.85840456 0.85840456 0.85840456 0.85346629 0.85840456
 0.85346629 0.85346629 0.85346629 0.85346629 0.85346629 0.85346629
 0.85346629 0.85346629 0.85346629 0.85593542 0.85593542 0.85593542
 0.85593542 0.85593542 0.85593542 0.85593542 0.85593542 0.85593542
 0.84567901 0.84824311 0.84824311 0.85090218 0.85080722 0.84824311
 0.84577398 0.84577398 0.84567901 0.85071225 0.84814815 0.85071225
 0.84814815 0.84814815 0.85071225 0.84558405 0.85318139 0.85574549
 0.85584046 0.85840456 0.85584046 0.85584046 0.85840456 0.85584046
 0.85584046 0.85840456 0.85584046 0.84586895 0.85099715 0.84586895
 0.84586895 0.85099715 0.84586895 0.84586895 0.85099715 0.84586895
 0.85090218 0.85080722 0.85346629 0.85090218 0.85080722 0.84824311
 0.85346629 0.85840456 0.85080722 0.84833808 0.84833808 0.85090218
 0.85090218 0.84330484 0.84577398 0.84833808 0.85090218 

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,param_max_depth,param_n_estimators,param_random_state,params,split0_test_score,...,split8_test_score,split9_test_score,split10_test_score,split11_test_score,split12_test_score,split13_test_score,split14_test_score,mean_test_score,std_test_score,rank_test_score
0,0.589954,0.026095,0.000869,0.000500,0.1,2,100,0,"{'learning_rate': 0.1, 'max_depth': 2, 'n_esti...",0.740741,...,0.807692,0.846154,0.846154,0.846154,0.884615,0.923077,0.807692,0.853276,0.044975,67
1,0.635200,0.031246,0.000800,0.000400,0.1,2,100,1,"{'learning_rate': 0.1, 'max_depth': 2, 'n_esti...",0.740741,...,0.807692,0.846154,0.846154,0.846154,0.884615,0.923077,0.807692,0.853276,0.044975,67
2,0.584051,0.035267,0.000602,0.000492,0.1,2,100,2,"{'learning_rate': 0.1, 'max_depth': 2, 'n_esti...",0.740741,...,0.807692,0.846154,0.846154,0.846154,0.884615,0.923077,0.807692,0.853276,0.044975,67
3,0.557029,0.009029,0.000734,0.000442,0.1,2,100,3,"{'learning_rate': 0.1, 'max_depth': 2, 'n_esti...",0.740741,...,0.807692,0.846154,0.846154,0.846154,0.884615,0.923077,0.807692,0.853276,0.044975,67
4,0.568825,0.025559,0.000800,0.000400,0.1,2,100,4,"{'learning_rate': 0.1, 'max_depth': 2, 'n_esti...",0.740741,...,0.807692,0.846154,0.846154,0.846154,0.884615,0.923077,0.807692,0.853276,0.044975,67
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,10.814425,0.241160,0.005932,0.000249,0.01,8,500,4,"{'learning_rate': 0.01, 'max_depth': 8, 'n_est...",0.666667,...,0.884615,0.846154,0.730769,0.730769,0.884615,0.846154,0.653846,0.800285,0.079274,381
428,10.754427,0.210965,0.005861,0.000497,0.01,8,500,5,"{'learning_rate': 0.01, 'max_depth': 8, 'n_est...",0.666667,...,0.884615,0.846154,0.730769,0.730769,0.884615,0.846154,0.653846,0.797816,0.078199,393
429,11.590090,2.071322,0.006202,0.000652,0.01,8,500,10,"{'learning_rate': 0.01, 'max_depth': 8, 'n_est...",0.666667,...,0.884615,0.846154,0.730769,0.730769,0.884615,0.846154,0.615385,0.795252,0.083339,399
430,11.048685,0.725580,0.005866,0.000500,0.01,8,500,40,"{'learning_rate': 0.01, 'max_depth': 8, 'n_est...",0.666667,...,0.884615,0.807692,0.730769,0.730769,0.884615,0.846154,0.653846,0.800285,0.080508,380
