In [56]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
%matplotlib notebook
import xgboost
from sklearn import metrics
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split
import math
from skimage.io import imread
from skimage.transform import resize
from skimage.color import rgb2gray

def load_image_files(container_path, dimension=(64, 64)):
    """
    Load image files with categories as subfolder names 
    which performs like scikit-learn sample dataset
    
    Parameters
    ----------
    container_path : string or unicode
        Path to the main folder holding one subfolder per category
    dimension : tuple
        size to which image are adjusted to
    Returns  Bunch
    """
    image_dir = Path(container_path)
    folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]
    categories = [fo.name for fo in folders]

    descr = "A image classification dataset"

    
#     revised code
#   get the total length
    total_len=0
    for direct in folders:
        for i,f in enumerate(direct.iterdir()):
            total_len+=1
            if i>10:
              break
    print('length: '+str(total_len))
    flat_data=np.zeros( (total_len,dimension[0]*dimension[1]) )
    target=np.zeros(total_len)
    index=0
    
    for i, direc in enumerate(folders):
        print('adding files to '+direc.name)
        for n,file in enumerate(direc.iterdir()):
            if file.name.endswith('.png') and not file.name.startswith('.'):
                img = rgb2gray(imread(file))
                img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
                flat_data[index]=(img_resized.flatten()) 
    #             images.append(img_resized)
                target[index]=i
                index+=1
                del img
                del img_resized
            if n>10:
              break

    return Bunch(data=flat_data,
                 target=target,
                 target_names=categories,
#                  images=images,
                 DESCR=descr)


In [57]:
image_dataset = load_image_files("Fabrics_train_lowClass/",(120,120))

length: 120
adding files to Felt
adding files to Artificial_leather
adding files to Cotton
adding files to Chenille
adding files to Blended
adding files to Corduroy
adding files to Denim
adding files to Artificial_fur
adding files to Acrylic
adding files to Crepe


In [62]:
# split data
X_train, X_test, y_train, y_test = train_test_split(
    image_dataset.data, image_dataset.target, test_size=0.3)

# --------------------------------
# clf25=xgboost.XGBClassifier()
# clf25.fit(X_train, y_train)
# # predict data
# y_pred = clf25.predict(X_test)
# --------------------------------
# Set our parameters for xgboost


# -------------try other manner

parameters = {
              'max_depth': [5, 10, 15, 20, 25],
              'learning_rate': [0.01, 0.02, 0.05, 0.1, 0.15],
              'n_estimators': [500, 1000, 2000, 3000, 5000],
              'min_child_weight': [0, 2, 5, 10, 20],
              'max_delta_step': [0, 0.2, 0.6, 1, 2],
              'subsample': [0.6, 0.7, 0.8, 0.85, 0.95],
              'colsample_bytree': [0.5, 0.6, 0.7, 0.8, 0.9],
              'reg_alpha': [0, 0.25, 0.5, 0.75, 1],
              'reg_lambda': [0.2, 0.4, 0.6, 0.8, 1],
              'scale_pos_weight': [0.2, 0.4, 0.6, 0.8, 1]

}

xlf = xgboost.XGBClassifier(max_depth=10,
                learning_rate=0.01,
                n_estimators=2000,
                silent=True,
                objective='multi:softmax',
                nthread=-1,
                gamma=0,
                min_child_weight=1,
                max_delta_step=0,
                subsample=0.85,
                colsample_bytree=0.7,
                colsample_bylevel=1,
                reg_alpha=0,
                reg_lambda=1,
                scale_pos_weight=1,
                missing=None)
# 有了gridsearch我们便不需要fit函数
gsearch = GridSearchCV(xlf, param_grid=parameters, scoring='accuracy', cv=10)
gsearch.fit(X_train, y_train)

print("Best score: %0.3f" % gsearch.best_score_)
print("Best parameters set:")
best_parameters = gsearch.best_estimator_.get_params()
for param_name in sorted(parameters.keys()):
    print("\t%s: %r" % (param_name, best_parameters[param_name]))
y_pred = xlf.predict(X_test)
# -----------------



# report
print("Classification report for - \n{}:\n{}\n".format(clf25, metrics.classification_report(y_test, y_pred)))


# dumping model
import pickle
with open('./clf_xgb_haralick.pickle','wb') as f:
    pickle.dump(clf25,f)
# with open('./clf.pickle','rb') as f:
#     clf2=pickle.load(f)

print('done')




KeyboardInterrupt: 

In [None]:
# testing clf classifier

image_dataset = load_image_files("Fabrics_train/",(120,120))

clf.predict(dataset.data,dataset.target)


p=Path('./Fabrics_train_copy/')
tlen=len([i for i in ])
label=[i]


In [14]:
clf

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='multi:softprob', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [30]:
import cv2
import mahotas as mt
from skimage import io
m=io.imread('./Fabrics_train_lowClass/Acrylic/0.png')
h=(mt.features.haralick((cv2.cvtColor(m,cv2.COLOR_RGB2GRAY))))
print((h.ndim))
print(m.ndim)

2
3


In [27]:
from skimage.color import rgb2grey
sk_grey=rgb2grey(m)
print(sk_grey)

[[0.4462851  0.44236353 0.4485251  ... 0.39191882 0.36332902 0.35857412]
 [0.45217882 0.45217882 0.45020667 ... 0.40258235 0.36838941 0.3518698 ]
 [0.45244667 0.45244667 0.45720157 ... 0.42050863 0.38913608 0.35384196]
 ...
 [0.89082902 0.88607412 0.88493529 ... 0.89029333 0.89029333 0.89029333]
 [0.87709216 0.86813294 0.8580498  ... 0.89196    0.8849651  0.87011216]
 [0.87288784 0.86813294 0.86699412 ... 0.88104353 0.87011216 0.86226902]]


In [52]:
cv_grey=cv2.cvtColor(m,cv2.COLOR_RGB2GRAY)
print(cv_grey)
print(sk_grey)

[[114 113 114 ... 100  93  92]
 [115 115 115 ... 103  94  90]
 [115 115 117 ... 107  99  90]
 ...
 [227 226 226 ... 227 227 227]
 [224 221 219 ... 227 226 222]
 [223 221 221 ... 225 222 220]]
[[0.4462851  0.44236353 0.4485251  ... 0.39191882 0.36332902 0.35857412]
 [0.45217882 0.45217882 0.45020667 ... 0.40258235 0.36838941 0.3518698 ]
 [0.45244667 0.45244667 0.45720157 ... 0.42050863 0.38913608 0.35384196]
 ...
 [0.89082902 0.88607412 0.88493529 ... 0.89029333 0.89029333 0.89029333]
 [0.87709216 0.86813294 0.8580498  ... 0.89196    0.8849651  0.87011216]
 [0.87288784 0.86813294 0.86699412 ... 0.88104353 0.87011216 0.86226902]]
