In [1]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
%matplotlib notebook
from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split
import math
from skimage.io import imread
from skimage.transform import resize
from skimage.color import rgb2gray
from skimage.feature import hog


In [2]:

def load_image_files(container_path, dimension=(64, 64)):
    """
    Load image files with categories as subfolder names 
    which performs like scikit-learn sample dataset
    
    Parameters
    ----------
    container_path : string or unicode
        Path to the main folder holding one subfolder per category
    dimension : tuple
        size to which image are adjusted to
        
    Returns
    -------
    Bunch
    """
    image_dir = Path(container_path)
    folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]
    categories = [fo.name for fo in folders]

    descr = "A image classification dataset"

    
#     revised code
#   get the total length
    total_len=0
    for direct in folders:
        for i,f in enumerate(direct.iterdir()):
            total_len+=1
            if i>500:
                break
    print('length: '+str(total_len))
    flat_data=[]
    target=np.zeros(total_len)
    index=0
    
    for i, direc in enumerate(folders):
        print('adding files to '+direc.name)
        for n,file in enumerate(direc.iterdir()):
            img = rgb2gray(imread(file))
            img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
#             flat_data[index]=(img_resized.flatten())
            flat_data.append(getFeature(img_resized))
#             images.append(img_resized)
            target[index]=i
            index+=1
            del img
            del img_resized
            if n>500:
                break
    flat_data=np.array(flat_data)
# #     original code
##     images = []
#     flat_data = []
#     target = []
#     for i, direc in enumerate(folders):
#         print('adding files to '+direc.name)
#         for file in direc.iterdir():
#             img = imread(file)
#             img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
#             flat_data.insert(img_resized.flatten(),0) 
# #             images.append(img_resized)
#             target.insert(i,0)
            
#     flat_data = np.array(flat_data)
#     target = np.array(target)
#     images = np.array(images)

    return Bunch(data=flat_data,
                 target=target,
                 target_names=categories,
#                  images=images,
                 DESCR=descr)

In [3]:
# def getFeature(img_matrix):
#     ppc = 16
#     hog_images = []
#     hog_features = []
#     for image in img_matrix:
#         fd = hog(image, orientations=8, pixels_per_cell=(ppc,ppc),cells_per_block=(4, 4),block_norm= 'L2')
#         hog_features.append(fd)
#     return hog_features
def getFeature(img):
    return hog(img,orientations=8,pixels_per_cell=(16,16),cells_per_block=(4,4),block_norm='L2')

In [4]:
image_dataset = load_image_files("Fabrics_train_lowClass/",(100,100))
# image_dataset = load_image_files("images/",(100,100))

# split data
X_train, X_test, y_train, y_test = train_test_split(
    image_dataset.data, image_dataset.target, test_size=0.3,random_state=109)

# train data with parameter optimization
param_grid = [
  {'C': [1, 10, 100, 1000], 'kernel': ['rbf']},
  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
 ]
svc = svm.SVC()
clf = GridSearchCV(svc, param_grid)
clf.fit(X_train, y_train)

# predict data
y_pred = clf.predict(X_test)

# report
print("Classification report for - \n{}:\n{}\n".format(clf, metrics.classification_report(y_test, y_pred)))


# dumping model

import pickle
with open('./clf_hog_rbf.pickle','wb') as f:
    pickle.dump(clf,f)
# with open('./clf.pickle','rb') as f:
#     clf2=pickle.load(f)
print('done')

length: 5020
adding files to Felt
adding files to Artificial_leather
adding files to Cotton
adding files to Chenille
adding files to Blended
adding files to Corduroy
adding files to Denim
adding files to Artificial_fur
adding files to Acrylic
adding files to Crepe




Classification report for - 
GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['rbf']}, {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0):
              precision    recall  f1-score   support

         0.0       0.41      0.64      0.50       149
         1.0       0.52      0.55      0.53       153
         2.0       0.28      0.25      0.26       159
         3.0       0.30      0.19      0.23       159
         4.0       0.19      0.28      0.23       123
         5.0       0.23      0.19      0.21 