In [11]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
%matplotlib notebook
from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split
import math
from skimage.io import imread
from skimage.transform import resize
import cv2
import mahotas as mt


In [50]:
def getFeature(img):
    return mt.features.haralick(img)
def load_image_files(container_path, dimension=(64, 64)):
    """
    Load image files with categories as subfolder names 
    which performs like scikit-learn sample dataset
    
    Parameters
    ----------
    container_path : string or unicode
        Path to the main folder holding one subfolder per category
    dimension : tuple
        size to which image are adjusted to
        
    Returns
    -------
    Bunch
    """
    image_dir = Path(container_path)
    folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]
    categories = [fo.name for fo in folders]

    descr = "A image classification dataset"
    
#     max image numbers
    max_images=1000
    
#     revised code
#   get the total length
    total_len=0
    for direct in folders:
        for n,f in enumerate(direct.iterdir()):
            total_len+=1
            if n>max_images:
                break
            
    print('length: '+str(total_len))
    flat_data=[]
    target=np.zeros(total_len)
    index=0
    
    for i, direc in enumerate(folders):
        print('importing files from '+direc.name)
        for n,file in enumerate(direc.iterdir()):
            img = cv2.cvtColor(imread(file),cv2.COLOR_RGB2GRAY)
            img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
            img_resized=img_resized.astype(np.int)
            img_resized=getFeature(img_resized).flatten()
#             flat_data[index]=(img_resized.flatten())
            flat_data.append(img_resized)
#             images.append(img_resized)
            target[index]=i
            index+=1
            if n>max_images:
                break
            del img
            del img_resized
    flat_data=np.array(flat_data)
# #     original code
##     images = []
#     flat_data = []
#     target = []
#     for i, direc in enumerate(folders):
#         print('adding files to '+direc.name)
#         for file in direc.iterdir():
#             img = imread(file)
#             img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
#             flat_data.insert(img_resized.flatten(),0) 
# #             images.append(img_resized)
#             target.insert(i,0)
            
#     flat_data = np.array(flat_data)
#     target = np.array(target)
#     images = np.array(images)

    return Bunch(data=flat_data,
                 target=target,
                 target_names=categories,
#                  images=images,
                 DESCR=descr)

In [51]:
image_dataset = load_image_files("Fabrics_train_lowClass/",(120,120))

length: 10020
importing files from Felt
importing files from Artificial_leather
importing files from Cotton
importing files from Chenille
importing files from Blended
importing files from Corduroy
importing files from Denim
importing files from Artificial_fur
importing files from Acrylic
importing files from Crepe


In [52]:

# split data
X_train, X_test, y_train, y_test = train_test_split(
    image_dataset.data, image_dataset.target, test_size=0.3,random_state=109)
# train data with parameter optimization
param_grid = [
  {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
 ]
svc = svm.SVC()
clf = GridSearchCV(svc, param_grid)

clf.fit(X_train, y_train)

# predict data
y_pred = clf.predict(X_test)

# report
print("Classification report for - \n{}:\n{}\n".format(clf, metrics.classification_report(y_test, y_pred)))


# # dumping model

# import pickle
# with open('./clf.pickle','wb') as f:
#     pickle.dump(clf,f)
# # with open('./clf.pickle','rb') as f:
# #     clf2=pickle.load(f)
# print('done')



Classification report for - 
GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']}, {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0):
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00       292
         1.0       0.00      0.00      0.00       287
         2.0       0.00      0.00      0.00       297
         3.0       0.46      0.05      0.09       325
         4.0       0.10      1.00      0.18       285
         5.0       0.00      0.00      0.

  'precision', 'predicted', average, warn_for)
