In [1]:
import math
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from skimage.color import rgb2gray
from skimage.io import imread
from skimage.transform import resize
from sklearn import svm, metrics, datasets
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.utils import Bunch

%matplotlib notebook

In [2]:

def _to_grayscale(img):
    """Reduce an image array, as returned by ``imread``, to a single 2-D channel."""
    if img.ndim == 2:
        # Already single-channel. Recent scikit-image releases make rgb2gray
        # reject 2-D input, so it must not be called here.
        return img
    if img.shape[-1] >= 3:
        # Keep only the R, G, B planes so an alpha channel never reaches rgb2gray.
        return rgb2gray(img[..., :3])
    # Gray (or gray + alpha) stored with an explicit channel axis.
    return img[..., 0]


def load_image_files(container_path, dimension=(64, 64)):
    """
    Load image files with categories as subfolder names,
    returning a container shaped like a scikit-learn sample dataset.

    Every regular, non-hidden file found directly inside a category folder is
    read, converted to grayscale, resized to ``dimension`` and flattened into
    one row of the data matrix.

    Parameters
    ----------
    container_path : string or path-like
        Path to the main folder holding one subfolder per category
    dimension : tuple
        (rows, cols) size every image is resized to

    Returns
    -------
    Bunch
        ``data``         : float array, shape (n_images, dimension[0] * dimension[1])
        ``target``       : int array, shape (n_images,), index into ``target_names``
        ``target_names`` : list of category (subfolder) names, sorted by name
        ``DESCR``        : short text description
    """
    image_dir = Path(container_path)
    # iterdir() yields entries in arbitrary order; sorting keeps the mapping
    # "label index -> category name" identical from one run/machine to the next.
    folders = sorted(
        (entry for entry in image_dir.iterdir() if entry.is_dir()),
        key=lambda entry: entry.name,
    )
    categories = [fo.name for fo in folders]

    descr = "A image classification dataset"

    # List each folder exactly once. Nested directories and hidden files
    # (e.g. ".DS_Store") are not images and would make imread fail.
    files_per_folder = [
        sorted(
            entry for entry in folder.iterdir()
            if entry.is_file() and not entry.name.startswith('.')
        )
        for folder in folders
    ]

    total_len = sum(len(files) for files in files_per_folder)
    print('length: '+str(total_len))

    # Pre-allocate the outputs so the images never have to be held in a
    # growing Python list.
    flat_data = np.zeros((total_len, dimension[0] * dimension[1]))
    target = np.zeros(total_len, dtype=int)  # class indices, not measurements
    index = 0

    for i, (direc, files) in enumerate(zip(folders, files_per_folder)):
        print('adding files to '+direc.name)
        for file in files:
            img = _to_grayscale(imread(file))
            img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
            flat_data[index] = img_resized.ravel()
            target[index] = i
            index += 1

    return Bunch(data=flat_data,
                 target=target,
                 target_names=categories,
                 DESCR=descr)

In [None]:
def getFeature(img_matrix):
    """Placeholder for a feature extractor; not implemented yet.

    The argument is currently ignored and ``None`` is returned.
    """
    return None

In [3]:
# Build the dataset from the category sub-folders of Fabrics_train/,
# resizing every image to 100x100 before flattening.
image_dataset = load_image_files("Fabrics_train/", dimension=(100, 100))

length: 7881
adding files to Velvet
adding files to Wool


KeyboardInterrupt: 

In [None]:

# Hold out 30% of the samples for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    image_dataset.data, image_dataset.target, test_size=0.3, random_state=109)

# Train with parameter optimization: an exhaustive search over a linear
# kernel and an RBF kernel, each with several regularisation strengths.
param_grid = [
    {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
    {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
]
svc = svm.SVC()
clf = GridSearchCV(svc, param_grid)
clf.fit(X_train, y_train)

# Predict the held-out samples.
y_pred = clf.predict(X_test)

# Report per-class metrics. Passing `labels` together with `target_names`
# prints category names instead of bare class indices, and keeps the two
# lists aligned even if a class happens to be missing from the test split.
class_ids = np.arange(len(image_dataset.target_names))
report = metrics.classification_report(
    y_test, y_pred, labels=class_ids, target_names=image_dataset.target_names)
print("Classification report for - \n{}:\n{}\n".format(clf, report))

# Persist the fitted search object (it wraps the best estimator).
# To reload it later: open the file in 'rb' mode and call pickle.load(f).
# Only unpickle files you trust, since unpickling can execute arbitrary code.
with open('./clf.pickle', 'wb') as f:
    pickle.dump(clf, f)
print('done')