In [23]:
import sys
import os
import cv2
import itertools
import pickle
import numpy as np
import scipy.io
from PIL import Image, ImageFilter
from random import shuffle

In [2]:
def deskew(img, size=40):
    """Remove horizontal slant from an image using its central moments.

    The skew is estimated as ``mu11 / mu02`` from ``cv2.moments`` and undone
    with a horizontal shear applied through ``cv2.warpAffine``.

    Parameters
    ----------
    img : array accepted by ``cv2.moments`` / ``cv2.warpAffine``
        Image to deskew.  It is not modified in place.
    size : int, optional
        Width and height, in pixels, of the output image; also the height
        used to centre the shear.  Defaults to 40, the value that used to be
        hard-coded, so existing ``deskew(img)`` calls behave as before.

    Returns
    -------
    A copy of ``img`` when ``abs(mu02) < 1e-2``; otherwise the result of
    ``cv2.warpAffine`` with output size ``(size, size)``.
    """
    moments = cv2.moments(img)
    if abs(moments['mu02']) < 1e-2:
        # mu02 is (near) zero, so the ratio below would be numerically
        # meaningless: no deskewing is attempted, return an untouched copy.
        return img.copy()
    # Calculate skew based on central moments.
    skew = moments['mu11'] / moments['mu02']
    # Shear x by skew * (y - size / 2): rows at mid-height stay in place.
    shear = np.float32([[1, skew, -0.5 * size * skew], [0, 1, 0]])
    # WARP_INVERSE_MAP: `shear` is interpreted as the destination -> source map.
    return cv2.warpAffine(img, shear, (size, size),
                          flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR)

In [3]:
# Materialise the whole directory walk as a list of (dirpath, dirnames, filenames)
# entries.  os.walk is top-down by default, so the root folder itself comes first.
structures = list(os.walk('../Caltech10/'))

In [5]:
def pickle_operating(fname, item=None):
    """Save ``item`` to, or load an object from, ``<fname>.pickle``.

    Parameters
    ----------
    fname : str
        File path without extension; ``'.pickle'`` is appended.
    item : object, optional
        Object to store.  ``None`` (the default) means "load instead".

    Returns
    -------
    The unpickled object when loading; ``None`` after saving.

    Notes
    -----
    The mode is selected with ``item is None`` rather than truthiness, so
    empty containers, ``0`` and ``False`` are saved instead of silently
    triggering a load, and NumPy arrays (whose truth value is ambiguous)
    can be passed as ``item``.
    """
    file_name = '%s.pickle' % fname
    print(file_name)
    if item is None:
        # SECURITY: unpickling can execute arbitrary code -- only load files
        # written by this notebook or another trusted source.
        with open(file_name, 'rb') as fs:
            return pickle.load(fs)
    with open(file_name, 'wb') as fs:
        pickle.dump(item, fs, protocol=pickle.HIGHEST_PROTOCOL)

In [6]:
def save_imgs(folders):
    """Load every image listed in ``folders`` into a labelled dataset dict.

    Parameters
    ----------
    folders : iterable
        Entries shaped like ``os.walk`` output: index 0 is the directory
        path and index 2 the list of file names inside it.

    Returns
    -------
    dict
        ``{'categories': {dirpath: label}, 'data': {label: [(array, label), ...]}}``.
        Labels are consecutive integers starting at 0, one per entry of
        ``folders`` in iteration order -- entries without files still get a
        label and an empty sample list.  Each array is the int64 pixel data
        of the thumbnailed, grayscale, Gaussian-blurred image.
    """
    dataset = {'categories': {}, 'data': {}}
    size = 128, 128
    for label, entry in enumerate(folders):
        dirpath, filenames = entry[0], entry[2]
        samples = []
        dataset['categories'][dirpath] = label
        dataset['data'][label] = samples
        for filename in filenames:
            # The context manager closes the underlying file handle as soon as
            # the pixels have been copied out, instead of leaking one per image.
            with Image.open(os.path.join(dirpath, filename)) as image:
                # thumbnail() shrinks in place to fit within `size` while
                # keeping the aspect ratio, so array shapes may differ.
                image.thumbnail(size)
                img = image.convert("L").filter(ImageFilter.GaussianBlur(radius=2))
                im_data = np.asarray(img, dtype="int64")
            samples.append((im_data, label))
    return dataset

In [9]:
# Build the dataset from the directory walk, write it to dataset.pickle, then
# read the pickle back into a second variable.
dataset = save_imgs(structures)
pickle_operating(fname='dataset', item=dataset)
dataset_1 = pickle_operating(fname='dataset', item=None)

dataset.pickle
dataset.pickle


In [19]:
def train_test_splitting(size, data, seed=None):
    """Randomly split every class in ``data`` into a train and a test part.

    Parameters
    ----------
    size : float
        Fraction of each class assigned to the train part; the count is
        ``int(size * n)``, where ``n`` is the number of samples in the class.
    data : dict
        Maps a class key to a sequence of samples.
    seed : optional
        When given, a private ``random.Random(seed)`` drives the shuffling so
        the split is reproducible.  ``None`` (the default) keeps using the
        module-level ``shuffle`` and therefore the global random state.

    Returns
    -------
    (train_data, test_data)
        Two lists holding one NumPy array per class, in the iteration order
        of ``data``.
    """
    if seed is None:
        shuffle_in_place = shuffle
    else:
        # Local import: the notebook's import cell only brings in `shuffle`.
        from random import Random
        shuffle_in_place = Random(seed).shuffle

    train_data, test_data = [], []
    for key in data:
        count = len(data[key])
        # A real list is required: on Python 3 `range` is immutable and
        # shuffle() raises TypeError when handed one directly.
        order = list(range(count))
        shuffle_in_place(order)
        train_size = int(size * count)
        # NOTE(review): samples of differing shapes rely on NumPy building an
        # object array here; recent NumPy releases raise ValueError for such
        # ragged input -- confirm against what the callers pass.
        samples = np.array(data[key])  # built once, indexed twice
        train_data.append(samples[order[:train_size]])
        test_data.append(samples[order[train_size:]])
    return train_data, test_data

In [24]:
# Attach a 50/50 per-class split of the reloaded data to `dataset`, then export
# the two halves to a MATLAB .mat file.
dataset['experiment'] = {}
(dataset['experiment']['train'],
 dataset['experiment']['test']) = train_test_splitting(0.5, dataset_1['data'])
scipy.io.savemat(
    'myexpdata.mat',
    mdict={part: dataset['experiment'][part] for part in ('train', 'test')}
)