In [103]:
import numpy as np
import cv2
import os
from sklearn.externals import joblib
from pomegranate import *

In [3]:
def crop_center(img, crop_size):
    """Return the central ``crop_size`` x ``crop_size`` window of ``img``.

    The window's top-left corner is placed at
    ``(rows // 2 - crop_size // 2, cols // 2 - crop_size // 2)`` and the
    result is obtained by slicing the first two axes of ``img``.

    No bounds checking is done: if ``crop_size`` exceeds an image dimension
    the start index becomes negative and ordinary slice semantics apply.
    """
    half = crop_size // 2
    top = img.shape[0] // 2 - half
    left = img.shape[1] // 2 - half
    return img[top:top + crop_size, left:left + crop_size]

In [3]:
def normalize_patches(patches, shuffle=True):
    """Rescale every patch (row of ``patches``) to the range [0, 1] as float32.

    Each row is handled independently: its mean is subtracted, the result is
    shifted so that its minimum is 0, and it is then divided by its maximum.
    Rows without any dynamic range (constant patches), or whose maximum is
    not finite, come out as all zeros.

    Parameters
    ----------
    patches : 2-D array
        One flattened patch per row.
    shuffle : bool, optional
        If True (default) the rows are shuffled first with
        ``np.random.shuffle``. This reorders the caller's array in place and
        draws from NumPy's global random state.

    Returns
    -------
    numpy.ndarray
        float32 array with the same shape as ``patches``.

    Notes
    -----
    NOTE(review): when ``patches`` is already float32, ``np.float32(patches)``
    may return the same array, in which case the normalisation is written
    into the caller's data - confirm before relying on the input afterwards.
    """
    if shuffle:
        np.random.shuffle(patches)
    patches = np.float32(patches)
    # Work row by row and in place on each row view: normalising the whole
    # array at once would need several full-size temporaries.
    for patch in patches:
        patch -= np.mean(patch)
        patch -= np.min(patch)
        peak = np.max(patch)
        if np.isfinite(peak) and peak > 0:
            patch /= peak
        else:
            # Constant (or non-finite) patch: dividing would give 0/0 = NaN
            # and a RuntimeWarning, so emit the all-zero patch directly.
            patch[:] = 0
    return patches

In [2]:
def generate_training_patches(processed_set, patches_per_image, b, seed=None):
    """Sample random ``b`` x ``b`` patches from every image in a directory.

    Every file in ``processed_set`` is read as a grayscale image and
    ``patches_per_image`` patches are cut out at uniformly random positions.
    The flattened patches are then passed through ``normalize_patches``
    (which also shuffles them).

    Parameters
    ----------
    processed_set : str
        Directory containing the images; every entry must be a readable image.
    patches_per_image : int
        Number of patches to draw from each image.
    b : int
        Patch side length in pixels.
    seed : int or None, optional
        Seed for NumPy's global random state. ``None`` reseeds it
        unpredictably.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(patches_per_image * n_images, b * b)`` as returned
        by ``normalize_patches``.

    Raises
    ------
    IOError
        If a file in ``processed_set`` cannot be decoded as an image.
    ValueError
        If an image is smaller than ``b`` along either axis.
    """
    # os.listdir gives no ordering guarantee; sort so that a fixed seed
    # always yields the same patches.
    imagenames = sorted(os.listdir(processed_set))
    total_num_of_patches = patches_per_image * len(imagenames)
    patches = np.empty((total_num_of_patches, b * b), np.uint8)
    np.random.seed(seed)
    j = 0
    for name in imagenames:
        path = os.path.join(processed_set, name)
        img = cv2.imread(path, 0)  # flag 0: load as single-channel grayscale
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Could not read image: ' + path)
        height, width = img.shape[:2]
        if height < b or width < b:
            raise ValueError('Image ' + path + ' is smaller than the patch size ' + str(b))
        for _ in range(patches_per_image):
            # Rows and columns are bounded separately so that non-square
            # images are sampled correctly.
            m = np.random.randint(0, height - b + 1)
            n = np.random.randint(0, width - b + 1)
            patches[j] = img[m:m + b, n:n + b].reshape(-1)
            j += 1
    return normalize_patches(patches)

In [3]:
def train(processed_set, max_iterations = 50, num_threads=2, \
          n_components = 200, batch_size = 80000):
    """Fit a Gaussian mixture model on patches from one training set and save it.

    ``processed_set`` is split on ``'/'``; component 2 is taken as the image
    size and component 3 as the processing name, i.e. a path of the form
    ``'../DataSet/<size>/<processing>/TRN/'``.

    Files used:

    * ``../Models/<size>/Patches_<processing>.pkl`` - cached training patches;
      loaded if present, otherwise generated (500 patches of 8x8 pixels per
      image) and written.
    * ``../Models/pomegranate/<size>/GMM_<processing>.pkl`` - the fitted model.
      If it already exists nothing is trained.

    Parameters
    ----------
    processed_set : str
        Directory of training images, laid out as described above.
    max_iterations, n_components, batch_size
        Forwarded to ``GeneralMixtureModel.from_samples``.
    num_threads : int
        Forwarded to ``from_samples`` as ``n_jobs``.

    Returns
    -------
    0 if the model file already existed, otherwise ``None``.
    """
    path_parts = processed_set.split('/')
    image_size = path_parts[2]
    processing = path_parts[3]
    patch_dir = '../Models/' + image_size + '/'
    check_dir = '../Models/pomegranate/' + image_size + '/'
    model_path = check_dir + 'GMM_' + processing + '.pkl'
    if os.path.exists(model_path):
        print(processing + ' with image size: ' + str(image_size) + ' already trained.')
        return 0
    print('training ' + processing + ' with image size: ' + str(image_size))
    patches_path = patch_dir + 'Patches_' + processing + '.pkl'
    if os.path.isfile(patches_path):
        # Memory-map the cached patches read-only instead of loading them fully.
        patches = joblib.load(patches_path, mmap_mode='r')
    else:
        print('Generating patches...')
        num_patches_per_image = 500
        patch_size = 8
        patches = generate_training_patches(processed_set, num_patches_per_image, patch_size)
        if not os.path.exists(patch_dir):
            os.makedirs(patch_dir)
        joblib.dump(patches, patches_path)
    # Create the output directory *before* the (potentially hours-long) fit so
    # that a missing directory cannot make the final dump fail.
    if not os.path.exists(check_dir):
        os.makedirs(check_dir)
    GMM = GeneralMixtureModel.from_samples(MultivariateGaussianDistribution, n_components=n_components, \
                                       X=patches, max_iterations=max_iterations, batch_size=batch_size, \
                                      n_jobs=num_threads, verbose=True)
    print('Saving GMM_' + processing + '.pkl...')
    joblib.dump(GMM, model_path)

In [5]:
# Train one model per (image size, processing type) combination. train()
# returns early for combinations whose model file already exists.
Processings = ['ORI', 'GF', 'JPG', 'MF', 'RS', 'USM', 'WGN']
im_sizes = [16, 32, 512]
for im_size in im_sizes:
    # Training directories follow the layout ../DataSet/<size>/<processing>/TRN/
    size_root = '../DataSet/' + str(im_size) + '/'
    Processings_TRN = [size_root + proc + '/TRN/' for proc in Processings]
    for processed_set in Processings_TRN:
        train(processed_set, batch_size=70000)
    print('Training for image size: ' + str(im_size) + ' completed.')
print('Training completed.')

ORI with image size: 16 already trained.
GF with image size: 16 already trained.
JPG with image size: 16 already trained.
MF with image size: 16 already trained.
RS with image size: 16 already trained.
USM with image size: 16 already trained.
WGN with image size: 16 already trained.
Training for image size: 16 completed.
ORI with image size: 32 already trained.
GF with image size: 32 already trained.
JPG with image size: 32 already trained.
MF with image size: 32 already trained.
RS with image size: 32 already trained.
USM with image size: 32 already trained.
WGN with image size: 32 already trained.
Training for image size: 32 completed.
ORI with image size: 512 already trained.
GF with image size: 512 already trained.
JPG with image size: 512 already trained.
MF with image size: 512 already trained.
training RS with image size: 512


  # This is added back by InteractiveShellApp.init_path()


[1] Improvement: 7015569.1714	Time (s): 631.7
[2] Improvement: 1385401.69971	Time (s): 633.4
[3] Improvement: 710556.269261	Time (s): 638.2
[4] Improvement: 598718.84971	Time (s): 634.9
[5] Improvement: 189229.438361	Time (s): 634.5
[6] Improvement: 155946.432916	Time (s): 637.1
[7] Improvement: 120996.605414	Time (s): 639.4
[8] Improvement: 95711.9580049	Time (s): 638.9
[9] Improvement: 76585.8847388	Time (s): 638.3
[10] Improvement: 75624.518366	Time (s): 639.4
[11] Improvement: 66830.3962671	Time (s): 641.9
[12] Improvement: 57893.1773736	Time (s): 639.5
[13] Improvement: 41810.9741499	Time (s): 638.2
[14] Improvement: 40154.3242707	Time (s): 639.7
[15] Improvement: 39171.0971249	Time (s): 637.7
[16] Improvement: 39000.2152262	Time (s): 639.4
[17] Improvement: 33873.6176957	Time (s): 643.4
[18] Improvement: 45659.2681283	Time (s): 640.4
[19] Improvement: -6317.07717045	Time (s): 643.3
Total Improvement: 10782416.8209
Total Time (s): 12760.3930
Saving GMM_RS.pkl...
training USM with 