In [1]:
import numpy as np
import cv2
from math import floor
from Functions import *
import matplotlib.image as mpimg
import time
from scipy.misc import imresize
import scipy.io as io
#from multiprocessing import Pool, Lock
from sklearn.cluster import KMeans
from sklearn.externals import joblib

np.set_printoptions(suppress=True)



In [2]:
R = 4
patchSize = 11
patchMargin = floor(patchSize/2)
dim_pca = 1
patchNum = 3000

testPath = 'trainingData'
pcaSavePath = 'x'+str(R)+'_'+str(dim_pca)+'_'+str(patchNum)+'_patch_PCA_matrix.mat'
meanSavePath = 'x'+str(R)+'_'+str(dim_pca)+'_'+str(patchNum)+'_patch_mean_matrix.mat'
kmeansSavePath = 'x'+str(R)+'_'+str(dim_pca)+'_'+str(patchNum)+'_kmeans.model'
filelist = make_dataset(testPath)


sample = None
kmeans = KMeans(n_clusters=R*R, random_state=None)


In [3]:
imagecount = 1
for image in filelist:
    print('\r', end='')
    print('' * 60, end='')
    print('\r Processing ' + str(imagecount) + '/' + str(len(filelist)) + ' image (' + image + ')')
    im_uint8 = cv2.imread(image)
    im_mp = mpimg.imread(image)
    if len(im_mp.shape) == 2:
        im_uint8 = im_uint8[:,:,0]
    im_uint8 = modcrop(im_uint8, R)
    if len(im_uint8.shape) > 2:
        im_ycbcr = BGR2YCbCr(im_uint8)
        im = im_ycbcr[:, :, 0]
    else:
        im = im_uint8
    im_double = im2double(im)
    H, W = im.shape
    region = (slice(patchMargin, H - patchMargin), slice(patchMargin, W - patchMargin))
    #start = time.time()
    imL = imresize(im_double, 1 / R, interp='bicubic', mode='F')
    im_bicubic = imresize(imL, (H, W), interp='bicubic', mode='F')
    im_bicubic = im_bicubic.astype('float64')
    im_bicubic = np.clip(im_bicubic, 0, 1)
    im_LR = np.zeros((H+patchSize-1,W+patchSize-1))
    im_LR[(patchMargin):(H+patchMargin),(patchMargin):(W+patchMargin)] = im_bicubic
    im_result = np.zeros((H, W))
    #im_GX, im_GY = np.gradient(im_LR)
    index = np.array(range(im_LR.size)).reshape(im_LR.shape)
    offset = np.array(index[0:H, 0:W].ravel())
    rand = np.random.randint(0, offset.size, patchNum)
    gridon = index[0:patchSize, 0:patchSize]
    grid = np.tile(gridon[..., None], [1, 1, patchNum]) + np.tile(rand, [patchSize, patchSize, 1])
    if imagecount == 1:
        sample = im_LR.ravel()[grid]
    else:
        sample = np.concatenate((sample, im_LR.ravel()[grid]), axis=2)

    imagecount += 1
    
sample = sample.reshape(patchSize*patchSize, -1)
print(sample.shape)

# PCA
mean = np.mean(sample, axis=1).reshape(-1,1)
sample -= mean
ssT = np.dot(sample, sample.T) / sample.shape[1]
U, S, V = np.linalg.svd(ssT)
pca_matrix = U[:, :dim_pca].T
sample_reduced = np.dot(pca_matrix, sample).reshape(-1, dim_pca)
print(sample_reduced.shape)
# Store the PCA
io.savemat(meanSavePath, {'m': mean})
io.savemat(pcaSavePath, {'p': pca_matrix})
print('PCA matrix saved\n')

# Clustering
kmeans = kmeans.fit(sample_reduced)
joblib.dump(kmeans, kmeansSavePath)
print('kmeans model saved\r')
# print(kmeans.cluster_centers_)



    


 Processing 1/200 image (trainingData/157036.jpg)
 Processing 2/200 image (trainingData/187029.jpg)
 Processing 3/200 image (trainingData/71046.jpg)


`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.3.0.
Use Pillow instead: ``numpy.array(Image.fromarray(arr).resize())``.
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.3.0.
Use Pillow instead: ``numpy.array(Image.fromarray(arr).resize())``.


 Processing 4/200 image (trainingData/299091.jpg)
 Processing 5/200 image (trainingData/242078.jpg)
 Processing 6/200 image (trainingData/232038.jpg)
 Processing 7/200 image (trainingData/173036.jpg)
 Processing 8/200 image (trainingData/198054.jpg)
 Processing 9/200 image (trainingData/388016.jpg)
 Processing 10/200 image (trainingData/292066.jpg)
 Processing 11/200 image (trainingData/189011.jpg)
 Processing 12/200 image (trainingData/59078.jpg)
 Processing 13/200 image (trainingData/108041.jpg)
 Processing 14/200 image (trainingData/66075.jpg)
 Processing 15/200 image (trainingData/20008.jpg)
 Processing 16/200 image (trainingData/370036.jpg)
 Processing 17/200 image (trainingData/41025.jpg)
 Processing 18/200 image (trainingData/207056.jpg)
 Processing 19/200 image (trainingData/314016.jpg)
 Processing 20/200 image (trainingData/188091.jpg)
 Processing 21/200 image (trainingData/56028.jpg)
 Processing 22/200 image (trainingData/134008.jpg)
 Processing 23/200 image (trainingData/153

# Playground

In [6]:
sample.shape

(121, 600000)

In [None]:
kmeans.cluster_centers_

In [9]:
np.count_nonzero(kmeans.labels_)

340067

In [8]:
np.sum(kmeans.labels_==3)

614

In [7]:
S

array([4.15005383, 0.25179007, 0.19843432, 0.06874853, 0.05521984,
       0.03884089, 0.02875973, 0.02323614, 0.01864805, 0.01738227,
       0.01355224, 0.0096365 , 0.00923075, 0.00827282, 0.00754641,
       0.00749147, 0.00498264, 0.00470902, 0.00449849, 0.00443665,
       0.00375496, 0.0033786 , 0.00306299, 0.00298789, 0.00276854,
       0.00267204, 0.0024356 , 0.00225047, 0.00206228, 0.0020455 ,
       0.00189912, 0.00128627, 0.00126568, 0.00112841, 0.0010248 ,
       0.000984  , 0.00077913, 0.00074539, 0.00053144, 0.00047638,
       0.00046261, 0.00039562, 0.00037052, 0.00034805, 0.00032054,
       0.00031788, 0.00025687, 0.00024069, 0.00018133, 0.00017973,
       0.00016796, 0.00016049, 0.00015801, 0.00015672, 0.00011388,
       0.00010976, 0.00008082, 0.00007999, 0.00007875, 0.00007712,
       0.00006935, 0.00006722, 0.00006524, 0.00004899, 0.00004523,
       0.00003846, 0.00003653, 0.00003451, 0.0000337 , 0.00003192,
       0.00003024, 0.00003022, 0.00002386, 0.00002267, 0.00001

In [5]:
pca_matrix = io.loadmat('2000_patch_PCA_matrix.mat')['p']
pca_matrix.shape

(1, 121)

In [23]:
kmeans = joblib.load('1_2000_kmeans.model')
np.sum(kmeans.labels_==0)

134849