## Image Clustering

In [6]:
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
import numpy as np
from sklearn.cluster import KMeans
import glob

In [2]:
# Load VGG16 with ImageNet weights and without its top (classifier) layers,
# so `model.predict` returns convolutional feature maps rather than class
# scores. The summary below lists the layers that remain.
model = VGG16(include_top=False, weights='imagenet')
model.summary()











_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         


In [3]:
# Sanity check on a single image: run it through the network once and
# inspect the shape of the feature tensor that comes back.
img_path = 'images/image_1.jpg'
img = image.load_img(img_path, target_size=(224, 224))

# Image -> array -> add a leading batch axis -> VGG16 input preprocessing.
img_data = preprocess_input(
    np.expand_dims(image.img_to_array(img), axis=0)
)

vgg16_feature = model.predict(img_data)
print(vgg16_feature.shape)

(1, 7, 7, 512)


In [7]:
# Build one flattened VGG16 feature vector per image matched by `path`.
vgg16_feature_list = []
path = 'images/cl2/*'

# glob returns matches in arbitrary order, so sort once and keep the list:
# row i of the feature list (and later cluster label i) then always
# corresponds to image_paths[i], and the order is stable across runs.
image_paths = sorted(glob.glob(path))
if not image_paths:
    # Fail here with a clear message instead of letting clustering fail
    # later on an empty feature matrix (typically a wrong working directory).
    raise FileNotFoundError('No files matched pattern: ' + path)

for im in image_paths:
    # Same preprocessing as the single-image check: resize to 224x224,
    # convert to an array, add a batch axis, apply VGG16 preprocessing.
    img = image.load_img(im, target_size=(224, 224))
    img_data = image.img_to_array(img)
    img_data = np.expand_dims(img_data, axis=0)
    img_data = preprocess_input(img_data)

    vgg16_feature = model.predict(img_data)
    # asarray avoids an extra copy; flatten() already returns a fresh 1-D
    # vector, which is what gets stored as this image's feature row.
    vgg16_feature_np = np.asarray(vgg16_feature)
    vgg16_feature_list.append(vgg16_feature_np.flatten())

In [8]:
# Combine the per-image feature vectors into a single array (one entry per
# image) that the clusterer can consume.
vgg16_feature_list_np = np.asarray(vgg16_feature_list)

# Two clusters; random_state is fixed so repeated runs start from the same
# seed. fit() returns the fitted estimator, which is kept as `kmeans`.
kmeans = KMeans(random_state=0, n_clusters=2)
kmeans = kmeans.fit(vgg16_feature_list_np)

In [12]:
# Cluster index assigned to each feature vector, in the order the vectors
# were passed to fit().
cluster_labels = kmeans.labels_
print(cluster_labels)

[1 0 0 0 1 0 1 1 0 1 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0]


In [10]:
# Show the files matched by `path` so the printed labels can be read
# against file names.
# NOTE(review): this is a fresh glob call; the listing lines up with the
# labels only if it returns files in the same order as the call used during
# feature extraction -- confirm.
matched_files = glob.glob(path)
matched_files

['images/cl2/0C396AF2A4B94CE38389F71394D7C859.jpg',
 'images/cl2/0FE26E2941A943A7B1EBCEBD4C739436.jpg',
 'images/cl2/11DBBE0366B84BCBB8389E01142D040F.jpg',
 'images/cl2/14236F54FAC14F75A3762AA9715D57B0.jpg',
 'images/cl2/1ACD18D5F1C1443085CFBB94A86A7D8D.jpg',
 'images/cl2/1E023D8C367E4E678AC225B18A36FFD1.jpg',
 'images/cl2/21115429C56E442AA4B9EB5CC7C6929C.jpg',
 'images/cl2/2B7077131E964A1DABC986FDBB01FCB6.jpg',
 'images/cl2/38B416F23E7E492D87BBC236A0293153.jpg',
 'images/cl2/3FDAE8EBF8814A1589A670D997A15000.jpg',
 'images/cl2/4AA2F26EBD9C44139D94C6448BB51E9A.jpg',
 'images/cl2/541D7A2CE4874F2A9B2F4FB084C46F03.jpg',
 'images/cl2/55B9C7C3D0C24BAEA8F8816447B4B5CD.jpg',
 'images/cl2/6AC7A353A31C42EE8B546ED3C90DE789.jpg',
 'images/cl2/6B1502054CA14BE2BD5975BBF8715493.jpg',
 'images/cl2/6C2EE2C3C73A4E58AB28CC0C2CD49865.jpg',
 'images/cl2/78EF20B05523424DA40D373F1FFA9DF8.jpg',
 'images/cl2/914F0D0B242B4FA588EC8BCBAAB11161.jpg',
 'images/cl2/B5F4E7FD11F3416290A6B792CE8B5D13.jpg',
 'images/cl2