In [1]:
#installing and importing necessary packages
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.6.5-cp37-cp37m-manylinux2014_x86_64.whl (7.9 MB)
[K     |████████████████████████████████| 7.9 MB 2.8 MB/s eta 0:00:01
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.6.5


In [2]:
import numpy as np 
import faiss  # this will import the faiss library

In [3]:
import pandas as pd

In [4]:
import numpy as np
from numpy.linalg import norm
import pickle
from tqdm import tqdm, tqdm_notebook
import os
import time
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input

In [5]:
# ResNet50 with ImageNet weights and without its classification head
# (include_top=False), built for 180x180 RGB inputs. It is passed to
# extract_features() as the feature extractor.
model = ResNet50(weights='imagenet', include_top=False,
                 input_shape=(180, 180, 3))
def extract_features(img_path, model, target_size=(180, 180)):
    """Return the flattened, L2-normalised `model` prediction for one image.
    Args:
        img_path: Path of the image file to load.
        model: Object whose `predict` output is used as the feature map.
        target_size: (height, width) the image is resized to when loaded.
            Defaults to the 180x180 size that used to be hard-coded here.
    Returns:
        1-D array holding the flattened prediction scaled to unit L2 norm.
        An all-zero prediction is returned unscaled, because dividing it by
        its zero norm would produce NaNs.
    """
    img = image.load_img(img_path, target_size=target_size)
    img_array = image.img_to_array(img)
    # predict() works on batches, so wrap the single image as a batch of one.
    expanded_img_array = np.expand_dims(img_array, axis=0)
    preprocessed_img = preprocess_input(expanded_img_array)
    features = model.predict(preprocessed_img)
    flattened_features = features.flatten()
    feature_norm = norm(flattened_features)
    if feature_norm == 0:
        # 0 / 0 would give NaN and corrupt every distance computed from it.
        return flattened_features
    return flattened_features / feature_norm

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [6]:
# Smoke test: extract features for a single image and print the vector length
# (73728 in the recorded output).
features = extract_features('../input/style1/style/0_0_001.png', model)
print(len(features))

73728


In [7]:
# Image file suffixes accepted by get_file_list (matched case-insensitively).
extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']
def get_file_list(root_dir):
    """Recursively collect the image files found under `root_dir`.
    A file is kept when its name ENDS with one of `extensions`, compared
    case-insensitively. The earlier substring test also accepted names such
    as 'photo.png.txt'.
    Args:
        root_dir: Directory to walk.
    Returns:
        List of paths (root joined with the file name) in os.walk order; the
        list is empty when nothing matches or `root_dir` does not exist.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    file_list = []
    for root, _directories, filenames in os.walk(root_dir):
        for filename in filenames:
            # str.endswith accepts a tuple, so one call checks every suffix.
            if filename.lower().endswith(suffixes):
                file_list.append(os.path.join(root, filename))
    return file_list

In [8]:
# Directory holding the images to index.
root_dir = '../input/style1/style/'
# Sort the paths so the row order of every later array/table is deterministic.
filenames = sorted(get_file_list(root_dir))

In [9]:
# tqdm.tqdm_notebook is deprecated (it emitted a warning when this cell ran);
# tqdm.notebook.tqdm is the supported notebook progress bar.
from tqdm.notebook import tqdm as notebook_tqdm
# One feature vector per image, in the same order as `filenames`.
feature_list = [extract_features(path, model) for path in notebook_tqdm(filenames)]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(FloatProgress(value=0.0, max=2184.0), HTML(value='')))




In [10]:
# Number of image paths collected by get_file_list.
len(filenames)

2184

In [11]:
# Inspect the extracted vectors (one array per entry of `filenames`).
# NOTE(review): this displays the whole list; a slice such as feature_list[:5]
# would keep the output short.
feature_list

[array([0., 0., 0., ..., 0., 0., 0.], dtype=float32),
 array([0., 0., 0., ..., 0., 0., 0.], dtype=float32),
 array([0., 0., 0., ..., 0., 0., 0.], dtype=float32),
 array([0., 0., 0., ..., 0., 0., 0.], dtype=float32),
 array([0.        , 0.        , 0.        , ..., 0.00050838, 0.        ,
        0.        ], dtype=float32),
 array([0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.00408064], dtype=float32),
 array([0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.00587873], dtype=float32),
 array([0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.00323893], dtype=float32),
 array([0.        , 0.        , 0.        , ..., 0.00089381, 0.        ,
        0.0055381 ], dtype=float32),
 array([0., 0., 0., ..., 0., 0., 0.], dtype=float32),
 array([0., 0., 0., ..., 0., 0., 0.], dtype=float32),
 array([0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.00370194], dtype=float32),
 array([0.      

In [12]:
# Stack the per-image vectors into one array; note this rebinds `feature_list`
# from a Python list to a numpy.ndarray.
feature_list=np.array(feature_list)

In [13]:
# Confirm the conversion above produced an ndarray.
type(feature_list)

numpy.ndarray

In [14]:
# (number of images, features per image) -- (2184, 73728) in the recorded output.
feature_list.shape

(2184, 73728)

In [15]:
# Database vectors to index. Made explicitly float32 and C-contiguous so the
# array handed to FAISS has the layout it expects; no copy is made when
# feature_list already has that layout.
db_vectors = np.ascontiguousarray(feature_list, dtype='float32')
# Read the sizes from the data instead of hard-coding them, so a different
# model or input size does not silently disagree with the index dimension.
n, dimension = db_vectors.shape  # number of vectors, dimensions of each vector
# Leftover from the random-data variant of this cell
# (np.random.random((n, dimension)).astype('float32')); kept so the global
# NumPy RNG state is unchanged for any later code.
np.random.seed(1)

In [16]:
# Should equal (n, dimension).
db_vectors.shape

(2184, 73728)

In [17]:
# Build an (untrained) IVF index over `dimension`-sized vectors using L2 distance.
nlist = 1  # number of clusters
# NOTE(review): with a single cluster every vector presumably lands in the same
# inverted list, so the IVF structure does not narrow the search -- confirm this
# is intended.
quantiser = faiss.IndexFlatL2(dimension)  
index = faiss.IndexIVFFlat(quantiser, dimension, nlist,   faiss.METRIC_L2)

In [18]:
# Train the index on the database vectors, then add them; the prints show the
# state before and after (expected values in the trailing comments).
print(index.is_trained)   # False: not trained yet
index.train(db_vectors)  # train on the database vectors
print(index.ntotal)   # 0: nothing has been added yet
index.add(db_vectors)   # add the vectors and update the index
print(index.is_trained)  # True
print(index.ntotal)   # 2184: one entry per database vector

False
0
True
2184


In [19]:
nprobe = 1  # number of clusters (inverted lists) visited per query
k = 10  # number of nearest neighbours returned per query
# Leftover from the random-query variant of this cell
# (np.random.random((n_query, dimension)).astype('float32')); kept so the global
# NumPy RNG state is unchanged for any later code.
np.random.seed(0)
# Query with the database vectors themselves, so every image's nearest
# neighbour is itself.
query_vectors = np.ascontiguousarray(feature_list, dtype='float32')
n_query = len(query_vectors)  # number of query vectors (was hard-coded to 2184)
# Apply the setting to the index; previously `nprobe` was assigned but never used.
index.nprobe = nprobe
distances, indices = index.search(query_vectors, k)

In [20]:
# Distances from each query to its k neighbours, one row per query. In the
# recorded output column 0 is 0 because each query matches itself.
distances

array([[0.        , 0.8546827 , 0.87401235, ..., 0.95472765, 0.98643804,
        0.98724055],
       [0.        , 1.0561398 , 1.1098307 , ..., 1.1811651 , 1.1843462 ,
        1.189358  ],
       [0.        , 0.782049  , 0.79888684, ..., 0.965763  , 1.0055927 ,
        1.0351467 ],
       ...,
       [0.        , 1.0930816 , 1.330262  , ..., 1.3843709 , 1.38475   ,
        1.3897922 ],
       [0.        , 0.84995955, 0.8606195 , ..., 1.0844729 , 1.096912  ,
        1.1072882 ],
       [0.        , 0.9771166 , 1.1183319 , ..., 1.1956933 , 1.1984475 ,
        1.2153476 ]], dtype=float32)

In [21]:
# One row of distances per query vector.
len(distances)

2184

In [22]:
# Row positions of the k neighbours returned for each query; in the recorded
# output column 0 is the query's own row.
indices

array([[   0,   52,   25, ...,   80,   35,   85],
       [   1,   79,  854, ...,  233,   80,  645],
       [   2,   19,   36, ...,    3,   39,   40],
       ...,
       [2181,  447,  718, ..., 2134, 1892, 1917],
       [2182, 2110,  575, ...,  558, 2169,  499],
       [2183,  430, 2169, ..., 2163,  477, 1097]])

In [23]:
faiss.write_index(index,"vector.index")  # save the index to disk
# index = faiss.read_index("vector.index")  # load the index back from disk

In [24]:
# Reload the index that was just written, as a round-trip check.
# NOTE(review): `diskindex` is not referenced again in the visible cells.
diskindex = faiss.read_index("vector.index")

In [25]:
# Re-check the container type before pickling it below.
type(feature_list)

numpy.ndarray

In [26]:
# Re-check the array shape before pickling it below.
feature_list.shape

(2184, 73728)

In [27]:
# Cache the feature matrix and the matching file list on disk. The `with`
# blocks close (and flush) each file even if pickling raises; the bare open()
# calls used before left the handles open.
with open('features-caltech101-resnet.pickle', 'wb') as features_file:
    pickle.dump(feature_list, features_file)
with open('filenames-caltech101.pickle', 'wb') as filenames_file:
    pickle.dump(filenames, filenames_file)

In [28]:
# Reload the cached file list and feature matrix, closing each file afterwards.
# NOTE: pickle.load can execute arbitrary code -- only load files produced by
# this notebook, never pickles from an untrusted source.
with open('filenames-caltech101.pickle', 'rb') as filenames_file:
    filenames = pickle.load(filenames_file)
with open('features-caltech101-resnet.pickle', 'rb') as features_file:
    feature_list = pickle.load(features_file)

In [29]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [30]:
# Neighbour row positions from the search cell, shown again before they are
# mapped to file names.
indices

array([[   0,   52,   25, ...,   80,   35,   85],
       [   1,   79,  854, ...,  233,   80,  645],
       [   2,   19,   36, ...,    3,   39,   40],
       ...,
       [2181,  447,  718, ..., 2134, 1892, 1917],
       [2182, 2110,  575, ...,  558, 2169,  499],
       [2183,  430, 2169, ..., 2163,  477, 1097]])

In [31]:
# Spot-check one path to see its layout (directory parts + file name).
filenames[1]

'../input/style1/style/0_0_002.png'

In [32]:
# Inspect the sorted image paths.
# NOTE(review): this displays the whole list; filenames[:5] would keep the
# output short.
filenames

['../input/style1/style/0_0_001.png',
 '../input/style1/style/0_0_002.png',
 '../input/style1/style/0_0_003.png',
 '../input/style1/style/0_0_004.png',
 '../input/style1/style/0_0_005.png',
 '../input/style1/style/0_0_006.png',
 '../input/style1/style/0_0_007.png',
 '../input/style1/style/0_0_008.png',
 '../input/style1/style/0_0_009.png',
 '../input/style1/style/0_0_010.png',
 '../input/style1/style/0_0_011.png',
 '../input/style1/style/0_0_012.png',
 '../input/style1/style/0_0_013.png',
 '../input/style1/style/0_0_014.png',
 '../input/style1/style/0_0_015.png',
 '../input/style1/style/0_0_016.png',
 '../input/style1/style/0_0_017.png',
 '../input/style1/style/0_0_018.png',
 '../input/style1/style/0_0_019.png',
 '../input/style1/style/0_0_020.png',
 '../input/style1/style/0_0_021.png',
 '../input/style1/style/0_0_022.png',
 '../input/style1/style/0_0_023.png',
 '../input/style1/style/0_0_024.png',
 '../input/style1/style/0_0_025.png',
 '../input/style1/style/0_0_026.png',
 '../input/s

In [48]:
# Keep only the file name of each path. os.path.basename does not depend on how
# many directory levels root_dir has, unlike the previous fixed split('/')[4],
# which returned a directory name for images nested in sub-folders.
filenaam = [os.path.basename(path) for path in filenames]

In [49]:
# Inspect the bare file names derived from `filenames`.
# NOTE(review): this displays the whole list; filenaam[:5] would keep the
# output short.
filenaam

['0_0_001.png',
 '0_0_002.png',
 '0_0_003.png',
 '0_0_004.png',
 '0_0_005.png',
 '0_0_006.png',
 '0_0_007.png',
 '0_0_008.png',
 '0_0_009.png',
 '0_0_010.png',
 '0_0_011.png',
 '0_0_012.png',
 '0_0_013.png',
 '0_0_014.png',
 '0_0_015.png',
 '0_0_016.png',
 '0_0_017.png',
 '0_0_018.png',
 '0_0_019.png',
 '0_0_020.png',
 '0_0_021.png',
 '0_0_022.png',
 '0_0_023.png',
 '0_0_024.png',
 '0_0_025.png',
 '0_0_026.png',
 '0_0_027.png',
 '0_0_028.png',
 '0_0_029.png',
 '0_0_030.png',
 '0_0_031.png',
 '0_0_032.png',
 '0_0_033.png',
 '0_0_034.png',
 '0_0_035.png',
 '0_0_036.png',
 '0_0_037.png',
 '0_0_038.png',
 '0_0_039.png',
 '0_0_040.png',
 '0_0_041.png',
 '0_0_042.png',
 '0_0_043.png',
 '0_0_044.png',
 '0_0_045.png',
 '0_0_046.png',
 '0_0_047.png',
 '0_0_048.png',
 '0_0_049.png',
 '0_0_050.png',
 '0_0_051.png',
 '0_0_052.png',
 '0_0_053.png',
 '0_0_054.png',
 '0_0_055.png',
 '0_0_056.png',
 '0_0_057.png',
 '0_0_058.png',
 '0_0_059.png',
 '0_0_060.png',
 '0_0_061.png',
 '0_0_062.png',
 '0_0_06

In [50]:
# Row numbers 0..len(filenaam)-1, one per image.
# NOTE(review): this rebinds `index`, which until this cell referred to the
# FAISS index; re-running the train/add/search cells after this one will fail.
# The list itself is only referenced by a commented-out line further down.
index = list(range(len(filenaam)))

In [51]:
# Inspect the row-number list built in the previous cell.
# NOTE(review): this displays the whole list; index[:5] would keep the output
# short.
index

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,


In [52]:
# Single-column lookup table: the default integer row label is the image's
# position in `filenaam`, the 'images' column holds its file name.
df = pd.DataFrame({'images': filenaam})

In [53]:
# Preview the row-position -> file-name table.
df

Unnamed: 0,images
0,0_0_001.png
1,0_0_002.png
2,0_0_003.png
3,0_0_004.png
4,0_0_005.png
...,...
2179,6_9_017.png
2180,6_9_018.png
2181,6_9_019.png
2182,6_9_020.png


In [54]:
# One row per query image, one column per returned neighbour; the cells hold
# the row positions from `indices`.
indices_df = pd.DataFrame(indices)
indices_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,52,25,70,22,32,43,80,35,85
1,1,79,854,149,902,1662,695,233,80,645
2,2,19,36,94,38,92,93,3,39,40
3,3,93,38,96,94,2,65,39,92,40
4,4,26,59,587,14,32,25,1208,602,1217
...,...,...,...,...,...,...,...,...,...,...
2179,2179,2178,321,2166,984,2124,955,336,245,2170
2180,2180,2174,1856,1836,568,472,1072,470,1851,1477
2181,2181,447,718,1879,2114,443,2178,2134,1892,1917
2182,2182,2110,575,2112,559,2111,483,558,2169,499


In [55]:
# Show the file-name table again before it is converted to a dict.
df

Unnamed: 0,images
0,0_0_001.png
1,0_0_002.png
2,0_0_003.png
3,0_0_004.png
4,0_0_005.png
...,...
2179,6_9_017.png
2180,6_9_018.png
2181,6_9_019.png
2182,6_9_020.png


In [56]:
# Mapping {row position: file name}, taken straight from the 'images' column.
df1 = df['images'].to_dict()

In [57]:
# Inspect the {row position: file name} mapping.
# NOTE(review): this displays the whole dict (one entry per image).
df1

{0: '0_0_001.png',
 1: '0_0_002.png',
 2: '0_0_003.png',
 3: '0_0_004.png',
 4: '0_0_005.png',
 5: '0_0_006.png',
 6: '0_0_007.png',
 7: '0_0_008.png',
 8: '0_0_009.png',
 9: '0_0_010.png',
 10: '0_0_011.png',
 11: '0_0_012.png',
 12: '0_0_013.png',
 13: '0_0_014.png',
 14: '0_0_015.png',
 15: '0_0_016.png',
 16: '0_0_017.png',
 17: '0_0_018.png',
 18: '0_0_019.png',
 19: '0_0_020.png',
 20: '0_0_021.png',
 21: '0_0_022.png',
 22: '0_0_023.png',
 23: '0_0_024.png',
 24: '0_0_025.png',
 25: '0_0_026.png',
 26: '0_0_027.png',
 27: '0_0_028.png',
 28: '0_0_029.png',
 29: '0_0_030.png',
 30: '0_0_031.png',
 31: '0_0_032.png',
 32: '0_0_033.png',
 33: '0_0_034.png',
 34: '0_0_035.png',
 35: '0_0_036.png',
 36: '0_0_037.png',
 37: '0_0_038.png',
 38: '0_0_039.png',
 39: '0_0_040.png',
 40: '0_0_041.png',
 41: '0_0_042.png',
 42: '0_0_043.png',
 43: '0_0_044.png',
 44: '0_0_045.png',
 45: '0_0_046.png',
 46: '0_0_047.png',
 47: '0_0_048.png',
 48: '0_0_049.png',
 49: '0_0_050.png',
 50: '0_0_

In [58]:
# Neighbour table while it still holds integer row positions.
indices_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,52,25,70,22,32,43,80,35,85
1,1,79,854,149,902,1662,695,233,80,645
2,2,19,36,94,38,92,93,3,39,40
3,3,93,38,96,94,2,65,39,92,40
4,4,26,59,587,14,32,25,1208,602,1217
...,...,...,...,...,...,...,...,...,...,...
2179,2179,2178,321,2166,984,2124,955,336,245,2170
2180,2180,2174,1856,1836,568,472,1072,470,1851,1477
2181,2181,447,718,1879,2114,443,2178,2134,1892,1917
2182,2182,2110,575,2112,559,2111,483,558,2169,499


In [59]:
# Translate every neighbour row position into its file name using the
# {position: file name} mapping in `df1`. The table is rebuilt from `indices`
# rather than from its own previous value, so re-running this cell (or running
# it out of order) always starts from the raw integer positions.
indices_df = pd.DataFrame(indices).replace(df1)
indices_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0_0_001.png,0_0_053.png,0_0_026.png,0_0_071.png,0_0_023.png,0_0_033.png,0_0_044.png,0_0_081.png,0_0_036.png,0_0_086.png
1,0_0_002.png,0_0_080.png,3_0_007.png,0_2_008.png,3_0_055.png,5_0_014.png,2_2_006.png,0_8_026.png,0_0_081.png,2_0_065.png
2,0_0_003.png,0_0_020.png,0_0_037.png,0_0_095.png,0_0_039.png,0_0_093.png,0_0_094.png,0_0_004.png,0_0_040.png,0_0_041.png
3,0_0_004.png,0_0_094.png,0_0_039.png,0_0_097.png,0_0_095.png,0_0_003.png,0_0_066.png,0_0_040.png,0_0_093.png,0_0_041.png
4,0_0_005.png,0_0_027.png,0_0_060.png,2_0_007.png,0_0_015.png,0_0_033.png,0_0_026.png,4_0_028.png,2_0_022.png,4_0_037.png
...,...,...,...,...,...,...,...,...,...,...
2179,6_9_017.png,6_9_016.png,1_2_002.png,6_9_004.png,3_2_046.png,6_7_007.png,3_2_017.png,1_2_017.png,1_0_009.png,6_9_008.png
2180,6_9_018.png,6_9_012.png,5_7_016.png,5_6_019.png,1_9_018.png,1_6_004.png,3_6_009.png,1_6_002.png,5_7_011.png,4_6_010.png
2181,6_9_019.png,1_5_014.png,2_2_029.png,5_9_005.png,6_6_005.png,1_5_010.png,6_9_016.png,6_7_017.png,5_9_018.png,6_0_022.png
2182,6_9_020.png,6_6_001.png,1_9_025.png,6_6_003.png,1_9_009.png,6_6_002.png,1_6_015.png,1_9_008.png,6_9_007.png,1_7_002.png


In [60]:
# Persist the neighbour table (file names) as CSV; the row labels are written
# as the first column.
indices_df.to_csv('df.csv')
